ops, cluster: consolidate NixOS provisioning

This moves the diff-and-activate logic from cluster/nix/provision.nix
into ops/{provision,machines}.nix that can be used for both cluster
machines and bgpwtf machines.

The provisioning scripts now live per-NixOS-config, and anything under
ops.machines.$fqdn now has a .passthru.hscloud.provision derivation
which is that script. When run, it will attempt to deploy onto the
target machine.

There's also a top-level tool at `ops.provision` which builds all
configurations / machines and can be called with the machine name/fqdn
to call the corresponding provisioner script.

clustercfg is changed to use the new provisioning logic.

Change-Id: I258abce9e8e3db42af35af102f32ab7963046353
diff --git a/ops/machines.nix b/ops/machines.nix
index 0e63228..5401e30 100644
--- a/ops/machines.nix
+++ b/ops/machines.nix
@@ -3,30 +3,41 @@
 # This allows to have a common attrset of machines that can be deployed
 # in the same way.
 #
-# Currently building/deployment is still done in a half-assed way:
-#
-#    machine=edge01.waw.bgp.wtf
-#    d=$(nix-build -A 'ops.machines."'$machine'"'.toplevel)
-#
-# To then deploy derivation $d on $machine:
-#
-#    nix-copy-closure --to root@$machine $d
-#    ssh root@$machine $d/bin/switch-to-configuration dry-activate
-#    ssh root@$machine $d/bin/switch-to-configuration test
-#    ssh root@$machine nix-env -p /nix/var/nix/profiles/system --set $d
-#    ssh root@$machine $d/bin/switch-to-configuration boot
-#
-# TODO(q3k): merge this with //cluster/clustercfg - this should be unified!
+# For information about building/deploying machines see //ops/README.md.
 
 { hscloud, pkgs, ... }:
 
 let
+  # nixpkgs for cluster machines (.hswaw.net). Currently pinned to an old
+  # nixpkgs because NixOS modules for kubernetes changed enough that it's not
+  # super easy to use them as is.
+  #
+  # TODO(q3k): fix this: use an old nixpkgs for Kube modules while using
+  # hscloud nixpkgs for everything else.
+  nixpkgsCluster = import (pkgs.fetchFromGitHub {
+    owner = "nixos";
+    repo = "nixpkgs-channels";
+    rev = "44ad80ab1036c5cc83ada4bfa451dac9939f2a10";
+    sha256 = "1b61nzvy0d46cspy07szkc0rggacxiqg9v1py27pkqpj7rvawfsk";
+  }) {};
+
+  # edge01 still lives on an old nixpkgs checkout.
+  #
+  # TODO(b/3): unpin and deploy.
+  nixpkgsBgpwtf = import (pkgs.fetchFromGitHub {
+    owner = "nixos";
+    repo = "nixpkgs-channels";
+    rev = "c59ea8b8a0e7f927e7291c14ea6cd1bd3a16ff38";
+    sha256 = "1ak7jqx94fjhc68xh1lh35kh3w3ndbadprrb762qgvcfb8351x8v";
+  }) {};
+
   # Stopgap measure to import //cluster/nix machine definitions into new
-  # //ops/machines infrastructure.
+  # //ops/ infrastructure.
+  #
   # TODO(q3k): inject defs-cluster-k0.nix / defs-machines.nix content via
   # nixos options instead of having module definitions loading it themselves,
   # deduplicate list of machines below with defs-machines.nix somehow.
-  mkClusterMachine = name: pkgs.nixos ({ config, pkgs, ... }: {
+  clusterMachineConfig = name: [({ config, pkgs, ...}: {
     # The hostname is used by //cluster/nix machinery to load the appropriate
     # config from defs-machines into defs-cluster-k0.
     networking.hostName = name;
@@ -34,29 +45,71 @@
       ../cluster/nix/modules/base.nix
       ../cluster/nix/modules/kubernetes.nix
     ];
-  });
+  })];
 
+  # mkMachine builds NixOS modules into a NixOS derivation, and injects
+  # passthru.hscloud.provision which deploys that configuration over SSH to a
+  # production machine.
   mkMachine = pkgs: paths: pkgs.nixos ({ config, pkgs, ... }: {
     imports = paths;
+
+    config = let
+      name = config.networking.hostName;
+      domain = if (config.networking ? domain) && config.networking.domain != null then config.networking.domain else "hswaw.net";
+      fqdn = name + "." + domain;
+      toplevel = config.system.build.toplevel;
+
+      runProvision = ''
+        #!/usr/bin/env bash
+        set -eu
+        remote=root@${fqdn}
+        echo "Configuration for ${fqdn} is ${toplevel}"
+        nix copy -s --to ssh://$remote ${toplevel}
+
+        running="$(ssh $remote readlink -f /nix/var/nix/profiles/system)"
+        if [ "$running" == "${toplevel}" ]; then
+          echo "${fqdn} already running ${toplevel}."
+        else
+          echo "/etc/systemd/system diff:"
+          ssh $remote diff -ur /var/run/current-system/etc/systemd/system ${toplevel}/etc/systemd/system || true
+          echo ""
+          echo ""
+          echo "dry-activate diff:"
+          ssh $remote ${toplevel}/bin/switch-to-configuration dry-activate
+          read -p "Do you want to switch to this configuration? " -n 1 -r
+          echo
+          if ! [[ $REPLY =~ ^[Yy]$ ]]; then
+            exit 1
+          fi
+
+          echo -ne "\n\nswitch-to-configuration test...\n"
+          ssh $remote ${toplevel}/bin/switch-to-configuration test
+        fi
+
+        echo -ne "\n\n"
+        read -p "Do you want to set this configuration as boot? " -n 1 -r
+        echo
+        if ! [[ $REPLY =~ ^[Yy]$ ]]; then
+            exit 1
+        fi
+
+        echo -ne "\n\nsetting system profile...\n"
+        ssh $remote nix-env -p /nix/var/nix/profiles/system --set ${toplevel}
+
+        echo -ne "\n\nswitch-to-configuration boot...\n"
+        ssh $remote ${toplevel}/bin/switch-to-configuration boot
+      '';
+    in {
+      passthru.hscloud.provision = pkgs.writeScript "provision-${fqdn}" runProvision;
+    };
   });
-
 in {
-  "bc01n01.hswaw.net" = mkClusterMachine "bc01n01";
-  "bc01n02.hswaw.net" = mkClusterMachine "bc01n02";
-  "bc01n03.hswaw.net" = mkClusterMachine "bc01n03";
-  "dcr01s22.hswaw.net" = mkClusterMachine "dcr01s22";
-  "dcr01s24.hswaw.net" = mkClusterMachine "dcr01s24";
+  "bc01n01.hswaw.net"  = mkMachine nixpkgsCluster (clusterMachineConfig "bc01n01");
+  "bc01n02.hswaw.net"  = mkMachine nixpkgsCluster (clusterMachineConfig "bc01n02");
+  "dcr01s22.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "dcr01s22");
+  "dcr01s24.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "dcr01s24");
 
-  # edge01 still lives on an old nixpkgs checkout.
-  # TODO(b/3): unpin and deploy.
-  "edge01.waw.bgp.wtf" = mkMachine (
-    import (pkgs.fetchFromGitHub {
-      owner = "nixos";
-      repo = "nixpkgs-channels";
-      rev = "c59ea8b8a0e7f927e7291c14ea6cd1bd3a16ff38";
-      sha256 = "1ak7jqx94fjhc68xh1lh35kh3w3ndbadprrb762qgvcfb8351x8v";
-    }) {}
-  ) [
+  "edge01.waw.bgp.wtf" = mkMachine nixpkgsBgpwtf [
     ../bgpwtf/machines/edge01.waw.bgp.wtf.nix
     ../bgpwtf/machines/edge01.waw.bgp.wtf-hardware.nix
   ];