ops, cluster: consolidate NixOS provisioning
This moves the diff-and-activate logic from cluster/nix/provision.nix
into ops/{provision,machines}.nix that can be used for both cluster
machines and bgpwtf machines.
The provisioning scripts now live per-NixOS-config, and anything under
ops.machines.$fqdn now has a .passthru.hscloud.provision derivation
which is that script. When run, it will attempt to deploy onto the
target machine.
There's also a top-level tool at `ops.provision` which builds all
configurations / machines and can be invoked with a machine name/FQDN
to run the corresponding provisioner script.
clustercfg is changed to use the new provisioning logic.
Change-Id: I258abce9e8e3db42af35af102f32ab7963046353
diff --git a/ops/machines.nix b/ops/machines.nix
index 0e63228..5401e30 100644
--- a/ops/machines.nix
+++ b/ops/machines.nix
@@ -3,30 +3,41 @@
# This allows to have a common attrset of machines that can be deployed
# in the same way.
#
-# Currently building/deployment is still done in a half-assed way:
-#
-# machine=edge01.waw.bgp.wtf
-# d=$(nix-build -A 'ops.machines."'$machine'"'.toplevel)
-#
-# To then deploy derivation $d on $machine:
-#
-# nix-copy-closure --to root@$machine $d
-# ssh root@$machine $d/bin/switch-to-configuration dry-activate
-# ssh root@$machine $d/bin/switch-to-configuration test
-# ssh root@$machine nix-env -p /nix/var/nix/profiles/system --set $d
-# ssh root@$machine $d/bin/switch-to-configuration boot
-#
-# TODO(q3k): merge this with //cluster/clustercfg - this should be unified!
+# For information about building/deploying machines see //ops/README.md.
{ hscloud, pkgs, ... }:
let
+ # nixpkgs for cluster machines (.hswaw.net). Currently pinned to an old
+ # nixpkgs because NixOS modules for kubernetes changed enough that it's not
+ # super easy to use them as is.
+ #
+ # TODO(q3k): fix this: use an old nixpkgs for Kube modules while using
+ # hscloud nixpkgs for everything else.
+ nixpkgsCluster = import (pkgs.fetchFromGitHub {
+ owner = "nixos";
+ repo = "nixpkgs-channels";
+ rev = "44ad80ab1036c5cc83ada4bfa451dac9939f2a10";
+ sha256 = "1b61nzvy0d46cspy07szkc0rggacxiqg9v1py27pkqpj7rvawfsk";
+ }) {};
+
+ # edge01 still lives on an old nixpkgs checkout.
+ #
+ # TODO(b/3): unpin and deploy.
+ nixpkgsBgpwtf = import (pkgs.fetchFromGitHub {
+ owner = "nixos";
+ repo = "nixpkgs-channels";
+ rev = "c59ea8b8a0e7f927e7291c14ea6cd1bd3a16ff38";
+ sha256 = "1ak7jqx94fjhc68xh1lh35kh3w3ndbadprrb762qgvcfb8351x8v";
+ }) {};
+
# Stopgap measure to import //cluster/nix machine definitions into new
- # //ops/machines infrastructure.
+ # //ops/ infrastructure.
+ #
# TODO(q3k): inject defs-cluster-k0.nix / defs-machines.nix content via
# nixos options instead of having module definitions loading it themselves,
# deduplicate list of machines below with defs-machines.nix somehow.
- mkClusterMachine = name: pkgs.nixos ({ config, pkgs, ... }: {
+ clusterMachineConfig = name: [({ config, pkgs, ...}: {
# The hostname is used by //cluster/nix machinery to load the appropriate
# config from defs-machines into defs-cluster-k0.
networking.hostName = name;
@@ -34,29 +45,71 @@
../cluster/nix/modules/base.nix
../cluster/nix/modules/kubernetes.nix
];
- });
+ })];
+ # mkMachine builds NixOS modules into a NixOS derivation, and injects
+ # passthru.hscloud.provision which deploys that configuration over SSH to a
+ # production machine.
mkMachine = pkgs: paths: pkgs.nixos ({ config, pkgs, ... }: {
imports = paths;
+
+ config = let
+ name = config.networking.hostName;
+ domain = if (config.networking ? domain) && config.networking.domain != null then config.networking.domain else "hswaw.net";
+ fqdn = name + "." + domain;
+ toplevel = config.system.build.toplevel;
+
+ runProvision = ''
+ #!/bin/sh
+ set -eu
+ remote=root@${fqdn}
+ echo "Configuration for ${fqdn} is ${toplevel}"
+ nix copy -s --to ssh://$remote ${toplevel}
+
+ running="$(ssh $remote readlink -f /nix/var/nix/profiles/system)"
+ if [ "$running" == "${toplevel}" ]; then
+ echo "${fqdn} already running ${toplevel}."
+ else
+ echo "/etc/systemd/system diff:"
+ ssh $remote diff -ur /var/run/current-system/etc/systemd/system ${toplevel}/etc/systemd/system || true
+ echo ""
+ echo ""
+ echo "dry-activate diff:"
+ ssh $remote ${toplevel}/bin/switch-to-configuration dry-activate
+ read -p "Do you want to switch to this configuration? " -n 1 -r
+ echo
+ if ! [[ $REPLY =~ ^[Yy]$ ]]; then
+ exit 1
+ fi
+
+ echo -ne "\n\nswitch-to-configuration test...\n"
+ ssh $remote ${toplevel}/bin/switch-to-configuration test
+ fi
+
+ echo -ne "\n\n"
+ read -p "Do you want to set this configuration as boot? " -n 1 -r
+ echo
+ if ! [[ $REPLY =~ ^[Yy]$ ]]; then
+ exit 1
+ fi
+
+ echo -ne "\n\nsetting system profile...\n"
+ ssh $remote nix-env -p /nix/var/nix/profiles/system --set ${toplevel}
+
+ echo -ne "\n\nswitch-to-configuration boot...\n"
+ ssh $remote ${toplevel}/bin/switch-to-configuration boot
+ '';
+ in {
+ passthru.hscloud.provision = pkgs.writeScript "provision-${fqdn}" runProvision;
+ };
});
-
in {
- "bc01n01.hswaw.net" = mkClusterMachine "bc01n01";
- "bc01n02.hswaw.net" = mkClusterMachine "bc01n02";
- "bc01n03.hswaw.net" = mkClusterMachine "bc01n03";
- "dcr01s22.hswaw.net" = mkClusterMachine "dcr01s22";
- "dcr01s24.hswaw.net" = mkClusterMachine "dcr01s24";
+ "bc01n01.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "bc01n01");
+ "bc01n02.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "bc01n02");
+ "dcr01s22.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "dcr01s22");
+ "dcr01s24.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "dcr01s24");
- # edge01 still lives on an old nixpkgs checkout.
- # TODO(b/3): unpin and deploy.
- "edge01.waw.bgp.wtf" = mkMachine (
- import (pkgs.fetchFromGitHub {
- owner = "nixos";
- repo = "nixpkgs-channels";
- rev = "c59ea8b8a0e7f927e7291c14ea6cd1bd3a16ff38";
- sha256 = "1ak7jqx94fjhc68xh1lh35kh3w3ndbadprrb762qgvcfb8351x8v";
- }) {}
- ) [
+ "edge01.waw.bgp.wtf" = mkMachine nixpkgsBgpwtf [
../bgpwtf/machines/edge01.waw.bgp.wtf.nix
../bgpwtf/machines/edge01.waw.bgp.wtf-hardware.nix
];