cluster/nix: locally build nixos derivations

We change the existing behaviour (copy files & run nixos-rebuild switch)
to something closer to nixops-style deployment. This means that admin
machines used for provisioning now need Nix installed locally, but that's
probably an okay choice to make.

The upside of this approach is that it's easier to debug and test
derivations, as all data is local to the repo and the workstation, and
deploying just means copying a configuration closure and switching the
system to it. At some point we should even be able to run the entire
cluster within a set of test VMs.

We also bump the Kubernetes control plane to 1.14. Kubelets are still at
1.13; their upgrade is coming up today as well.

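For reference, deploying a machine now boils down to something like the
following (attribute path and machine name are illustrative; the actual
entry points are generated by cluster/nix/provision.nix):

  $ nix-build cluster/nix/ -A provision.provision-bc01n01
  $ ./result/bin/provision-bc01n01 dry-activate   # or: switch
  # which effectively runs:
  #   nix copy --no-check-sigs -s --to ssh://root@bc01n01.hswaw.net <toplevel>
  #   ssh root@bc01n01.hswaw.net <toplevel>/bin/switch-to-configuration dry-activate
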
Change-Id: Ia9832c47f258ee223d93893d27946d1161cc4bbd
diff --git a/cluster/nix/cluster-configuration.nix b/cluster/nix/cluster-configuration.nix
deleted file mode 100644
index dde65fe..0000000
--- a/cluster/nix/cluster-configuration.nix
+++ /dev/null
@@ -1,265 +0,0 @@
-{ config, pkgs, lib, ... }:
-
-with ( import ./toplevel.nix );
-let
-  fqdn = config.networking.hostName + domain;
-  node = (builtins.head (builtins.filter (n: n.fqdn == fqdn) nodes));
-  otherNodes = (builtins.filter (n: n.fqdn != fqdn) nodes);
-
-  # Pin for k8s packages. This is so that upagrading the system will not upgrade the k8s control or data planes.
-  k8spkgs = import (fetchGit {
-    name = "nixos-unstable-2019-04-12";
-    url = https://github.com/nixos/nixpkgs/;
-    rev = "1fc591f9a5bd1b016b5d66dfab29560073955a14";
-  }) {};
-
-  infraContainer = pkgs.dockerTools.buildImage {
-    name = "pause";
-    tag = "latest";
-    contents = k8spkgs.kubernetes.pause;
-    config.Cmd = "/bin/pause";
-  };
-
-
-in rec {
-  imports =
-    [ # Include the results of the hardware scan.
-      ./hardware-configuration.nix
-    ];
-
-  # Use the GRUB 2 boot loader.
-  boot.loader.grub.enable = true;
-  boot.loader.grub.version = 2;
-  boot.loader.grub.device = node.diskBoot;
-
-  boot.kernelPackages = pkgs.linuxPackages_latest;
-  boot.kernelParams = [ "boot.shell_on_fail" ];
-  boot.kernel.sysctl."net.ipv4.conf.all.rp_filter" = "0";
-  boot.kernel.sysctl."net.ipv4.conf.default.rp_filter" = "0";
-
-  time.timeZone = "Europe/Warsaw";
-
-  networking.useDHCP = false;
-  networking.interfaces."${node.mgmtIf}".useDHCP = true;
-
-  # List packages installed in system profile. To search, run:
-  # $ nix search wget
-  environment.systemPackages = with pkgs; [
-    wget vim htop tcpdump
-    rxvt_unicode.terminfo
-  ];
-
-  # Some programs need SUID wrappers, can be configured further or are
-  # started in user sessions.
-  programs.mtr.enable = true;
-
-  # List services that you want to enable:
-  virtualisation.docker.enable = true;
-  virtualisation.docker.extraOptions = "--iptables=false --ip-masq=false --ip-forward=true";
-
-  # Docker 1.13 sets iptables FORWARD to DROP. Unfuck this.
-  systemd.services."docker-iptables-unfuck" = {
-    enable = true;
-    wantedBy = [ "kubernetes.target" ];
-    description = "Docker iptable Unfuck";
-    after = [ "docker.service" ];
-    requires = [ "docker.service" ];
-    path = [ pkgs.iptables ];
-    script = ''
-      iptables -P FORWARD ACCEPT
-    '';
-    serviceConfig.Type = "oneshot";
-  };
-  # Otherwise fetchGit nixpkgs pin fails.
-  systemd.services.nixos-upgrade.path = [ pkgs.git ];
-
-  # Enable the OpenSSH daemon.
-  services.openssh.enable = true;
-  users.users.root.openssh.authorizedKeys.keys = [
-    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDD4VJXAXEHEXZk2dxNwehneuJcEGkfXG/U7z4fO79vDVIENdedtXQUyLyhZJc5RTEfHhQj66FwIqzl7mzBHd9x9PuDp6QAYXrkVNMj48s6JXqZqBvF6H/weRqFMf4a2TZv+hG8D0kpvmLheCwWAVRls7Jofnp/My+yDd57GMdsbG/yFEf6WPMiOnA7hxdSJSVihCsCSw2p8PD4GhBe8CVt7xIuinhutjm9zYBjV78NT8acjDUfJh0B1ODTjs7nuW1CC4jybSe2j/OU3Yczj4AxRxBNWuFxUq+jBo9BfpbKLh+Tt7re+zBkaicM77KM/oV6943JJxgHNBBOsv9scZE7 q3k@amnesia"
-    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPm+KopMxs7QfATTKJBjCSKwttslx1u3dHl7tuppwN/4 q3k@paranoia"
-    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDQb3YQoiYFZLKwvHYKbu1bMqzNeDCAszQhAe1+QI5SLDOotclyY/vFmOReZOsmyMFl71G2d7d+FbYNusUnNNjTxRYQ021tVc+RkMdLJaORRURmQfEFEKbai6QSFTwErXzuoIzyEPK0lbsQuGgqT9WaVnRzHJ2Q/4+qQbxAS34PuR5NqEkmn4G6LMo3OyJ5mwPkCj9lsqz4BcxRaMWFO3mNcwGDfSW+sqgc3E8N6LKrTpZq3ke7xacpQmcG5DU9VO+2QVPdltl9jWbs3gXjmF92YRNOuKPVfAOZBBsp8JOznfx8s9wDgs7RwPmDpjIAJEyoABqW5hlXfqRbTnfnMvuR informatic@InformaticPC"
-    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDGkMgEVwQM8yeuFUYL2TwlJIq9yUNBmHnwce46zeL2PK2CkMz7sxT/om7sp/K5XDiqeD05Nioe+Dr3drP6B8uI33S5NgxPIfaqQsRS+CBEgk6cqFlcdlKETU/DT+/WsdoO173n7mgGeafPInEuQuGDUID0Fl099kIxtqfAhdeZFMM6/szAZEZsElLJ8K6dp1Ni/jmnXCZhjivZH3AZUlnqrmtDG7FY1bgcOfDXAal45LItughGPtrdiigXe9DK2fW3+9DBZZduh5DMJTNlphAZ+nfSrbyHVKUg6WsgMSprur4KdU47q1QwzqqvEj75JcdP1jOWoZi4F6VJDte9Wb9lhD1jGgjxY9O6Gs4CH35bx15W7CN9hgNa0C8NbPJe/fZYIeMZmJ1m7O2xmnYwP8j+t7RNJWu7Pa3Em4mOEXvhBF07Zfq+Ye/4SluoRgADy5eII2x5fFo5EBhInxK0/X8wF6XZvysalVifoCh7T4Edejoi91oAxFgYAxbboXGlod0eEHIi2hla8SM9+IBHOChmgawKBYp2kzAJyAmHNBF+Pah9G4arVCj/axp/SJZDZbJQoI7UT/fJzEtvlb5RWrHXRq+y6IvjpUq4pzpDWW04+9UMqEEXRmhWOakHfEVM9rN8h3aJBflLUBBnh0Z/hVsKNh8bCRHaKtah8TrD9i+wMw== patryk.jakuszew@gmail.com"
-    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC33naG1ptCvUcRWX9cj9wXM1nW1lyQC4SvMJzWlr9aMD96O8hQ2JMkuIUgUJvorAY02QRplQ2BuoVoVkdkzwjMyi1bL3OdgcKo7Z1yByClGTTocqNJYY0lcUb6EJH8+6e6F9ydrQlSxNzL1uCaA7phZr+yPcmAmWbSfioXn98yXNkE0emHxzJv/nypJY56sDCMC2IXDRd8L2goDtPwgPEW7bWfAQdIFMJ75xOidZOTxJ8eqyXLw/kxY5UlyX66jdoYz1sE5XUHuoQl1AOG9UdlMo0aMhUvP4pX5l7r7EnA9OttKMFB3oWqkVK/R6ynZ52YNOU5BZ9V+Ppaj34W0xNu+p0mbHcCtXYCTrf/OU0hcZDbDaNTjs6Vtcm2wYw9iAKX7Tex+eOMwUwlrlcyPNRV5BTot7lGNYfauHCSIuWJKN4NhCLR/NtVNh4/94eKkPTwJsY6XqDcS7q49wPAs4DAH7BJgsbHPOqygVHrY0YYEfz3Pj0HTxJHQMCP/hQX4fXEGt0BjgoVJbXPAQtPyeg0JuxiUg+b4CgVVfQ6R060MlM1BZzhmh+FY5MJH6nJppS0aHYCvSg8Z68NUlCPKy0jpcyfuAIWQWwSGG1O010WShQG2ELsvNdg5/4HVdCGNl5mmoom6JOd72FOZyQlHDFfeQUQRn9HOeCq/c51rK99SQ== bartek@IHM"
-    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICTR292kx/2CNuWYIsZ6gykQ036aBGrmheIuZa6S1D2x implr@thonk"
-  ];
-
-  networking.firewall.enable = false;
-
-  # Point k8s apiserver address at ourselves, as every node runs an apiserver with this cert name.
-  networking.extraHosts = ''
-    127.0.0.1 ${k8sapi}
-  '';
-
-  security.acme.certs = {
-    host = {
-      email = acmeEmail;
-      domain = fqdn;
-      webroot = services.nginx.virtualHosts.host.root;
-    };
-  };
-
-  services.nginx = {
-    enable = true;
-    virtualHosts.host = {
-      serverName = fqdn;
-      root = "/var/www/${fqdn}";
-    };
-  };
-
-  services.etcd = rec {
-    enable = true;
-    name = fqdn;
-    listenClientUrls = ["https://0.0.0.0:2379"];
-    advertiseClientUrls = ["https://${fqdn}:2379"];
-    listenPeerUrls = ["https://0.0.0.0:2380"];
-    initialAdvertisePeerUrls = ["https://${fqdn}:2380"];
-    initialCluster = (map (n: "${n.fqdn}=https://${n.fqdn}:2380") nodes);
-    initialClusterState = "existing";
-
-    clientCertAuth = true;
-    trustedCaFile = pki.etcd.server.ca;
-    certFile = pki.etcd.server.cert;
-    keyFile = pki.etcd.server.key;
-
-    peerClientCertAuth = true;
-    peerTrustedCaFile = pki.etcdPeer.ca;
-    peerCertFile = pki.etcdPeer.cert;
-    peerKeyFile = pki.etcdPeer.key;
-
-    extraConf = {
-      PEER_CLIENT_CERT_AUTH = "true";
-    };
-  };
-
-  services.kubernetes = {
-    # Pin to specific k8s package.
-    package = k8spkgs.kubernetes;
-    roles = []; # We do not use any nixpkgs predefined roles for k8s. Instead,
-                # we enable k8s components manually.
-
-    caFile = pki.kube.apiserver.ca;
-    clusterCidr = "10.10.16.0/20";
-
-    path = [ pkgs.e2fsprogs ]; # kubelet wants to mkfs.ext4 when mounting pvcs
-
-    addons.dns.enable = false;
-
-    apiserver = rec {
-      enable = true;
-      insecurePort = ports.k8sAPIServerPlain;
-      securePort = ports.k8sAPIServerSecure;
-      advertiseAddress = "${node.ipAddr}";
-
-      etcd = {
-        # https://github.com/kubernetes/kubernetes/issues/72102
-        servers = (map (n: "https://${n.fqdn}:2379") ( [ node ] ));
-        caFile = pki.etcd.kube.ca;
-        keyFile = pki.etcd.kube.key;
-        certFile = pki.etcd.kube.cert;
-      };
-
-      tlsCertFile = pki.kube.apiserver.cert;
-      tlsKeyFile = pki.kube.apiserver.key;
-
-      clientCaFile = pki.kube.apiserver.ca;
-
-      kubeletHttps = true;
-      kubeletClientCaFile = pki.kube.apiserver.ca;
-      kubeletClientCertFile = pki.kube.apiserver.cert;
-      kubeletClientKeyFile = pki.kube.apiserver.key;
-
-      serviceAccountKeyFile = pki.kube.serviceaccounts.key;
-
-      allowPrivileged = true;
-      serviceClusterIpRange = "10.10.12.0/24";
-      runtimeConfig = "api/all,authentication.k8s.io/v1beta1";
-      authorizationMode = ["Node" "RBAC"];
-      enableAdmissionPlugins = ["Initializers" "NamespaceLifecycle" "NodeRestriction" "LimitRanger" "ServiceAccount" "DefaultStorageClass" "ResourceQuota" "PodSecurityPolicy"];
-      extraOpts = ''
-        --apiserver-count=5 \
-        --proxy-client-cert-file=${pki.kubeFront.apiserver.cert} \
-        --proxy-client-key-file=${pki.kubeFront.apiserver.key} \
-        --requestheader-allowed-names= \
-        --requestheader-client-ca-file=${pki.kubeFront.apiserver.ca} \
-        --requestheader-extra-headers-prefix=X-Remote-Extra- \
-        --requestheader-group-headers=X-Remote-Group  \
-        --requestheader-username-headers=X-Remote-User \
-        -v=5
-      '';
-    };
-
-    controllerManager = {
-      enable = true;
-      bindAddress = "0.0.0.0";
-      insecurePort = ports.k8sControllerManagerPlain;
-      leaderElect = true;
-      serviceAccountKeyFile = pki.kube.serviceaccounts.key;
-      rootCaFile = pki.kube.ca;
-      extraOpts = ''
-        --service-cluster-ip-range=10.10.12.0/24 \
-        --use-service-account-credentials=true \
-        --secure-port=${toString ports.k8sControllerManagerSecure}\
-      '';
-      kubeconfig = pki.kube.controllermanager.config;
-    };
-
-    scheduler = {
-      enable = true;
-      address = "0.0.0.0";
-      port = 0;
-      leaderElect = true;
-      kubeconfig = pki.kube.scheduler.config;
-    };
-
-    proxy = {
-      enable = true;
-      kubeconfig = pki.kube.proxy.config;
-      extraOpts = ''
-        --hostname-override=${fqdn}\
-        --proxy-mode=iptables
-      '';
-    };
-
-    kubelet = {
-      enable = true;
-      unschedulable = false;
-      hostname = fqdn;
-      tlsCertFile = pki.kube.kubelet.cert;
-      tlsKeyFile = pki.kube.kubelet.key;
-      clientCaFile = pki.kube.kubelet.ca;
-      nodeIp = node.ipAddr;
-      networkPlugin = "cni";
-      clusterDns = "10.10.12.254";
-      kubeconfig = pki.kube.kubelet.config;
-      extraOpts = ''
-        --cni-conf-dir=/opt/cni/conf \
-        --cni-bin-dir=/opt/cni/bin \
-        --read-only-port=0
-      '';
-    };
-
-  };
-
-  # https://github.com/NixOS/nixpkgs/issues/60687
-  systemd.services.kube-control-plane-online = {
-    preStart = pkgs.lib.mkForce "";
-  };
-  # this seems to depend on flannel
-  # TODO(q3k): file issue
-  systemd.services.kubelet-online = {
-    script = pkgs.lib.mkForce "sleep 1";
-  };
-  # This by default removes all CNI plugins and replaces them with nix-defines ones
-  # Since we bring our own CNI plugins via containers with host mounts, this causes
-  # them to be removed on kubelet restart.
-  # TODO(https://github.com/NixOS/nixpkgs/issues/53601): fix when resolved
-  systemd.services.kubelet = {
-    preStart = pkgs.lib.mkForce ''
-      ${lib.concatMapStrings (img: ''
-        echo "Seeding docker image: ${img}"
-        docker load <${img}
-      '') [infraContainer]}
-    '';
-  };
-}
diff --git a/cluster/nix/configuration.sample.nix b/cluster/nix/configuration.sample.nix
deleted file mode 100644
index 8681f76..0000000
--- a/cluster/nix/configuration.sample.nix
+++ /dev/null
@@ -1,11 +0,0 @@
-{ config, pkgs, ... }:
-
-{
-  imports = [
-    ./hardware-configuration.nix
-    ./hswaw-cluster.nix
-  ];
-
-  networking.hostName = "bc01n01";
-  system.stateVersion = "18.09";
-}
diff --git a/cluster/nix/default.nix b/cluster/nix/default.nix
new file mode 100644
index 0000000..48690d4
--- /dev/null
+++ b/cluster/nix/default.nix
@@ -0,0 +1,14 @@
+let
+  pkgs = import (fetchGit {
+    name = "nixos-unstable-2020-02-12";
+    url = https://github.com/nixos/nixpkgs-channels/;
+    rev = "a21c2fa3ea2b88e698db6fc151d9c7259ae14d96";
+  });
+
+  cfg = {
+    overlays = [
+      (import ./provision.nix)
+    ];
+  };
+
+in pkgs cfg
diff --git a/cluster/nix/defs-cluster-k0.nix b/cluster/nix/defs-cluster-k0.nix
new file mode 100644
index 0000000..832c741
--- /dev/null
+++ b/cluster/nix/defs-cluster-k0.nix
@@ -0,0 +1,76 @@
+machineName:
+
+let
+  machines = (import ./defs-machines.nix);
+in rec {
+  domain = ".hswaw.net";
+  k8sapi = "k0.hswaw.net";
+  acmeEmail = "q3k@hackerspace.pl";
+
+  fqdn = machineName + domain;
+  machine = (builtins.head (builtins.filter (n: n.fqdn == fqdn) machines));
+  otherMachines = (builtins.filter (n: n.fqdn != fqdn) machines);
+  inherit machines;
+
+  pki = rec {
+    make = (radix: name: rec {
+      ca = ./../certs + "/ca-${radix}.crt";
+      cert = ./../certs + "/${radix}-${name}.cert";
+      key = ./../secrets/plain + "/${radix}-${name}.key";
+
+      json = (builtins.toJSON {
+        ca = (builtins.toString ca);
+        cert = (builtins.toString cert);
+        key = (builtins.toString key);
+      });
+    });
+
+    etcdPeer = (make "etcdpeer" fqdn);
+
+    etcd = {
+        server = (make "etcd" fqdn);
+        kube = (make "etcd" "kube");
+    };
+
+    makeKube = (name: (make "kube" name) // {
+      config = {
+        server = "https://${k8sapi}:${toString ports.k8sAPIServerSecure}";
+        certFile = (make "kube" name).cert;
+        keyFile = (make "kube" name).key;
+      };
+    });
+
+    kube = rec {
+      ca = apiserver.ca;
+      
+      # Used to identify apiserver.
+      apiserver = (makeKube "apiserver");
+
+      # Used to identify controller-manager.
+      controllermanager = (makeKube "controllermanager");
+
+      # Used to identify scheduler.
+      scheduler = (makeKube "scheduler");
+
+      # Used to identify kube-proxy.
+      proxy = (makeKube "proxy");
+
+      # Used to identify kubelet.
+      kubelet = (makeKube "kubelet-${fqdn}");
+
+      # Used to encrypt service accounts.
+      serviceaccounts = (makeKube "serviceaccounts");
+    };
+
+    kubeFront = {
+      apiserver = (make "kubefront" "apiserver");
+    };
+  };
+
+  ports = {
+    k8sAPIServerPlain = 4000;
+    k8sAPIServerSecure = 4001;
+    k8sControllerManagerPlain = 0; # 4002; do not serve plain http
+    k8sControllerManagerSecure = 4003;
+  };
+}
diff --git a/cluster/nix/defs-machines.nix b/cluster/nix/defs-machines.nix
new file mode 100644
index 0000000..f68cc65
--- /dev/null
+++ b/cluster/nix/defs-machines.nix
@@ -0,0 +1,57 @@
+[
+  rec {
+    name = "bc01n01";
+    threads = 16;
+    fqdn = "${name}.hswaw.net";
+    ipAddr = "185.236.240.35";
+    podNet = "10.10.16.0/24";
+    diskBoot = "/dev/disk/by-id/scsi-360024e8078a9060023b1043107388af5";
+    fsRoot = "/dev/disk/by-uuid/518ecac1-00ea-4ef0-9418-9eca6ce6d918";
+    mgmtIf = "eno1";
+    stateVersion = "18.09";
+  }
+  rec {
+    name = "bc01n02";
+    threads = 16;
+    fqdn = "${name}.hswaw.net";
+    ipAddr = "185.236.240.36";
+    podNet = "10.10.17.0/24";
+    diskBoot = "/dev/disk/by-id/scsi-360024e8078b0250023b10f8706d3c99e";
+    fsRoot = "/dev/disk/by-uuid/2d45c87b-029b-463e-a7cb-afd5a3089327";
+    mgmtIf = "eno1";
+    stateVersion = "18.09";
+  }
+  rec {
+    name = "bc01n03";
+    threads = 16;
+    fqdn = "${name}.hswaw.net";
+    ipAddr = "185.236.240.37";
+    podNet = "10.10.18.0/24";
+    diskBoot = "/dev/disk/by-id/scsi-360024e8078a8fa0023b1787e0605a3e0";
+    fsRoot = "/dev/disk/by-uuid/afc05836-c9b3-4d7e-b0a2-3ebfe2336d4f";
+    mgmtIf = "eno1";
+    stateVersion = "18.09";
+  }
+  rec {
+    name = "dcr01s22";
+    threads = 48;
+    fqdn = "${name}.hswaw.net";
+    ipAddr = "185.236.240.39";
+    podNet = "10.10.19.0/24";
+    diskBoot = "/dev/disk/by-id/ata-Samsung_SSD_860_EVO_250GB_S3YJNX1M604518E";
+    fsRoot = "/dev/disk/by-uuid/b4149083-49fe-4951-a143-aff4cedaf33a";
+    mgmtIf = "enp130s0f0";
+    stateVersion = "19.09";
+  }
+  rec {
+    name = "dcr01s24";
+    threads = 48;
+    fqdn = "${name}.hswaw.net";
+    ipAddr = "185.236.240.40";
+    podNet = "10.10.20.0/24";
+    diskBoot = "/dev/disk/by-id/ata-Samsung_SSD_860_EVO_250GB_S3YJNF0M717009H";
+    fsRoot = "/dev/disk/by-uuid/fc5c6456-5bbd-4b9e-a93e-7f9073ffe09a";
+    mgmtIf = "enp130s0f0";
+    stateVersion = "19.09";
+  }
+]
diff --git a/cluster/nix/module-base.nix b/cluster/nix/module-base.nix
new file mode 100644
index 0000000..266c145
--- /dev/null
+++ b/cluster/nix/module-base.nix
@@ -0,0 +1,56 @@
+{ config, pkgs, lib, ... }:
+
+with (( import ./defs-cluster-k0.nix ) config.networking.hostName);
+
+rec {
+  system.stateVersion = machine.stateVersion;
+  nix.maxJobs = machine.threads;
+
+  boot.loader.grub.enable = true;
+  boot.loader.grub.version = 2;
+  boot.loader.grub.device = machine.diskBoot;
+
+  fileSystems."/" =
+    { device = machine.fsRoot;
+      fsType = "ext4";
+    };
+  swapDevices = [ ];
+
+  boot.kernelPackages = pkgs.linuxPackages_latest;
+  boot.kernelParams = [ "boot.shell_on_fail" ];
+  boot.kernel.sysctl."net.ipv4.conf.all.rp_filter" = "0";
+  boot.kernel.sysctl."net.ipv4.conf.default.rp_filter" = "0";
+  boot.initrd.availableKernelModules = [ "uhci_hcd" "ehci_pci" "megaraid_sas" "usb_storage" "usbhid" "sd_mod" "sr_mod"  ];
+  boot.kernelModules = [ "kvm-intel" ];
+  boot.extraModulePackages = [];
+  hardware.enableRedistributableFirmware = true;
+
+  time.timeZone = "Europe/Warsaw";
+
+  environment.systemPackages = with pkgs; [
+    wget vim htop tcpdump
+    rxvt_unicode.terminfo
+  ];
+  programs.mtr.enable = true;
+
+  networking.useDHCP = false;
+  networking.interfaces."${machine.mgmtIf}".useDHCP = true;
+
+  # Instead of using nixpkgs from the root/nixos channel, use pkgs pin from this file.
+  nix.nixPath = [ "nixpkgs=${pkgs.path}" "nixos-config=/etc/nixos/configuration.nix" ];
+
+  # Otherwise fetchGit nixpkgs pin fails.
+  systemd.services.nixos-upgrade.path = [ pkgs.git ];
+
+  # Enable the OpenSSH daemon.
+  services.openssh.enable = true;
+  users.users.root.openssh.authorizedKeys.keys = [
+    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDD4VJXAXEHEXZk2dxNwehneuJcEGkfXG/U7z4fO79vDVIENdedtXQUyLyhZJc5RTEfHhQj66FwIqzl7mzBHd9x9PuDp6QAYXrkVNMj48s6JXqZqBvF6H/weRqFMf4a2TZv+hG8D0kpvmLheCwWAVRls7Jofnp/My+yDd57GMdsbG/yFEf6WPMiOnA7hxdSJSVihCsCSw2p8PD4GhBe8CVt7xIuinhutjm9zYBjV78NT8acjDUfJh0B1ODTjs7nuW1CC4jybSe2j/OU3Yczj4AxRxBNWuFxUq+jBo9BfpbKLh+Tt7re+zBkaicM77KM/oV6943JJxgHNBBOsv9scZE7 q3k@amnesia"
+    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPm+KopMxs7QfATTKJBjCSKwttslx1u3dHl7tuppwN/4 q3k@paranoia"
+    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDQb3YQoiYFZLKwvHYKbu1bMqzNeDCAszQhAe1+QI5SLDOotclyY/vFmOReZOsmyMFl71G2d7d+FbYNusUnNNjTxRYQ021tVc+RkMdLJaORRURmQfEFEKbai6QSFTwErXzuoIzyEPK0lbsQuGgqT9WaVnRzHJ2Q/4+qQbxAS34PuR5NqEkmn4G6LMo3OyJ5mwPkCj9lsqz4BcxRaMWFO3mNcwGDfSW+sqgc3E8N6LKrTpZq3ke7xacpQmcG5DU9VO+2QVPdltl9jWbs3gXjmF92YRNOuKPVfAOZBBsp8JOznfx8s9wDgs7RwPmDpjIAJEyoABqW5hlXfqRbTnfnMvuR informatic@InformaticPC"
+    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDGkMgEVwQM8yeuFUYL2TwlJIq9yUNBmHnwce46zeL2PK2CkMz7sxT/om7sp/K5XDiqeD05Nioe+Dr3drP6B8uI33S5NgxPIfaqQsRS+CBEgk6cqFlcdlKETU/DT+/WsdoO173n7mgGeafPInEuQuGDUID0Fl099kIxtqfAhdeZFMM6/szAZEZsElLJ8K6dp1Ni/jmnXCZhjivZH3AZUlnqrmtDG7FY1bgcOfDXAal45LItughGPtrdiigXe9DK2fW3+9DBZZduh5DMJTNlphAZ+nfSrbyHVKUg6WsgMSprur4KdU47q1QwzqqvEj75JcdP1jOWoZi4F6VJDte9Wb9lhD1jGgjxY9O6Gs4CH35bx15W7CN9hgNa0C8NbPJe/fZYIeMZmJ1m7O2xmnYwP8j+t7RNJWu7Pa3Em4mOEXvhBF07Zfq+Ye/4SluoRgADy5eII2x5fFo5EBhInxK0/X8wF6XZvysalVifoCh7T4Edejoi91oAxFgYAxbboXGlod0eEHIi2hla8SM9+IBHOChmgawKBYp2kzAJyAmHNBF+Pah9G4arVCj/axp/SJZDZbJQoI7UT/fJzEtvlb5RWrHXRq+y6IvjpUq4pzpDWW04+9UMqEEXRmhWOakHfEVM9rN8h3aJBflLUBBnh0Z/hVsKNh8bCRHaKtah8TrD9i+wMw== patryk.jakuszew@gmail.com"
+    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC33naG1ptCvUcRWX9cj9wXM1nW1lyQC4SvMJzWlr9aMD96O8hQ2JMkuIUgUJvorAY02QRplQ2BuoVoVkdkzwjMyi1bL3OdgcKo7Z1yByClGTTocqNJYY0lcUb6EJH8+6e6F9ydrQlSxNzL1uCaA7phZr+yPcmAmWbSfioXn98yXNkE0emHxzJv/nypJY56sDCMC2IXDRd8L2goDtPwgPEW7bWfAQdIFMJ75xOidZOTxJ8eqyXLw/kxY5UlyX66jdoYz1sE5XUHuoQl1AOG9UdlMo0aMhUvP4pX5l7r7EnA9OttKMFB3oWqkVK/R6ynZ52YNOU5BZ9V+Ppaj34W0xNu+p0mbHcCtXYCTrf/OU0hcZDbDaNTjs6Vtcm2wYw9iAKX7Tex+eOMwUwlrlcyPNRV5BTot7lGNYfauHCSIuWJKN4NhCLR/NtVNh4/94eKkPTwJsY6XqDcS7q49wPAs4DAH7BJgsbHPOqygVHrY0YYEfz3Pj0HTxJHQMCP/hQX4fXEGt0BjgoVJbXPAQtPyeg0JuxiUg+b4CgVVfQ6R060MlM1BZzhmh+FY5MJH6nJppS0aHYCvSg8Z68NUlCPKy0jpcyfuAIWQWwSGG1O010WShQG2ELsvNdg5/4HVdCGNl5mmoom6JOd72FOZyQlHDFfeQUQRn9HOeCq/c51rK99SQ== bartek@IHM"
+    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICTR292kx/2CNuWYIsZ6gykQ036aBGrmheIuZa6S1D2x implr@thonk"
+  ];
+
+}
diff --git a/cluster/nix/module-cluster.nix b/cluster/nix/module-cluster.nix
new file mode 100644
index 0000000..cc50503
--- /dev/null
+++ b/cluster/nix/module-cluster.nix
@@ -0,0 +1,214 @@
+{ config, pkgs, lib, ... }:
+
+with (( import ./defs-cluster-k0.nix ) config.networking.hostName);
+let
+  # Pin for k8s packages. This is so that upgrading the system will not upgrade the k8s control or data planes.
+  k8spkgs = import (fetchGit {
+    # Now at 1.14.3
+    name = "nixos-unstable-2019-06-17";
+    url = https://github.com/nixos/nixpkgs-channels/;
+    rev = "415e8e5820b7825fb74a6c7986bf6af725227eaa";
+  }) {};
+  # Pin for kubelet
+  k8spkgsKubelet = import (fetchGit {
+    # Now at 1.13.5
+    name = "nixos-unstable-2019-04-12";
+    url = https://github.com/nixos/nixpkgs-channels/;
+    rev = "1fc591f9a5bd1b016b5d66dfab29560073955a14";
+  }) {};
+
+in rec {
+  # Disable kubelet service and bring in our own override.
+  # Also nuke flannel from the orbit.
+  disabledModules = [
+    "services/cluster/kubernetes/kubelet.nix"
+    "services/cluster/kubernetes/flannel.nix"
+  ];
+
+  imports = [
+    ./module-kubelet.nix
+  ];
+
+  virtualisation.docker.enable = true;
+  virtualisation.docker.extraOptions = "--iptables=false --ip-masq=false --ip-forward=true";
+
+  # Docker 1.13 sets iptables FORWARD to DROP. Unfuck this.
+  systemd.services."docker-iptables-unfuck" = {
+    enable = true;
+    wantedBy = [ "kubernetes.target" ];
+    description = "Docker iptable Unfuck";
+    after = [ "docker.service" ];
+    requires = [ "docker.service" ];
+    path = [ pkgs.iptables ];
+    script = ''
+      iptables -P FORWARD ACCEPT
+    '';
+    serviceConfig.Type = "oneshot";
+  };
+
+  networking.firewall.enable = false;
+
+  # Point k8s apiserver address at ourselves, as every machine runs an apiserver with this cert name.
+  networking.extraHosts = ''
+    127.0.0.1 ${k8sapi}
+  '';
+
+  security.acme.certs = {
+    host = {
+      email = acmeEmail;
+      domain = fqdn;
+      webroot = services.nginx.virtualHosts.host.root;
+    };
+  };
+
+  services.nginx = {
+    enable = true;
+    virtualHosts.host = {
+      serverName = fqdn;
+      root = "/var/www/${fqdn}";
+    };
+  };
+
+  services.etcd = rec {
+    enable = true;
+    name = fqdn;
+    listenClientUrls = ["https://0.0.0.0:2379"];
+    advertiseClientUrls = ["https://${fqdn}:2379"];
+    listenPeerUrls = ["https://0.0.0.0:2380"];
+    initialAdvertisePeerUrls = ["https://${fqdn}:2380"];
+    initialCluster = (map (n: "${n.fqdn}=https://${n.fqdn}:2380") machines);
+    initialClusterState = "existing";
+
+    clientCertAuth = true;
+    trustedCaFile = pki.etcd.server.ca;
+    certFile = pki.etcd.server.cert;
+    keyFile = pki.etcd.server.key;
+
+    peerClientCertAuth = true;
+    peerTrustedCaFile = pki.etcdPeer.ca;
+    peerCertFile = pki.etcdPeer.cert;
+    peerKeyFile = pki.etcdPeer.key;
+
+    extraConf = {
+      PEER_CLIENT_CERT_AUTH = "true";
+    };
+  };
+
+  services.kubernetes = {
+    # Pin to specific k8s package.
+    package = k8spkgs.kubernetes;
+    roles = []; # We do not use any nixpkgs predefined roles for k8s. Instead,
+                # we enable k8s components manually.
+
+    caFile = pki.kube.apiserver.ca;
+    clusterCidr = "10.10.16.0/20";
+
+    path = [ pkgs.e2fsprogs ]; # kubelet wants to mkfs.ext4 when mounting pvcs
+
+    addons.dns.enable = false;
+
+    apiserver = rec {
+      enable = true;
+      insecurePort = ports.k8sAPIServerPlain;
+      securePort = ports.k8sAPIServerSecure;
+      advertiseAddress = "${machine.ipAddr}";
+
+      etcd = {
+        # https://github.com/kubernetes/kubernetes/issues/72102
+        servers = (map (n: "https://${n.fqdn}:2379") ( [ machine ] ));
+        caFile = pki.etcd.kube.ca;
+        keyFile = pki.etcd.kube.key;
+        certFile = pki.etcd.kube.cert;
+      };
+
+      tlsCertFile = pki.kube.apiserver.cert;
+      tlsKeyFile = pki.kube.apiserver.key;
+
+      clientCaFile = pki.kube.apiserver.ca;
+
+      kubeletHttps = true;
+      kubeletClientCaFile = pki.kube.apiserver.ca;
+      kubeletClientCertFile = pki.kube.apiserver.cert;
+      kubeletClientKeyFile = pki.kube.apiserver.key;
+
+      serviceAccountKeyFile = pki.kube.serviceaccounts.key;
+
+      allowPrivileged = true;
+      serviceClusterIpRange = "10.10.12.0/24";
+      runtimeConfig = "api/all,authentication.k8s.io/v1beta1";
+      authorizationMode = ["Node" "RBAC"];
+      enableAdmissionPlugins = ["NamespaceLifecycle" "NodeRestriction" "LimitRanger" "ServiceAccount" "DefaultStorageClass" "ResourceQuota" "PodSecurityPolicy"];
+      extraOpts = ''
+        --apiserver-count=5 \
+        --proxy-client-cert-file=${pki.kubeFront.apiserver.cert} \
+        --proxy-client-key-file=${pki.kubeFront.apiserver.key} \
+        --requestheader-allowed-names= \
+        --requestheader-client-ca-file=${pki.kubeFront.apiserver.ca} \
+        --requestheader-extra-headers-prefix=X-Remote-Extra- \
+        --requestheader-group-headers=X-Remote-Group  \
+        --requestheader-username-headers=X-Remote-User \
+        -v=5
+      '';
+    };
+
+    controllerManager = {
+      enable = true;
+      bindAddress = "0.0.0.0";
+      insecurePort = ports.k8sControllerManagerPlain;
+      leaderElect = true;
+      serviceAccountKeyFile = pki.kube.serviceaccounts.key;
+      rootCaFile = pki.kube.ca;
+      extraOpts = ''
+        --service-cluster-ip-range=10.10.12.0/24 \
+        --use-service-account-credentials=true \
+        --secure-port=${toString ports.k8sControllerManagerSecure}\
+      '';
+      kubeconfig = pki.kube.controllermanager.config;
+    };
+
+    scheduler = {
+      enable = true;
+      address = "0.0.0.0";
+      port = 0;
+      leaderElect = true;
+      kubeconfig = pki.kube.scheduler.config;
+    };
+
+    proxy = {
+      enable = true;
+      kubeconfig = pki.kube.proxy.config;
+      extraOpts = ''
+        --hostname-override=${fqdn}\
+        --proxy-mode=iptables
+      '';
+    };
+
+    kubelet = {
+      enable = true;
+      unschedulable = false;
+      hostname = fqdn;
+      tlsCertFile = pki.kube.kubelet.cert;
+      tlsKeyFile = pki.kube.kubelet.key;
+      clientCaFile = pki.kube.kubelet.ca;
+      nodeIp = machine.ipAddr;
+      networkPlugin = "cni";
+      clusterDns = "10.10.12.254";
+      kubeconfig = pki.kube.kubelet.config;
+      extraOpts = ''
+        --read-only-port=0
+      '';
+      package = k8spkgsKubelet.kubernetes;
+    };
+
+  };
+
+  # https://github.com/NixOS/nixpkgs/issues/60687
+  systemd.services.kube-control-plane-online = {
+    preStart = pkgs.lib.mkForce "";
+  };
+  # this seems to depend on flannel
+  # TODO(q3k): file issue
+  systemd.services.kubelet-online = {
+    script = pkgs.lib.mkForce "sleep 1";
+  };
+}
diff --git a/cluster/nix/module-kubelet.nix b/cluster/nix/module-kubelet.nix
new file mode 100644
index 0000000..f475b5b
--- /dev/null
+++ b/cluster/nix/module-kubelet.nix
@@ -0,0 +1,308 @@
+# Same as upstream kubelet.nix module from nixpkgs, but with the following
+# changes:
+#   - cni tunables nuked and replaced with static host dirs, so that calico
+#     running on k8s can drop CNI plugins there itself
+#   - package configurable separately from rest of kubernetes
+
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  top = config.services.kubernetes;
+  cfg = top.kubelet;
+
+  infraContainer = pkgs.dockerTools.buildImage {
+    name = "pause";
+    tag = "latest";
+    contents = top.package.pause;
+    config.Cmd = "/bin/pause";
+  };
+
+  kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig;
+
+  manifestPath = "kubernetes/manifests";
+
+  taintOptions = with lib.types; { name, ... }: {
+    options = {
+      key = mkOption {
+        description = "Key of taint.";
+        default = name;
+        type = str;
+      };
+      value = mkOption {
+        description = "Value of taint.";
+        type = str;
+      };
+      effect = mkOption {
+        description = "Effect of taint.";
+        example = "NoSchedule";
+        type = enum ["NoSchedule" "PreferNoSchedule" "NoExecute"];
+      };
+    };
+  };
+
+  taints = concatMapStringsSep "," (v: "${v.key}=${v.value}:${v.effect}") (mapAttrsToList (n: v: v) cfg.taints);
+in
+{
+  imports = [
+    #(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "applyManifests" ] "")
+    #(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "cadvisorPort" ] "")
+    #(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "allowPrivileged" ] "")
+  ];
+
+  # services/cluster/kubernetes/default.nix still wants to poke flannel,
+  # but since we nuke that module we have to add a fake tunable for it.
+  options.services.kubernetes.flannel = {
+    enable = mkEnableOption "enable flannel networking";
+  };
+
+  ###### interface
+  options.services.kubernetes.kubelet = with lib.types; {
+
+    address = mkOption {
+      description = "Kubernetes kubelet info server listening address.";
+      default = "0.0.0.0";
+      type = str;
+    };
+
+    clusterDns = mkOption {
+      description = "Use alternative DNS.";
+      default = "10.1.0.1";
+      type = str;
+    };
+
+    clusterDomain = mkOption {
+      description = "Use alternative domain.";
+      default = config.services.kubernetes.addons.dns.clusterDomain;
+      type = str;
+    };
+
+    clientCaFile = mkOption {
+      description = "Kubernetes apiserver CA file for client authentication.";
+      default = top.caFile;
+      type = nullOr path;
+    };
+
+    enable = mkEnableOption "Kubernetes kubelet.";
+
+    extraOpts = mkOption {
+      description = "Kubernetes kubelet extra command line options.";
+      default = "";
+      type = str;
+    };
+
+    featureGates = mkOption {
+      description = "List set of feature gates";
+      default = top.featureGates;
+      type = listOf str;
+    };
+
+    healthz = {
+      bind = mkOption {
+        description = "Kubernetes kubelet healthz listening address.";
+        default = "127.0.0.1";
+        type = str;
+      };
+
+      port = mkOption {
+        description = "Kubernetes kubelet healthz port.";
+        default = 10248;
+        type = int;
+      };
+    };
+
+    hostname = mkOption {
+      description = "Kubernetes kubelet hostname override.";
+      default = config.networking.hostName;
+      type = str;
+    };
+
+    kubeconfig = top.lib.mkKubeConfigOptions "Kubelet";
+
+    manifests = mkOption {
+      description = "List of manifests to bootstrap with kubelet (only pods can be created as manifest entry)";
+      type = attrsOf attrs;
+      default = {};
+    };
+
+    networkPlugin = mkOption {
+      description = "Network plugin to use by Kubernetes.";
+      type = nullOr (enum ["cni" "kubenet"]);
+      default = "kubenet";
+    };
+
+    nodeIp = mkOption {
+      description = "IP address of the node. If set, kubelet will use this IP address for the node.";
+      default = null;
+      type = nullOr str;
+    };
+
+    registerNode = mkOption {
+      description = "Whether to auto register kubelet with API server.";
+      default = true;
+      type = bool;
+    };
+
+    package = mkOption {
+      description = "Kubernetes package to use.";
+      type = types.package;
+      default = pkgs.kubernetes;
+      defaultText = "pkgs.kubernetes";
+    };
+
+    port = mkOption {
+      description = "Kubernetes kubelet info server listening port.";
+      default = 10250;
+      type = int;
+    };
+
+    seedDockerImages = mkOption {
+      description = "List of docker images to preload on system";
+      default = [];
+      type = listOf package;
+    };
+
+    taints = mkOption {
+      description = "Node taints (https://kubernetes.io/docs/concepts/configuration/assign-pod-node/).";
+      default = {};
+      type = attrsOf (submodule [ taintOptions ]);
+    };
+
+    tlsCertFile = mkOption {
+      description = "File containing x509 Certificate for HTTPS.";
+      default = null;
+      type = nullOr path;
+    };
+
+    tlsKeyFile = mkOption {
+      description = "File containing x509 private key matching tlsCertFile.";
+      default = null;
+      type = nullOr path;
+    };
+
+    unschedulable = mkOption {
+      description = "Whether to set node taint to unschedulable=true as it is the case of node that has only master role.";
+      default = false;
+      type = bool;
+    };
+
+    verbosity = mkOption {
+      description = ''
+        Optional glog verbosity level for logging statements. See
+        <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/>
+      '';
+      default = null;
+      type = nullOr int;
+    };
+
+  };
+
+  ###### implementation
+  config = mkMerge [
+    (mkIf cfg.enable {
+      services.kubernetes.kubelet.seedDockerImages = [infraContainer];
+
+      systemd.services.kubelet = {
+        description = "Kubernetes Kubelet Service";
+        wantedBy = [ "kubernetes.target" ];
+        after = [ "network.target" "docker.service" "kube-apiserver.service" ];
+        path = with pkgs; [ gitMinimal openssh docker utillinux iproute ethtool thin-provisioning-tools iptables socat ] ++ top.path;
+        preStart = ''
+          ${concatMapStrings (img: ''
+            echo "Seeding docker image: ${img}"
+            docker load <${img}
+          '') cfg.seedDockerImages}
+        '';
+        serviceConfig = {
+          Slice = "kubernetes.slice";
+          CPUAccounting = true;
+          MemoryAccounting = true;
+          Restart = "on-failure";
+          RestartSec = "1000ms";
+          ExecStart = ''${cfg.package}/bin/kubelet \
+            --address=${cfg.address} \
+            --authentication-token-webhook \
+            --authentication-token-webhook-cache-ttl="10s" \
+            --authorization-mode=Webhook \
+            ${optionalString (cfg.clientCaFile != null)
+              "--client-ca-file=${cfg.clientCaFile}"} \
+            ${optionalString (cfg.clusterDns != "")
+              "--cluster-dns=${cfg.clusterDns}"} \
+            ${optionalString (cfg.clusterDomain != "")
+              "--cluster-domain=${cfg.clusterDomain}"} \
+            --cni-conf-dir=/opt/cni/conf \
+            --cni-bin-dir=/opt/cni/bin \
+            ${optionalString (cfg.featureGates != [])
+              "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \
+            --hairpin-mode=hairpin-veth \
+            --healthz-bind-address=${cfg.healthz.bind} \
+            --healthz-port=${toString cfg.healthz.port} \
+            --hostname-override=${cfg.hostname} \
+            --kubeconfig=${kubeconfig} \
+            ${optionalString (cfg.networkPlugin != null)
+              "--network-plugin=${cfg.networkPlugin}"} \
+            ${optionalString (cfg.nodeIp != null)
+              "--node-ip=${cfg.nodeIp}"} \
+            --pod-infra-container-image=pause \
+            ${optionalString (cfg.manifests != {})
+              "--pod-manifest-path=/etc/${manifestPath}"} \
+            --port=${toString cfg.port} \
+            --register-node=${boolToString cfg.registerNode} \
+            ${optionalString (taints != "")
+              "--register-with-taints=${taints}"} \
+            --root-dir=${top.dataDir} \
+            ${optionalString (cfg.tlsCertFile != null)
+              "--tls-cert-file=${cfg.tlsCertFile}"} \
+            ${optionalString (cfg.tlsKeyFile != null)
+              "--tls-private-key-file=${cfg.tlsKeyFile}"} \
+            ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
+            ${cfg.extraOpts}
+          '';
+          WorkingDirectory = top.dataDir;
+        };
+      };
+
+      boot.kernelModules = ["br_netfilter"];
+
+      services.kubernetes.kubelet.hostname = with config.networking;
+        mkDefault (hostName + optionalString (domain != null) ".${domain}");
+
+      services.kubernetes.pki.certs = with top.lib; {
+        kubelet = mkCert {
+          name = "kubelet";
+          CN = top.kubelet.hostname;
+          action = "systemctl restart kubelet.service";
+
+        };
+        kubeletClient = mkCert {
+          name = "kubelet-client";
+          CN = "system:node:${top.kubelet.hostname}";
+          fields = {
+            O = "system:nodes";
+          };
+          action = "systemctl restart kubelet.service";
+        };
+      };
+
+      services.kubernetes.kubelet.kubeconfig.server = mkDefault top.apiserverAddress;
+    })
+
+    (mkIf (cfg.enable && cfg.manifests != {}) {
+      environment.etc = mapAttrs' (name: manifest:
+        nameValuePair "${manifestPath}/${name}.json" {
+          text = builtins.toJSON manifest;
+          mode = "0755";
+        }
+      ) cfg.manifests;
+    })
+
+    (mkIf (cfg.unschedulable && cfg.enable) {
+      services.kubernetes.kubelet.taints.unschedulable = {
+        value = "true";
+        effect = "NoSchedule";
+      };
+    })
+
+  ];
+}
diff --git a/cluster/nix/module-kubernetes.nix b/cluster/nix/module-kubernetes.nix
new file mode 100644
index 0000000..1e71ada
--- /dev/null
+++ b/cluster/nix/module-kubernetes.nix
@@ -0,0 +1,216 @@
+{ config, pkgs, lib, ... }:
+
+with (( import ./defs-cluster-k0.nix ) config.networking.hostName);
+let
+  # Pin for k8s packages. This is so that upgrading the system will not upgrade the k8s control or data planes.
+  k8spkgs = import (fetchGit {
+    # Now at 1.14.3
+    name = "nixos-unstable-2019-06-17";
+    url = https://github.com/nixos/nixpkgs-channels/;
+    rev = "415e8e5820b7825fb74a6c7986bf6af725227eaa";
+  }) {};
+  # Pin for kubelet
+  k8spkgsKubelet = import (fetchGit {
+    # Now at 1.13.5
+    name = "nixos-unstable-2019-04-12";
+    url = https://github.com/nixos/nixpkgs-channels/;
+    rev = "1fc591f9a5bd1b016b5d66dfab29560073955a14";
+  }) {};
+
+in rec {
+  # Disable kubelet service and bring in our own override.
+  # Also nuke flannel from the orbit.
+  disabledModules = [
+    "services/cluster/kubernetes/kubelet.nix"
+    "services/cluster/kubernetes/flannel.nix"
+  ];
+
+  imports =
+    [
+      ./module-kubelet.nix
+    ];
+
+  # List services that you want to enable:
+  virtualisation.docker.enable = true;
+  virtualisation.docker.extraOptions = "--iptables=false --ip-masq=false --ip-forward=true";
+
+  # Docker 1.13 sets iptables FORWARD to DROP. Unfuck this.
+  systemd.services."docker-iptables-unfuck" = {
+    enable = true;
+    wantedBy = [ "kubernetes.target" ];
+    description = "Docker iptable Unfuck";
+    after = [ "docker.service" ];
+    requires = [ "docker.service" ];
+    path = [ pkgs.iptables ];
+    script = ''
+      iptables -P FORWARD ACCEPT
+    '';
+    serviceConfig.Type = "oneshot";
+  };
+
+  networking.firewall.enable = false;
+
+  # Point k8s apiserver address at ourselves, as every machine runs an apiserver with this cert name.
+  networking.extraHosts = ''
+    127.0.0.1 ${k8sapi}
+  '';
+
+  security.acme.certs = {
+    host = {
+      email = acmeEmail;
+      domain = fqdn;
+      webroot = services.nginx.virtualHosts.host.root;
+    };
+  };
+
+  services.nginx = {
+    enable = true;
+    virtualHosts.host = {
+      serverName = fqdn;
+      root = "/var/www/${fqdn}";
+    };
+  };
+
+  services.etcd = rec {
+    enable = true;
+    name = fqdn;
+    listenClientUrls = ["https://0.0.0.0:2379"];
+    advertiseClientUrls = ["https://${fqdn}:2379"];
+    listenPeerUrls = ["https://0.0.0.0:2380"];
+    initialAdvertisePeerUrls = ["https://${fqdn}:2380"];
+    initialCluster = (map (n: "${n.fqdn}=https://${n.fqdn}:2380") machines);
+    initialClusterState = "existing";
+
+    clientCertAuth = true;
+    trustedCaFile = pki.etcd.server.ca;
+    certFile = pki.etcd.server.cert;
+    keyFile = pki.etcd.server.key;
+
+    peerClientCertAuth = true;
+    peerTrustedCaFile = pki.etcdPeer.ca;
+    peerCertFile = pki.etcdPeer.cert;
+    peerKeyFile = pki.etcdPeer.key;
+
+    extraConf = {
+      PEER_CLIENT_CERT_AUTH = "true";
+    };
+  };
+
+  services.kubernetes = {
+    # Pin to specific k8s package.
+    package = k8spkgs.kubernetes;
+    roles = []; # We do not use any nixpkgs predefined roles for k8s. Instead,
+                # we enable k8s components manually.
+
+    caFile = pki.kube.apiserver.ca;
+    clusterCidr = "10.10.16.0/20";
+
+    path = [ pkgs.e2fsprogs ]; # kubelet wants to mkfs.ext4 when mounting pvcs
+
+    addons.dns.enable = false;
+
+    apiserver = rec {
+      enable = true;
+      insecurePort = ports.k8sAPIServerPlain;
+      securePort = ports.k8sAPIServerSecure;
+      advertiseAddress = "${machine.ipAddr}";
+
+      etcd = {
+        # https://github.com/kubernetes/kubernetes/issues/72102
+        servers = (map (n: "https://${n.fqdn}:2379") ( [ machine ] ));
+        caFile = pki.etcd.kube.ca;
+        keyFile = pki.etcd.kube.key;
+        certFile = pki.etcd.kube.cert;
+      };
+
+      tlsCertFile = pki.kube.apiserver.cert;
+      tlsKeyFile = pki.kube.apiserver.key;
+
+      clientCaFile = pki.kube.apiserver.ca;
+
+      kubeletHttps = true;
+      kubeletClientCaFile = pki.kube.apiserver.ca;
+      kubeletClientCertFile = pki.kube.apiserver.cert;
+      kubeletClientKeyFile = pki.kube.apiserver.key;
+
+      serviceAccountKeyFile = pki.kube.serviceaccounts.key;
+
+      allowPrivileged = true;
+      serviceClusterIpRange = "10.10.12.0/24";
+      runtimeConfig = "api/all,authentication.k8s.io/v1beta1";
+      authorizationMode = ["Node" "RBAC"];
+      enableAdmissionPlugins = ["NamespaceLifecycle" "NodeRestriction" "LimitRanger" "ServiceAccount" "DefaultStorageClass" "ResourceQuota" "PodSecurityPolicy"];
+      extraOpts = ''
+        --apiserver-count=5 \
+        --proxy-client-cert-file=${pki.kubeFront.apiserver.cert} \
+        --proxy-client-key-file=${pki.kubeFront.apiserver.key} \
+        --requestheader-allowed-names= \
+        --requestheader-client-ca-file=${pki.kubeFront.apiserver.ca} \
+        --requestheader-extra-headers-prefix=X-Remote-Extra- \
+        --requestheader-group-headers=X-Remote-Group  \
+        --requestheader-username-headers=X-Remote-User \
+        -v=5
+      '';
+    };
+
+    controllerManager = {
+      enable = true;
+      bindAddress = "0.0.0.0";
+      insecurePort = ports.k8sControllerManagerPlain;
+      leaderElect = true;
+      serviceAccountKeyFile = pki.kube.serviceaccounts.key;
+      rootCaFile = pki.kube.ca;
+      extraOpts = ''
+        --service-cluster-ip-range=10.10.12.0/24 \
+        --use-service-account-credentials=true \
+        --secure-port=${toString ports.k8sControllerManagerSecure}\
+      '';
+      kubeconfig = pki.kube.controllermanager.config;
+    };
+
+    scheduler = {
+      enable = true;
+      address = "0.0.0.0";
+      port = 0;
+      leaderElect = true;
+      kubeconfig = pki.kube.scheduler.config;
+    };
+
+    proxy = {
+      enable = true;
+      kubeconfig = pki.kube.proxy.config;
+      extraOpts = ''
+        --hostname-override=${fqdn}\
+        --proxy-mode=iptables
+      '';
+    };
+
+    kubelet = {
+      enable = true;
+      unschedulable = false;
+      hostname = fqdn;
+      tlsCertFile = pki.kube.kubelet.cert;
+      tlsKeyFile = pki.kube.kubelet.key;
+      clientCaFile = pki.kube.kubelet.ca;
+      nodeIp = machine.ipAddr;
+      networkPlugin = "cni";
+      clusterDns = "10.10.12.254";
+      kubeconfig = pki.kube.kubelet.config;
+      extraOpts = ''
+        --read-only-port=0
+      '';
+      package = k8spkgsKubelet.kubernetes;
+    };
+
+  };
+
+  # https://github.com/NixOS/nixpkgs/issues/60687
+  systemd.services.kube-control-plane-online = {
+    preStart = pkgs.lib.mkForce "";
+  };
+  # this seems to depend on flannel
+  # TODO(q3k): file issue
+  systemd.services.kubelet-online = {
+    script = pkgs.lib.mkForce "sleep 1";
+  };
+}
diff --git a/cluster/nix/provision.nix b/cluster/nix/provision.nix
new file mode 100644
index 0000000..dbe697b
--- /dev/null
+++ b/cluster/nix/provision.nix
@@ -0,0 +1,40 @@
+self: super:
+
+let 
+  machines = (import ./defs-machines.nix);
+  configurations = builtins.listToAttrs (map (machine: {
+    name = machine.fqdn;
+    value = super.nixos ({ config, pkgs, ... }: {
+      networking.hostName = machine.name;
+      imports = [
+        ./module-base.nix
+        ./module-kubernetes.nix
+      ];
+    });
+  }) machines);
+
+  scriptForMachine = machine: let
+    configuration = configurations."${machine.fqdn}";
+  in ''
+   set -e
+   remote=root@${machine.fqdn}
+   echo "Configuration for ${machine.fqdn} is ${configuration.toplevel}"
+   nix copy --no-check-sigs -s --to ssh://$remote ${configuration.toplevel}
+   ssh $remote ${configuration.toplevel}/bin/switch-to-configuration $1
+  '';
+
+  machineProvisioners = builtins.listToAttrs (map (machine: {
+      name = "provision-${machine.name}";
+      value = super.writeScriptBin "provision-${machine.name}" (scriptForMachine machine);
+    }) machines);
+in
+{
+  provision = ({
+    provision = super.writeScriptBin "provision"
+      (
+        ''
+          echo "Available provisioniers:"
+        '' + (builtins.concatStringsSep "\n" (map (machine: "echo '  provision-${machine.name}'") machines))
+      );
+  }) // machineProvisioners;
+}
diff --git a/cluster/nix/toplevel.nix b/cluster/nix/toplevel.nix
deleted file mode 100644
index a0f0aed..0000000
--- a/cluster/nix/toplevel.nix
+++ /dev/null
@@ -1,107 +0,0 @@
-rec {
-  domain = ".hswaw.net";
-  k8sapi = "k0.hswaw.net";
-  acmeEmail = "q3k@hackerspace.pl";
-
-  nodes = [
-    {
-      fqdn = "bc01n01.hswaw.net";
-      ipAddr = "185.236.240.35";
-      podNet = "10.10.16.0/24";
-      diskBoot = "/dev/sdb";
-      mgmtIf = "eno1";
-    }
-    {
-      fqdn = "bc01n02.hswaw.net";
-      ipAddr = "185.236.240.36";
-      podNet = "10.10.17.0/24";
-      diskBoot = "/dev/sdb";
-      mgmtIf = "eno1";
-    }
-    {
-      fqdn = "bc01n03.hswaw.net";
-      ipAddr = "185.236.240.37";
-      podNet = "10.10.18.0/24";
-      diskBoot = "/dev/sdb";
-      mgmtIf = "eno1";
-    }
-    {
-      fqdn = "dcr01s22.hswaw.net";
-      ipAddr = "185.236.240.39";
-      podNet = "10.10.19.0/24";
-      diskBoot = "/dev/sda";
-      mgmtIf = "enp130s0f0";
-    }
-    {
-      fqdn = "dcr01s24.hswaw.net";
-      ipAddr = "185.236.240.40";
-      podNet = "10.10.20.0/24";
-      diskBoot = "/dev/sda";
-      mgmtIf = "enp130s0f0";
-    }
-  ];
-
-  pki = rec {
-    root = /opt/hscloud;
-
-    make = (radix: name: rec {
-      ca = root + "/${radix}-ca.crt";
-      cert = root + "/${radix}-${name}.crt";
-      key = root + "/${radix}-${name}.key";
-
-      json = (builtins.toJSON {
-        ca = (builtins.toString ca);
-        cert = (builtins.toString cert);
-        key = (builtins.toString key);
-      });
-    });
-
-    etcdPeer = (make "etcdpeer" "server");
-
-    etcd = {
-        server = (make "etcd" "server");
-        kube = (make "etcd" "kube");
-    };
-
-    makeKube = (name: (make "kube" name) // {
-      config = {
-        server = "https://${k8sapi}:${toString ports.k8sAPIServerSecure}";
-        certFile = (make "kube" name).cert;
-        keyFile = (make "kube" name).key;
-      };
-    });
-
-    kube = rec {
-      ca = apiserver.ca;
-      
-      # Used to identify apiserver.
-      apiserver = (makeKube "apiserver");
-
-      # Used to identify controller-manager.
-      controllermanager = (makeKube "controller-manager");
-
-      # Used to identify scheduler.
-      scheduler = (makeKube "scheduler");
-
-      # Used to identify kube-proxy.
-      proxy = (makeKube "proxy");
-
-      # Used to identify kubelet.
-      kubelet = (makeKube "node");
-
-      # Used to encrypt service accounts.
-      serviceaccounts = (makeKube "serviceaccounts");
-    };
-
-    kubeFront = {
-      apiserver = (make "kubeFront" "apiserver");
-    };
-  };
-
-  ports = {
-    k8sAPIServerPlain = 4000;
-    k8sAPIServerSecure = 4001;
-    k8sControllerManagerPlain = 0; # 4002; do not serve plain http
-    k8sControllerManagerSecure = 4003;
-  };
-}