cluster/nix: locally build nixos derivations

We change the existing behaviour (copy files & run nixos-rebuild switch)
to something closer to nixops-style deployment. This means that admin
machines used for provisioning now need Nix installed locally, but
that's probably an okay choice to make.

The upside of this approach is that it's easier to debug and test
derivations, as all data is local to the repo and the workstation, and
deploying just means copying a configuration closure and switching the
system to it. At some point we should even be able to run the entire
cluster within a set of test VMs.
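
For reference, a deploy in this scheme boils down to roughly the
following (a sketch only; the config path and hostname below are
illustrative, not the actual entry points wired into this repo):

    # Build the target's system closure locally.
    nix-build '<nixpkgs/nixos>' -A system -I nixos-config=./cluster/nix/node.nix
    # Copy the closure to the target machine.
    nix-copy-closure --to root@node.example.com ./result
    # Switch the running system to the new configuration.
    ssh root@node.example.com "$(readlink ./result)/bin/switch-to-configuration switch"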

We also bump the kubernetes control plane to 1.14. Kubelets are still at
1.13 and their upgrade is coming up today too.
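
To sanity-check the skew after rolling this out, plain kubectl is
enough (nothing repo-specific here):

    # Client and apiserver versions.
    kubectl version --short
    # Per-node kubelet versions.
    kubectl get nodes -o wide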

Change-Id: Ia9832c47f258ee223d93893d27946d1161cc4bbd
diff --git a/cluster/nix/module-kubernetes.nix b/cluster/nix/module-kubernetes.nix
new file mode 100644
index 0000000..1e71ada
--- /dev/null
+++ b/cluster/nix/module-kubernetes.nix
@@ -0,0 +1,216 @@
+{ config, pkgs, lib, ... }:
+
+with (( import ./defs-cluster-k0.nix ) config.networking.hostName);
+let
+  # Pin for k8s packages. This is so that upgrading the system will not upgrade the k8s control or data planes.
+  k8spkgs = import (fetchGit {
+    # Now at 1.14.3
+    name = "nixos-unstable-2019-06-17";
+    url = https://github.com/nixos/nixpkgs-channels/;
+    rev = "415e8e5820b7825fb74a6c7986bf6af725227eaa";
+  }) {};
+  # Pin for kubelet
+  k8spkgsKubelet = import (fetchGit {
+    # Now at 1.13.5
+    name = "nixos-unstable-2019-04-12";
+    url = https://github.com/nixos/nixpkgs-channels/;
+    rev = "1fc591f9a5bd1b016b5d66dfab29560073955a14";
+  }) {};
+
+in rec {
+  # Disable kubelet service and bring in our own override.
+  # Also nuke flannel from orbit.
+  disabledModules = [
+    "services/cluster/kubernetes/kubelet.nix"
+    "services/cluster/kubernetes/flannel.nix"
+  ];
+
+  imports =
+    [
+      ./module-kubelet.nix
+    ];
+
+  # List services that you want to enable:
+  virtualisation.docker.enable = true;
+  virtualisation.docker.extraOptions = "--iptables=false --ip-masq=false --ip-forward=true";
+
+  # Docker 1.13 sets iptables FORWARD to DROP. Unfuck this.
+  systemd.services."docker-iptables-unfuck" = {
+    enable = true;
+    wantedBy = [ "kubernetes.target" ];
+    description = "Docker iptable Unfuck";
+    after = [ "docker.service" ];
+    requires = [ "docker.service" ];
+    path = [ pkgs.iptables ];
+    script = ''
+      iptables -P FORWARD ACCEPT
+    '';
+    serviceConfig.Type = "oneshot";
+  };
+
+  networking.firewall.enable = false;
+
+  # Point k8s apiserver address at ourselves, as every machine runs an apiserver with this cert name.
+  networking.extraHosts = ''
+    127.0.0.1 ${k8sapi}
+  '';
+
+  security.acme.certs = {
+    host = {
+      email = acmeEmail;
+      domain = fqdn;
+      webroot = services.nginx.virtualHosts.host.root;
+    };
+  };
+
+  services.nginx = {
+    enable = true;
+    virtualHosts.host = {
+      serverName = fqdn;
+      root = "/var/www/${fqdn}";
+    };
+  };
+
+  services.etcd = rec {
+    enable = true;
+    name = fqdn;
+    listenClientUrls = ["https://0.0.0.0:2379"];
+    advertiseClientUrls = ["https://${fqdn}:2379"];
+    listenPeerUrls = ["https://0.0.0.0:2380"];
+    initialAdvertisePeerUrls = ["https://${fqdn}:2380"];
+    initialCluster = (map (n: "${n.fqdn}=https://${n.fqdn}:2380") machines);
+    initialClusterState = "existing";
+
+    clientCertAuth = true;
+    trustedCaFile = pki.etcd.server.ca;
+    certFile = pki.etcd.server.cert;
+    keyFile = pki.etcd.server.key;
+
+    peerClientCertAuth = true;
+    peerTrustedCaFile = pki.etcdPeer.ca;
+    peerCertFile = pki.etcdPeer.cert;
+    peerKeyFile = pki.etcdPeer.key;
+
+    extraConf = {
+      PEER_CLIENT_CERT_AUTH = "true";
+    };
+  };
+
+  services.kubernetes = {
+    # Pin to specific k8s package.
+    package = k8spkgs.kubernetes;
+    roles = []; # We do not use any nixpkgs predefined roles for k8s. Instead,
+                # we enable k8s components manually.
+
+    caFile = pki.kube.apiserver.ca;
+    clusterCidr = "10.10.16.0/20";
+
+    path = [ pkgs.e2fsprogs ]; # kubelet wants to mkfs.ext4 when mounting pvcs
+
+    addons.dns.enable = false;
+
+    apiserver = rec {
+      enable = true;
+      insecurePort = ports.k8sAPIServerPlain;
+      securePort = ports.k8sAPIServerSecure;
+      advertiseAddress = "${machine.ipAddr}";
+
+      etcd = {
+        # https://github.com/kubernetes/kubernetes/issues/72102
+        servers = (map (n: "https://${n.fqdn}:2379") ( [ machine ] ));
+        caFile = pki.etcd.kube.ca;
+        keyFile = pki.etcd.kube.key;
+        certFile = pki.etcd.kube.cert;
+      };
+
+      tlsCertFile = pki.kube.apiserver.cert;
+      tlsKeyFile = pki.kube.apiserver.key;
+
+      clientCaFile = pki.kube.apiserver.ca;
+
+      kubeletHttps = true;
+      kubeletClientCaFile = pki.kube.apiserver.ca;
+      kubeletClientCertFile = pki.kube.apiserver.cert;
+      kubeletClientKeyFile = pki.kube.apiserver.key;
+
+      serviceAccountKeyFile = pki.kube.serviceaccounts.key;
+
+      allowPrivileged = true;
+      serviceClusterIpRange = "10.10.12.0/24";
+      runtimeConfig = "api/all,authentication.k8s.io/v1beta1";
+      authorizationMode = ["Node" "RBAC"];
+      enableAdmissionPlugins = ["NamespaceLifecycle" "NodeRestriction" "LimitRanger" "ServiceAccount" "DefaultStorageClass" "ResourceQuota" "PodSecurityPolicy"];
+      extraOpts = ''
+        --apiserver-count=5 \
+        --proxy-client-cert-file=${pki.kubeFront.apiserver.cert} \
+        --proxy-client-key-file=${pki.kubeFront.apiserver.key} \
+        --requestheader-allowed-names= \
+        --requestheader-client-ca-file=${pki.kubeFront.apiserver.ca} \
+        --requestheader-extra-headers-prefix=X-Remote-Extra- \
+        --requestheader-group-headers=X-Remote-Group  \
+        --requestheader-username-headers=X-Remote-User \
+        -v=5
+      '';
+    };
+
+    controllerManager = {
+      enable = true;
+      bindAddress = "0.0.0.0";
+      insecurePort = ports.k8sControllerManagerPlain;
+      leaderElect = true;
+      serviceAccountKeyFile = pki.kube.serviceaccounts.key;
+      rootCaFile = pki.kube.ca;
+      extraOpts = ''
+        --service-cluster-ip-range=10.10.12.0/24 \
+        --use-service-account-credentials=true \
+        --secure-port=${toString ports.k8sControllerManagerSecure}\
+      '';
+      kubeconfig = pki.kube.controllermanager.config;
+    };
+
+    scheduler = {
+      enable = true;
+      address = "0.0.0.0";
+      port = 0;
+      leaderElect = true;
+      kubeconfig = pki.kube.scheduler.config;
+    };
+
+    proxy = {
+      enable = true;
+      kubeconfig = pki.kube.proxy.config;
+      extraOpts = ''
+        --hostname-override=${fqdn}\
+        --proxy-mode=iptables
+      '';
+    };
+
+    kubelet = {
+      enable = true;
+      unschedulable = false;
+      hostname = fqdn;
+      tlsCertFile = pki.kube.kubelet.cert;
+      tlsKeyFile = pki.kube.kubelet.key;
+      clientCaFile = pki.kube.kubelet.ca;
+      nodeIp = machine.ipAddr;
+      networkPlugin = "cni";
+      clusterDns = "10.10.12.254";
+      kubeconfig = pki.kube.kubelet.config;
+      extraOpts = ''
+        --read-only-port=0
+      '';
+      package = k8spkgsKubelet.kubernetes;
+    };
+
+  };
+
+  # https://github.com/NixOS/nixpkgs/issues/60687
+  systemd.services.kube-control-plane-online = {
+    preStart = pkgs.lib.mkForce "";
+  };
+  # this seems to depend on flannel
+  # TODO(q3k): file issue
+  systemd.services.kubelet-online = {
+    script = pkgs.lib.mkForce "sleep 1";
+  };
+}