*: rejigger tls certs and more
This pretty large change does the following:
- moves nix from bootstrap.hswaw.net to nix/
- changes clustercfg to use cfssl and moves it to cluster/clustercfg
- changes clustercfg to source information about target location of
certs from nix
- changes clustercfg to push nix config
- changes tls certs to have more than one CA
- recalculates all TLS certs
(it keeps the old serviceaccounts key, otherwise we end up with
invalid serviceaccounts - the cert doesn't match, but who cares,
it's not used anyway)
diff --git a/cluster/clustercfg/BUILD b/cluster/clustercfg/BUILD
new file mode 100644
index 0000000..5ada23c
--- /dev/null
+++ b/cluster/clustercfg/BUILD
@@ -0,0 +1,15 @@
+load("@py_deps//:requirements.bzl", "requirement")
+
+# The clustercfg command-line tool: clustercfg.py plus its ca.py helper module.
+# NOTE(review): ca.py imports `six` and clustercfg.py imports `cryptography`,
+# but neither is listed in deps below - presumably they arrive transitively
+# through the listed requirements; confirm.
+py_binary(
+ name = "clustercfg",
+ srcs = [
+ "clustercfg.py",
+ "ca.py",
+ ],
+ visibility = ["//visibility:public"],
+ deps = [
+ requirement("fabric"),
+ requirement("future"),
+ "//tools:secretstore_lib",
+ ],
+)
diff --git a/cluster/clustercfg/ca.py b/cluster/clustercfg/ca.py
new file mode 100644
index 0000000..e4973db
--- /dev/null
+++ b/cluster/clustercfg/ca.py
@@ -0,0 +1,239 @@
+import json
+import logging
+import os
+from six import StringIO
+import subprocess
+
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+
+# Subject fields shared by the CA CSR template below and by CA.gen_key, which
+# reuses C/ST/L and takes O/OU from here as its default arguments.
+_std_subj = {
+ "C": "PL",
+ "ST": "Mazowieckie",
+ "L": "Warsaw",
+ "O": "Warsaw Hackerspace",
+ "OU": "clustercfg",
+}
+
+# CSR template fed to `cfssl gencert -initca`. CA._init_ca copies it and
+# replaces the placeholder CN with the CA's own common name.
+_ca_csr = {
+ "CN": "Prototype Test Certificate Authority",
+ "key": {
+ "algo": "rsa",
+ "size": 2048
+ },
+ "names": [ _std_subj ],
+}
+
+# Signing configuration: a default expiry plus server, client and
+# client-server profiles. CA.gen_key merges this dict into the request it
+# sends to `cfssl genkey`.
+# NOTE(review): CA.sign selects -profile=client-server but passes no -config
+# option, so confirm that cfssl actually sees these profiles when signing.
+_ca_config = {
+ "signing": {
+ "default": {
+ "expiry": "168h"
+ },
+ "profiles": {
+ "server": {
+ "expiry": "8760h",
+ "usages": [
+ "signing",
+ "key encipherment",
+ "server auth"
+ ]
+ },
+ "client": {
+ "expiry": "8760h",
+ "usages": [
+ "signing",
+ "key encipherment",
+ "client auth"
+ ]
+ },
+ "client-server": {
+ "expiry": "8760h",
+ "usages": [
+ "signing",
+ "key encipherment",
+ "server auth",
+ "client auth"
+ ]
+ }
+ }
+ }
+}
+
+
+class CAException(Exception):
+ """Exception type reserved for failures of CA operations in this module."""
+ pass
+
+
+class CA(object):
+    """A certificate authority driven through the `cfssl` command-line tool.
+
+    The CA private key lives in the secret store as `ca-<short>.key` and the
+    CA certificate in `<certdir>/ca-<short>.crt`. Constructing an instance
+    generates both when the key secret does not exist yet.
+    """
+
+    def __init__(self, secretstore, certdir, short, cn):
+        """Bind the CA to its storage and create it if it is missing.
+
+        :param secretstore: object providing exists(), open() and plaintext()
+            for named secrets.
+        :param certdir: local directory that holds the public certificates.
+        :param short: short CA identifier used in secret and file names.
+        :param cn: common name of the CA certificate.
+        """
+        self.ss = secretstore
+        self.cdir = certdir
+        self.short = short
+        self.cn = cn
+        self._init_ca()
+
+    def __str__(self):
+        return 'CN={} ({})'.format(self.cn, self.short)
+
+    @property
+    def _secret_key(self):
+        """Name of the secret holding the CA private key."""
+        return 'ca-{}.key'.format(self.short)
+
+    @property
+    def _cert(self):
+        """Local path of the CA certificate."""
+        return os.path.join(self.cdir, 'ca-{}.crt'.format(self.short))
+
+    @property
+    def cert_data(self):
+        """Contents of the CA certificate file."""
+        with open(self._cert) as f:
+            return f.read()
+
+    def _cfssl(self, args, payload):
+        """Run `cfssl <args>` with `payload` on stdin and return its JSON output.
+
+        :param args: list of cfssl arguments, sub-command first.
+        :param payload: text written to the standard input of cfssl.
+        :raises CAException: if cfssl exits non-zero or prints invalid JSON.
+        """
+        p = subprocess.Popen(['cfssl'] + list(args),
+                             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        outs, errs = p.communicate(payload.encode())
+        # Surface the tool's own diagnostics instead of failing later on an
+        # empty or truncated JSON document.
+        if p.returncode != 0:
+            raise CAException("{}: cfssl {} exited with code {}: {}".format(
+                self, args[0], p.returncode,
+                errs.decode('utf-8', 'replace').strip()))
+        try:
+            return json.loads(outs.decode('utf-8'))
+        except ValueError as e:
+            raise CAException("{}: cfssl {} returned invalid JSON: {}".format(
+                self, args[0], e))
+
+    def _write_secret(self, name, data):
+        """Write `data` to the secret `name`, closing the handle even on error."""
+        f = self.ss.open(name, 'w')
+        try:
+            f.write(data)
+        finally:
+            f.close()
+
+    def _init_ca(self):
+        """Generate the CA key and certificate unless the key secret exists."""
+        if self.ss.exists(self._secret_key):
+            return
+
+        # A shallow copy suffices: only the top-level CN is replaced.
+        ca_csr = dict(_ca_csr)
+        ca_csr['CN'] = self.cn
+
+        logger.info("{}: Generating CA...".format(self))
+        out = self._cfssl(['gencert', '-initca', '-'], json.dumps(ca_csr))
+
+        self._write_secret(self._secret_key, out['key'])
+        with open(self._cert, 'w') as f:
+            f.write(out['cert'])
+
+    def gen_key(self, hosts, o=_std_subj['O'], ou=_std_subj['OU'], save=None):
+        """Generate a 4096-bit RSA key and a matching CSR with `cfssl genkey`.
+
+        :param hosts: host names for the request; the first one becomes the CN.
+        :param o: subject organization. An explicit None (which
+            ManagedCertificate passes when its own `o` is unset) is sent as a
+            JSON null rather than falling back to the default.
+        :param ou: subject organizational unit; None behaves like for `o`.
+        :param save: if not None, secret name under which the key is stored.
+        :returns: tuple (key, csr) as returned by cfssl.
+        :raises CAException: if cfssl fails.
+        """
+        cfg = {
+            "CN": hosts[0],
+            "hosts": hosts,
+            "key": {
+                "algo": "rsa",
+                "size": 4096,
+            },
+            "names": [
+                {
+                    "C": _std_subj["C"],
+                    "ST": _std_subj["ST"],
+                    "L": _std_subj["L"],
+                    "O": o,
+                    "OU": ou,
+                },
+            ],
+        }
+        # The signing configuration travels along in the request, as before.
+        cfg.update(_ca_config)
+        logger.info("{}: Generating key/CSR for {}".format(self, hosts))
+        out = self._cfssl(['genkey', '-'], json.dumps(cfg))
+        key, csr = out['key'], out['csr']
+        if save is not None:
+            logger.info("{}: Saving new key to secret {}".format(self, save))
+            self._write_secret(save, key)
+
+        return key, csr
+
+    def sign(self, csr, save=None):
+        """Sign `csr` with this CA using the client-server profile.
+
+        :param csr: the certificate signing request text.
+        :param save: if not None, file name (relative to the certificate
+            directory) under which the new certificate is written.
+        :returns: the signed certificate as returned by cfssl.
+        :raises CAException: if cfssl fails.
+        """
+        logger.info("{}: Signing CSR".format(self))
+        ca = self._cert
+        # cfssl is handed the plaintext location of the CA key from the store.
+        cakey = self.ss.plaintext(self._secret_key)
+        out = self._cfssl(['sign', '-ca=' + ca, '-ca-key=' + cakey,
+                           '-profile=client-server', '-'], csr)
+        cert = out['cert']
+        if save is not None:
+            name = os.path.join(self.cdir, save)
+            logger.info("{}: Saving new certificate to {}".format(self, name))
+            with open(name, 'w') as f:
+                f.write(cert)
+
+        return cert
+
+    def upload(self, c, remote_cert):
+        """Copy the CA certificate to `remote_cert` through connection `c`."""
+        logger.info("Uploading CA {} to {}".format(self, remote_cert))
+        c.put(local=self._cert, remote=remote_cert)
+
+    def make_cert(self, *a, **kw):
+        """Return a ManagedCertificate issued by this CA (arguments forwarded)."""
+        return ManagedCertificate(self, *a, **kw)
+
+
+class ManagedCertificate(object):
+    """A key/certificate pair issued by a CA and kept on the local machine.
+
+    The private key is stored in the CA's secret store as `<name>.key` and the
+    certificate in the CA's certificate directory as `<name>.cert`.
+    Constructing an instance generates both unless both already exist.
+    """
+
+    def __init__(self, ca, name, hosts, o=None, ou=None):
+        """
+        :param ca: the issuing CA object.
+        :param name: base name for the key secret and the certificate file.
+        :param hosts: host names handed to CA.gen_key.
+        :param o: subject organization handed to CA.gen_key.
+        :param ou: subject organizational unit handed to CA.gen_key.
+        """
+        self.ca = ca
+
+        self.hosts = hosts
+        self.name = name
+        self.key = '{}.key'.format(name)
+        self.cert = '{}.cert'.format(name)
+        self.o = o
+        self.ou = ou
+
+        self.ensure()
+
+    def __str__(self):
+        return '{}'.format(self.name)
+
+    @property
+    def key_exists(self):
+        """True if the key secret exists in the CA's secret store."""
+        return self.ca.ss.exists(self.key)
+
+    @property
+    def key_data(self):
+        """Contents of the private key, read through the secret store."""
+        # The store's open() already returns a file object; feeding that to
+        # the builtin open(), as was done before, raises TypeError.
+        f = self.ca.ss.open(self.key, 'r')
+        try:
+            return f.read()
+        finally:
+            f.close()
+
+    @property
+    def key_path(self):
+        """Plaintext location of the private key as reported by the store."""
+        return self.ca.ss.plaintext(self.key)
+
+    @property
+    def cert_path(self):
+        """Local path of the certificate file."""
+        return os.path.join(self.ca.cdir, self.cert)
+
+    @property
+    def cert_exists(self):
+        """True if the certificate file exists locally."""
+        return os.path.exists(self.cert_path)
+
+    @property
+    def cert_data(self):
+        """Contents of the certificate file."""
+        with open(self.cert_path) as f:
+            return f.read()
+
+    def ensure(self):
+        """Generate key and certificate unless both are already present."""
+        if self.key_exists and self.cert_exists:
+            return
+
+        logger.info("{}: Generating...".format(self))
+        key, csr = self.ca.gen_key(self.hosts, o=self.o, ou=self.ou, save=self.key)
+        self.ca.sign(csr, save=self.cert)
+
+    def upload(self, c, remote_cert, remote_key, concat_ca=False):
+        """Copy certificate and key to the remote side through connection `c`.
+
+        :param c: connection object offering put(local=..., remote=...).
+        :param remote_cert: remote destination of the certificate.
+        :param remote_key: remote destination of the private key.
+        :param concat_ca: if true, upload the certificate followed by the CA
+            certificate as one file instead of the bare certificate.
+        """
+        logger.info("Uploading Cert {} to {} & {}".format(self, remote_cert, remote_key))
+        if concat_ca:
+            f = StringIO(self.cert_data + self.ca.cert_data)
+            c.put(local=f, remote=remote_cert)
+        else:
+            c.put(local=self.cert_path, remote=remote_cert)
+        c.put(local=self.key_path, remote=remote_key)
+
+    def upload_pki(self, c, pki, concat_ca=False):
+        """Upload using the 'cert' and 'key' destinations from the `pki` dict."""
+        self.upload(c, pki['cert'], pki['key'], concat_ca)
diff --git a/cluster/clustercfg/clustercfg.py b/cluster/clustercfg/clustercfg.py
new file mode 100644
index 0000000..a438a4c
--- /dev/null
+++ b/cluster/clustercfg/clustercfg.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python
+
+from builtins import object
+
+import datetime
+from io import BytesIO
+import json
+import logging
+import os
+import tempfile
+import subprocess
+import sys
+
+from cryptography import x509
+from cryptography.hazmat.backends import default_backend
+import fabric
+
+from tools import secretstore
+
+import ca
+
+
+# Root of the local checkout, taken from the hscloud_root environment
+# variable; the module refuses to load without it.
+local_root = os.getenv('hscloud_root')
+if local_root is None:
+ raise Exception("Please source env.sh")
+
+
+# Cluster name: used as the kubectl cluster/context name, as the API server
+# host name in configure_k8s, and as a host on the apiserver certificate.
+cluster = 'k0.hswaw.net'
+# Not referenced anywhere else in this file.
+remote_root = '/opt/hscloud'
+# Secret store rooted in the checkout's cluster/secrets/{plain,cipher} dirs.
+ss = secretstore.SecretStore(
+ plain_root=os.path.join(local_root, 'cluster/secrets/plain'),
+ cipher_root=os.path.join(local_root, 'cluster/secrets/cipher'))
+
+
+# Configure the root logger: INFO level, "<LEVEL> - <message>" lines written
+# through a StreamHandler.
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+formatter = logging.Formatter('%(levelname)s - %(message)s')
+sh = logging.StreamHandler()
+sh.setFormatter(formatter)
+logger.addHandler(sh)
+
+
+
+def pki_config(key):
+ """Return the PKI settings for `key` as evaluated from nix/toplevel.nix.
+
+ Runs `nix eval --raw` on the `pki.<key>.json` attribute of the imported
+ toplevel.nix under local_root and parses the output as JSON.
+ subprocess.check_output raises CalledProcessError if nix exits non-zero.
+ """
+ raw = subprocess.check_output([
+ 'nix', 'eval', '--raw',
+ '( (import ' + local_root + '/nix/toplevel.nix ).pki.' + key + '.json )',
+ ])
+ return json.loads(raw)
+
+
+def _file_exists(c, filename):
+ """Return True if running `stat "<filename>"` through `c` exits with 0.
+
+ `c` is presumably a fabric Connection - TODO confirm. Not called anywhere
+ else in this file.
+ """
+ res = c.run('stat "{}"'.format(filename), warn=True, hide=True)
+ return res.exited == 0
+
+
+def configure_k8s(username, ca, cert, key):
+ """Point the local kubectl configuration at the cluster.
+
+ Runs four `kubectl config` commands in order: set-cluster (CA embedded,
+ server https://<cluster>:4001), set-credentials for `username` (client
+ certificate and key embedded), set-context named after the cluster, and
+ use-context. Any failing command raises subprocess.CalledProcessError.
+
+ `ca`, `cert` and `key` are file paths passed straight to kubectl.
+ """
+ subprocess.check_call([
+ 'kubectl', 'config',
+ 'set-cluster', cluster,
+ '--certificate-authority=' + ca,
+ '--embed-certs=true',
+ '--server=https://' + cluster + ':4001',
+ ])
+ subprocess.check_call([
+ 'kubectl', 'config',
+ 'set-credentials', username,
+ '--client-certificate=' + cert,
+ '--client-key=' + key,
+ '--embed-certs=true',
+ ])
+ subprocess.check_call([
+ 'kubectl', 'config',
+ 'set-context', cluster,
+ '--cluster=' + cluster,
+ '--user=' + username,
+ ])
+ subprocess.check_call([
+ 'kubectl', 'config',
+ 'use-context', cluster,
+ ])
+
+
+def admincreds(args):
+    """Create or refresh kubectl admin credentials for one user.
+
+    Expects exactly one argument, the user name. The key and certificate in
+    <local_root>/.kubectl/admin.{key,crt} are reused when both exist and the
+    certificate stays valid for at least another 24 hours; otherwise a new
+    pair is issued by the 'kube' CA. kubectl is then configured to use them.
+
+    :param args: command-line arguments after the mode name.
+    :returns: 1 on wrong usage, otherwise None.
+    """
+    if len(args) != 1:
+        sys.stderr.write("Usage: admincreds q3k\n")
+        return 1
+    username = args[0]
+
+    ## Make kube certificates.
+    certs_root = os.path.join(local_root, 'cluster/certs')
+    ca_kube = ca.CA(ss, certs_root, 'kube', 'kubernetes main CA')
+
+    local_key = os.path.join(local_root, '.kubectl/admin.key')
+    local_crt = os.path.join(local_root, '.kubectl/admin.crt')
+
+    kubectl = os.path.join(local_root, '.kubectl')
+    if not os.path.exists(kubectl):
+        os.mkdir(kubectl)
+
+    # A missing key always forces regeneration.
+    generate_cert = not os.path.exists(local_key)
+
+    if os.path.exists(local_crt):
+        with open(local_crt, 'rb') as f:
+            cert = x509.load_pem_x509_certificate(f.read(), default_backend())
+        # not_valid_after is a naive UTC datetime, so it must be compared with
+        # naive UTC "now"; local time would skew the remaining validity by the
+        # machine's UTC offset.
+        delta = cert.not_valid_after - datetime.datetime.utcnow()
+        logger.info("admin: existing cert expiry: {}".format(delta))
+        if delta.total_seconds() < 3600 * 24:
+            logger.info("admin: expires soon, regenerating")
+            generate_cert = True
+    else:
+        generate_cert = True
+
+    if generate_cert:
+        key, csr = ca_kube.gen_key(hosts=['admin', username], o='system:masters', ou='Kube Admin Account')
+        crt = ca_kube.sign(csr)
+
+        with open(local_key, 'w') as f:
+            f.write(key)
+
+        with open(local_crt, 'w') as f:
+            f.write(crt)
+
+    return configure_k8s(username, ca_kube._cert, local_crt, local_key)
+
+
+def nodestrap(args):
+ """Provision one node: issue and upload its certificates, then rebuild NixOS.
+
+ Expects exactly one argument, the node's FQDN, and connects to it as root.
+ For each CA (etcd peer, etcd, kube, kubefront) the CA and the needed
+ certificates are created on demand and uploaded to the remote paths that
+ pki_config reports. Finally the NixOS configuration files are uploaded
+ and `nixos-rebuild switch` is run on the node.
+
+ Returns 1 on wrong usage, otherwise None.
+ """
+ if len(args) != 1:
+ sys.stderr.write("Usage: nodestrap bc01n01.hswaw.net\n")
+ return 1
+ fqdn = args[0]
+
+ logger.info("Nodestrapping {}...".format(fqdn))
+ r = fabric.Connection('root@{}'.format(fqdn))
+
+ # Remote certificate/key/CA locations for the etcd-related PKI entries.
+ cfg = dict((k, pki_config(k)) for k in [
+ 'etcdPeer', 'etcd.server', 'etcd.kube'
+ ])
+ certs_root = os.path.join(local_root, 'cluster/certs')
+
+ # Make etcd peer certificate for node.
+ ca_etcd_peer = ca.CA(ss, certs_root, 'etcdpeer', 'etcd peer ca')
+ ca_etcd_peer.upload(r, cfg['etcdPeer']['ca'])
+ c = ca_etcd_peer.make_cert('etcdpeer-{}'.format(fqdn), hosts=[fqdn], ou='node etcd peer certificate')
+ c.upload_pki(r, cfg['etcdPeer'])
+
+ # Make etcd server certificate for node and client certificate for kube.
+ ca_etcd = ca.CA(ss, certs_root, 'etcd', 'etcd ca')
+ ca_etcd.upload(r, cfg['etcd.server']['ca'])
+
+ c = ca_etcd.make_cert('etcd-{}'.format(fqdn), hosts=[fqdn], ou='node etcd server certificate')
+ c.upload_pki(r, cfg['etcd.server'])
+
+ c = ca_etcd.make_cert('etcd-kube', hosts=['kube'], ou='kube etcd client certificate')
+ c.upload_pki(r, cfg['etcd.kube'])
+
+ # Make root etcd client (do not upload).
+ ca_etcd.make_cert('etcd-root', hosts=['root'], ou='root etcd client certificate')
+
+ # Make calico etcd client (do not upload, used by jsonnet).
+ # NOTE(review): the OU below repeats the root client's text; it looks
+ # copy-pasted and was presumably meant to name calico - confirm.
+ ca_etcd.make_cert('etcd-calico', hosts=['calico'], ou='root etcd client certificate')
+
+ ## Make kube certificates.
+ ca_kube = ca.CA(ss, certs_root, 'kube', 'kubernetes main CA')
+
+ # Make kubelet certificate (per node).
+ c = ca_kube.make_cert('kube-kubelet-'+fqdn, o='system:nodes', ou='Kubelet', hosts=['system:node:'+fqdn, fqdn])
+ c.upload_pki(r, pki_config('kube.kubelet'))
+
+ # Make apiserver certificate.
+ # Uploaded with the CA certificate appended to it (concat_ca=True).
+ c = ca_kube.make_cert('kube-apiserver', ou='Kubernetes API', hosts=[cluster, '10.10.12.1'])
+ c.upload_pki(r, pki_config('kube.apiserver'), concat_ca=True)
+
+ # Make service accounts decryption key (as cert for consistency).
+ c = ca_kube.make_cert('kube-serviceaccounts', ou='Kubernetes Service Accounts Signer', hosts=['serviceaccounts'])
+ c.upload_pki(r, pki_config('kube.serviceaccounts'))
+
+ # Make kube component certificates.
+ kube_components = ['controllermanager', 'scheduler', 'proxy']
+ cfg = dict((k, pki_config('kube.' + k)) for k in kube_components)
+ for k in kube_components:
+ ca_kube.upload(r, cfg[k]['ca'])
+ # meh
+ # The controller manager's organization name is hyphenated differently
+ # from its component key, hence the special case.
+ if k == 'controllermanager':
+ o = 'system:kube-controller-manager'
+ else:
+ o = 'system:kube-'+k
+ ou = 'Kubernetes Component '+k
+ c = ca_kube.make_cert('kube-'+k, ou=ou, o=o, hosts=[o,])
+ c.upload_pki(r, cfg[k])
+
+ ## Make kubefront certificates.
+ ca_kubefront = ca.CA(ss, certs_root, 'kubefront', 'kubernetes frontend CA')
+ ca_kubefront.upload(r, pki_config('kubeFront.apiserver')['ca'])
+ c = ca_kubefront.make_cert('kubefront-apiserver', ou='Kubernetes Frontend', hosts=['apiserver'])
+ c.upload_pki(r, pki_config('kubeFront.apiserver'))
+
+ # Upload NixOS config
+ for f in ['toplevel', 'cluster-configuration']:
+ r.put(local=os.path.join(local_root, 'nix/{}.nix'.format(f)),
+ remote='/etc/nixos/{}.nix'.format(f))
+
+ # Apply the uploaded configuration on the node.
+ r.run('nixos-rebuild switch')
+
+
+def usage():
+ """Write the one-line usage message, listing the modes, to stderr."""
+ sys.stderr.write("Usage: {} <nodestrap|admincreds|config>\n".format(sys.argv[0]))
+
+
+def main():
+ """Dispatch on the first command-line argument.
+
+ `nodestrap` and `admincreds` receive the remaining arguments; `config`
+ prints two PKI entries as indented JSON. Returns 1 for a missing or
+ unknown mode, otherwise whatever the selected mode returns (None for
+ `config`).
+ """
+ if len(sys.argv) < 2:
+ usage()
+ return 1
+
+ mode = sys.argv[1]
+ if mode == "nodestrap":
+ return nodestrap(sys.argv[2:])
+ elif mode == "admincreds":
+ return admincreds(sys.argv[2:])
+ elif mode == "config":
+ print('etcd peer:')
+ print(json.dumps(pki_config('etcdPeer'), indent=2))
+ print('etcd client:')
+ # NOTE(review): nothing else in this file uses an 'etcdClient' key
+ # (nodestrap reads 'etcd.server' and 'etcd.kube') - confirm it exists.
+ print(json.dumps(pki_config('etcdClient'), indent=2))
+ else:
+ usage()
+ return 1
+
+# A None result from main() is turned into exit status 0.
+if __name__ == '__main__':
+ sys.exit(main() or 0)