Get in the Cluster, Benji!

Here we introduce benji [1], a backup system based on backy2. It lets us
back up Ceph RBD objects from Rook into Wasabi, our offsite S3-compatible
storage provider.

Benji runs as a k8s CronJob, every hour at 42 minutes. It does the
following:
 - runs benji-backup-pvc, which iterates over all PVCs in k8s, and backs
   up their respective PVs to Wasabi
 - runs benji enforce, marking backups outside our backup policy [2] as
   to be deleted
 - runs benji cleanup, to remove unneeded backups
 - runs a custom script to back up benji's sqlite3 database into Wasabi
   (unencrypted, but we're fine with that - as the metadata only contains
   image/pool names, thus Ceph PV and pool names)

[1] - https://benji-backup.me/index.html
[2] - latest3,hours48,days7,months12, which means: keep the latest 3
      backups, plus one backup per hour for the last 48 hours, plus one
      backup per day for the last 7 days, plus one backup per month for
      the last 12 months, for a total of 65 backups (deduplicated, of
      course)

We also drive-by update some docs (make them more separated into
user/admin docs).

Change-Id: Ibe0942fd38bc232399c0e1eaddade3f4c98bc6b4
diff --git a/cluster/kube/cluster.jsonnet b/cluster/kube/cluster.jsonnet
index 605b32d..89ffdb0 100644
--- a/cluster/kube/cluster.jsonnet
+++ b/cluster/kube/cluster.jsonnet
@@ -262,6 +262,22 @@
                             },
                         ],
                     },
+                    benji:: {
+                        metadataStorageClass: "waw-hdd-paranoid-2",
+                        encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
+                        pools: [
+                            "waw-hdd-redundant-2",
+                            "waw-hdd-redundant-2-metadata",
+                            "waw-hdd-paranoid-2",
+                            "waw-hdd-yolo-2",
+                        ],
+                        s3Configuration: {
+                            awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
+                            awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
+                            bucketName: "benji-k0-backups",
+                            endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
+                        },
+                    }
                 },
             },
             // redundant block storage
diff --git a/cluster/kube/lib/rook.libsonnet b/cluster/kube/lib/rook.libsonnet
index 98732b0..8aa51a7 100644
--- a/cluster/kube/lib/rook.libsonnet
+++ b/cluster/kube/lib/rook.libsonnet
@@ -213,18 +213,8 @@
 
         crb: kube.ClusterRoleBinding("ceph-rook-global") {
             metadata+: env.metadata { namespace:: null },
-            roleRef: {
-                apiGroup: "rbac.authorization.k8s.io",
-                kind: "ClusterRole",
-                name: env.crs.global.metadata.name,
-            },
-            subjects: [
-                {
-                    kind: "ServiceAccount",
-                    name: env.sa.metadata.name,
-                    namespace: env.sa.metadata.namespace,
-                },
-            ],
+            roleRef_: env.crs.global,
+            subjects_: [env.sa],
         },
 
         role: kube.Role("ceph-rook-system") {
@@ -245,18 +235,8 @@
 
         rb: kube.RoleBinding("ceph-rook-system") {
             metadata+: env.metadata,
-            roleRef: {
-                apiGroup: "rbac.authorization.k8s.io",
-                kind: "Role",
-                name: env.role.metadata.name,
-            },
-            subjects: [
-                {
-                    kind: "ServiceAccount",
-                    name: env.sa.metadata.name,
-                    namespace: env.sa.metadata.namespace,
-                },
-            ],
+            roleRef_: env.role,
+            subjects_: [env.sa],
         },
 
         operator: kube.Deployment("rook-ceph-operator") {
@@ -369,23 +349,13 @@
         rbs: [
             kube.RoleBinding(cluster.name(el.name)) {
                 metadata+: cluster.metadata,
-                roleRef: {
-                    apiGroup: "rbac.authorization.k8s.io",
-                    kind: el.role.kind,
-                    name: el.role.metadata.name,
-                },
-                subjects: [
-                    {
-                        kind: el.sa.kind,
-                        name: el.sa.metadata.name,
-                        namespace: el.sa.metadata.namespace,
-                    },
-                ],
+                roleRef_: el.role,
+                subjects_: [el.sa],
             },
             for el in [
                 // Allow Operator SA to perform Cluster Mgmt in this namespace.
                 { name: "cluster-mgmt", role: operator.crs.clusterMgmt, sa: operator.sa },
-                { name: "osd", role: cluster.roles.osd, sa: cluster.sa.osd }, 
+                { name: "osd", role: cluster.roles.osd, sa: cluster.sa.osd },
                 { name: "mgr", role: cluster.roles.mgr, sa: cluster.sa.mgr },
                 { name: "mgr-cluster", role: operator.crs.mgrCluster, sa: cluster.sa.mgr },
             ]
@@ -395,18 +365,8 @@
             metadata+: {
                 namespace: operator.cfg.namespace,
             },
-            roleRef: {
-                apiGroup: "rbac.authorization.k8s.io",
-                kind: cluster.roles.mgrSystem.kind,
-                name: cluster.roles.mgrSystem.metadata.name,
-            },
-            subjects: [
-                {
-                    kind: cluster.sa.mgr.kind,
-                    name: cluster.sa.mgr.metadata.name,
-                    namespace: cluster.sa.mgr.metadata.namespace,
-                },
-            ],
+            roleRef_: cluster.roles.mgrSystem,
+            subjects_: [cluster.sa.mgr],
         },
 
         cluster: kube._Object("ceph.rook.io/v1", "CephCluster", name) {
@@ -431,7 +391,7 @@
             metadata+: cluster.metadata,
             spec: {
                 ports: [
-                    { name: "dashboard", port: 80, targetPort: 8080, protocol: "TCP" }, 
+                    { name: "dashboard", port: 80, targetPort: 8080, protocol: "TCP" },
                 ],
                 selector: {
                     app: "rook-ceph-mgr",
@@ -466,7 +426,259 @@
                     }
                 ],
             },
-        }
+        },
+
+        # Benji is a backup tool, external to rook, that we use for backing up
+        # RBDs.
+        benji: {
+            sa: kube.ServiceAccount(cluster.name("benji")) {
+                metadata+: cluster.metadata,
+            },
+
+            cr: kube.ClusterRole(cluster.name("benji")) {
+                rules: [
+                    {
+                        apiGroups: [""],
+                        resources: [
+                            "persistentvolumes",
+                            "persistentvolumeclaims"
+                        ],
+                        verbs: ["list", "get"],
+                    },
+                    {
+                        apiGroups: [""],
+                        resources: [
+                            "events",
+                        ],
+                        verbs: ["create", "update"],
+                    },
+                ],
+            },
+
+            crb: kube.ClusterRoleBinding(cluster.name("benji")) {
+                roleRef_: cluster.benji.cr,
+                subjects_: [cluster.benji.sa],
+            },
+
+            config: kube.Secret(cluster.name("benji-config")) {
+                metadata+: cluster.metadata,
+                data_: {
+                    "benji.yaml": std.manifestJson({
+                        configurationVersion: '1',
+                        databaseEngine: 'sqlite:////data/benji.sqlite',
+                        defaultStorage: 'wasabi',
+                        storages: [
+                            {
+                                name: "wasabi",
+                                storageId: 1,
+                                module: "s3",
+                                configuration: cluster.spec.benji.s3Configuration {
+                                    activeTransforms: ["encrypt"],
+                                },
+                            },
+                        ],
+                        transforms: [
+                            {
+                                name: "encrypt",
+                                module: "aes_256_gcm",
+                                configuration: {
+                                    # not secret.
+                                    kdfSalt: "T2huZzZpcGhhaWM3QWVwaDhybzRhaDNhbzFpc2VpOWFobDNSZWVQaGVvTWV1bmVaYWVsNHRoYWg5QWVENHNoYWg0ZGFoN3Rlb3NvcHVuZzNpZXZpMm9vTG9vbmc1YWlmb0RlZXAwYmFobDlab294b2hjaG9odjRhbzFsYWkwYWk=",
+                                    kdfIterations: 2137,
+                                    password: cluster.spec.benji.encryptionPassword,
+                                },
+                            },
+                        ],
+                        ios: [
+                            { name: pool, module: "rbd" }
+                            for pool in cluster.spec.benji.pools
+                        ],
+                    }),
+                },
+            },
+
+            # Yes, Benji keeps data (backup metadata) on the ceph cluster that
+            # it backs up. However:
+            #  - we add a command to benji-k8s to also copy over the sqlite
+            #    database over to s3
+            #  - benji can, in a pinch, restore without a database if a version
+            #    is known: https://benji-backup.me/restore.html#restoring-without-a-database
+            data: kube.PersistentVolumeClaim(cluster.name("benji-data")) {
+                metadata+: cluster.metadata,
+                spec+: {
+                    storageClassName: cluster.spec.benji.metadataStorageClass,
+                    accessModes: [ "ReadWriteOnce" ],
+                    resources: {
+                        requests: {
+                            storage: "1Gi",
+                        },
+                    },
+                },
+            },
+
+            # Extra scripts.
+            extrabins: kube.ConfigMap(cluster.name("benji-extrabins")) {
+                metadata+: cluster.metadata,
+                data: {
+                    "metabackup.sh" : |||
+                        # Make backups of sqlite3 metadata used by Benji.
+                        # The backups live in the same bucket as backups, and the metabackups
+                        # are named `metabackup-0..10`, where 0 is the newest backup. Any time
+                        # this script is called, backups get shifted one way to the left (9 to 10,
+                        # 8 to 9, etc). This ensures we have at least 10 backup replicas.
+
+                        set -e
+
+                        which s3cmd || pip install --upgrade s3cmd
+
+                        AWS_ACCESS_KEY_ID=$(jq -r .storages[0].configuration.awsAccessKeyId < /etc/benji/benji.yaml)
+                        AWS_SECRET_ACCESS_KEY=$(jq -r .storages[0].configuration.awsSecretAccessKey < /etc/benji/benji.yaml)
+                        BUCKET=$(jq -r .storages[0].configuration.bucketName < /etc/benji/benji.yaml)
+
+                        s3() {
+                            s3cmd --host=s3.wasabisys.com \
+                                "--host-bucket=%(bucket)s.s3.wasabisys.com" \
+                                --region=eu-central-1 \
+                                --access_key=$AWS_ACCESS_KEY_ID \
+                                --secret_key=$AWS_SECRET_ACCESS_KEY \
+                                "$@"
+                        }
+
+                        # Copy over old backups, if they exist.
+                        for i in `seq 9 -1 0`; do
+                            from="s3://$BUCKET/metabackup-$i.sqlite3"
+                            to="s3://$BUCKET/metabackup-$((i+1)).sqlite3"
+
+                            if [[ $(s3 ls $from | wc -l) -eq 0 ]]; then
+                                echo "$from does not exist, skipping shift."
+                                continue
+                            fi
+                            echo "Moving $from to $to..."
+                            s3 mv $from $to
+                        done
+
+                        # Make new metabackup.
+                        s3 put /data/benji.sqlite s3://$BUCKET/metabackup-0.sqlite3
+
+                    |||,
+                    "get-rook-creds.sh": |||
+                        # Based on the Rook Toolbox /usr/local/bin/toolbox.sh script.
+                        # Copyright 2016 The Rook Authors. All rights reserved.
+
+                        CEPH_CONFIG="/etc/ceph/ceph.conf"
+                        MON_CONFIG="/etc/rook/mon-endpoints"
+                        KEYRING_FILE="/etc/ceph/keyring"
+
+                        # create a ceph config file in its default location so ceph/rados tools can be used
+                        # without specifying any arguments
+                        write_endpoints() {
+                            endpoints=$(cat ${MON_CONFIG})
+
+                            # filter out the mon names
+                            mon_endpoints=$(echo ${endpoints} | sed 's/[a-z]\+=//g')
+
+                            # filter out the legacy mon names
+                            mon_endpoints=$(echo ${mon_endpoints} | sed 's/rook-ceph-mon[0-9]\+=//g')
+
+                            DATE=$(date)
+                            echo "$DATE writing mon endpoints to ${CEPH_CONFIG}: ${endpoints}"
+                            cat <<EOF > ${CEPH_CONFIG}
+                        [global]
+                        mon_host = ${mon_endpoints}
+
+                        [client.admin]
+                        keyring = ${KEYRING_FILE}
+                        EOF
+                        }
+
+                        # watch the endpoints config file and update if the mon endpoints ever change
+                        watch_endpoints() {
+                            # get the timestamp for the target of the soft link
+                            real_path=$(realpath ${MON_CONFIG})
+                            initial_time=$(stat -c %Z ${real_path})
+                            while true; do
+                               real_path=$(realpath ${MON_CONFIG})
+                               latest_time=$(stat -c %Z ${real_path})
+
+                               if [[ "${latest_time}" != "${initial_time}" ]]; then
+                                 write_endpoints
+                                 initial_time=${latest_time}
+                               fi
+                               sleep 10
+                            done
+                        }
+
+                        # create the keyring file
+                        cat <<EOF > ${KEYRING_FILE}
+                        [client.admin]
+                        key = ${ROOK_ADMIN_SECRET}
+                        EOF
+
+                        # write the initial config file
+                        write_endpoints
+
+                        # continuously update the mon endpoints if they fail over
+                        watch_endpoints &
+                    |||
+                },
+            },
+
+            cronjob: kube.CronJob(cluster.name("benji")) {
+                metadata+: cluster.metadata,
+                spec+: { # CronJob Spec
+                    schedule: "42 * * * *", # Hourly, at 42 minutes past the hour.
+                    jobTemplate+: {
+                        spec+: { # Job Spec
+                            selector:: null,
+                            template+: {
+                                spec+: { # PodSpec
+                                    serviceAccountName: cluster.benji.sa.metadata.name,
+                                    containers_: {
+                                        benji: kube.Container(cluster.name("benji")) {
+                                            # TODO(q3k): switch back to upstream after pull/52 goes in.
+                                            # Currently this is being built from github.com/q3k/benji.
+                                            # https://github.com/elemental-lf/benji/pull/52
+                                            image: "registry.k0.hswaw.net/q3k/benji-k8s:20190831-1351",
+                                            volumeMounts_: {
+                                                extrabins: { mountPath: "/usr/local/extrabins" },
+                                                monendpoints: { mountPath: "/etc/rook" },
+                                                benjiconfig: { mountPath: "/etc/benji" },
+                                                data: { mountPath: "/data" },
+                                            },
+                                            env_: {
+                                                ROOK_ADMIN_SECRET: { secretKeyRef: { name: "rook-ceph-mon", key: "admin-secret" }},
+                                            },
+                                            command: [
+                                                "bash", "-c", |||
+                                                    bash /usr/local/extrabins/get-rook-creds.sh
+                                                    benji-backup-pvc
+                                                    benji-command enforce latest3,hours48,days7,months12
+                                                    benji-command cleanup
+                                                    bash /usr/local/extrabins/metabackup.sh
+                                                |||,
+                                            ],
+                                        },
+                                    },
+                                    volumes_: {
+                                        data: kube.PersistentVolumeClaimVolume(cluster.benji.data),
+                                        benjiconfig: kube.SecretVolume(cluster.benji.config),
+                                        extrabins: kube.ConfigMapVolume(cluster.benji.extrabins),
+                                        monendpoints: {
+                                            configMap: {
+                                                name: "rook-ceph-mon-endpoints",
+                                                items: [
+                                                    { key: "data", path: "mon-endpoints" },
+                                                ],
+                                            },
+                                        },
+                                    },
+                                },
+                            },
+                        },
+                    },
+                },
+            },
+        },
     },
 
     ReplicatedBlockPool(cluster, name):: {