Get in the Cluster, Benji!
Here we introduce benji [1], a backup system based on backy2. It lets us
backup Ceph RBD objects from Rook into Wasabi, our offsite S3-compatible
storage provider.
Benji runs as a k8s CronJob, at 42 minutes past every hour. It does the
following:
- runs benji-pvc-backup, which iterates over all PVCs in k8s, and backs
up their respective PVs to Wasabi
- runs benji enforce, marking backups outside our backup policy [2] as
to be deleted
- runs benji cleanup, to remove unneeded backups
- runs a custom script to backup benji's sqlite3 database into wasabi
(unencrypted, but we're fine with that - as the metadata only contains
image/pool names, thus Ceph PV and pool names)
[1] - https://benji-backup.me/index.html
[2] - latest3,hours48,days7,months12, which means the latest 3 backups,
then one backup for the next 48 hours, then one backup for the next
7 days, then one backup for the next 12 months, for a total of 65
backups (deduplicated, of course)
We also drive-by update some docs (make them more separated into
user/admin docs).
Change-Id: Ibe0942fd38bc232399c0e1eaddade3f4c98bc6b4
diff --git a/cluster/kube/cluster.jsonnet b/cluster/kube/cluster.jsonnet
index 605b32d..89ffdb0 100644
--- a/cluster/kube/cluster.jsonnet
+++ b/cluster/kube/cluster.jsonnet
@@ -262,6 +262,22 @@
},
],
},
+ benji:: {
+ metadataStorageClass: "waw-hdd-paranoid-2",
+ encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
+ pools: [
+ "waw-hdd-redundant-2",
+ "waw-hdd-redundant-2-metadata",
+ "waw-hdd-paranoid-2",
+ "waw-hdd-yolo-2",
+ ],
+ s3Configuration: {
+ awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
+ awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
+ bucketName: "benji-k0-backups",
+ endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
+ },
+ }
},
},
// redundant block storage
diff --git a/cluster/kube/lib/rook.libsonnet b/cluster/kube/lib/rook.libsonnet
index 98732b0..8aa51a7 100644
--- a/cluster/kube/lib/rook.libsonnet
+++ b/cluster/kube/lib/rook.libsonnet
@@ -213,18 +213,8 @@
crb: kube.ClusterRoleBinding("ceph-rook-global") {
metadata+: env.metadata { namespace:: null },
- roleRef: {
- apiGroup: "rbac.authorization.k8s.io",
- kind: "ClusterRole",
- name: env.crs.global.metadata.name,
- },
- subjects: [
- {
- kind: "ServiceAccount",
- name: env.sa.metadata.name,
- namespace: env.sa.metadata.namespace,
- },
- ],
+ roleRef_: env.crs.global,
+ subjects_: [env.sa],
},
role: kube.Role("ceph-rook-system") {
@@ -245,18 +235,8 @@
rb: kube.RoleBinding("ceph-rook-system") {
metadata+: env.metadata,
- roleRef: {
- apiGroup: "rbac.authorization.k8s.io",
- kind: "Role",
- name: env.role.metadata.name,
- },
- subjects: [
- {
- kind: "ServiceAccount",
- name: env.sa.metadata.name,
- namespace: env.sa.metadata.namespace,
- },
- ],
+ roleRef_: env.role,
+ subjects_: [env.sa],
},
operator: kube.Deployment("rook-ceph-operator") {
@@ -369,23 +349,13 @@
rbs: [
kube.RoleBinding(cluster.name(el.name)) {
metadata+: cluster.metadata,
- roleRef: {
- apiGroup: "rbac.authorization.k8s.io",
- kind: el.role.kind,
- name: el.role.metadata.name,
- },
- subjects: [
- {
- kind: el.sa.kind,
- name: el.sa.metadata.name,
- namespace: el.sa.metadata.namespace,
- },
- ],
+ roleRef_: el.role,
+ subjects_: [el.sa],
},
for el in [
// Allow Operator SA to perform Cluster Mgmt in this namespace.
{ name: "cluster-mgmt", role: operator.crs.clusterMgmt, sa: operator.sa },
- { name: "osd", role: cluster.roles.osd, sa: cluster.sa.osd },
+ { name: "osd", role: cluster.roles.osd, sa: cluster.sa.osd },
{ name: "mgr", role: cluster.roles.mgr, sa: cluster.sa.mgr },
{ name: "mgr-cluster", role: operator.crs.mgrCluster, sa: cluster.sa.mgr },
]
@@ -395,18 +365,8 @@
metadata+: {
namespace: operator.cfg.namespace,
},
- roleRef: {
- apiGroup: "rbac.authorization.k8s.io",
- kind: cluster.roles.mgrSystem.kind,
- name: cluster.roles.mgrSystem.metadata.name,
- },
- subjects: [
- {
- kind: cluster.sa.mgr.kind,
- name: cluster.sa.mgr.metadata.name,
- namespace: cluster.sa.mgr.metadata.namespace,
- },
- ],
+ roleRef_: cluster.roles.mgrSystem,
+ subjects_: [cluster.sa.mgr],
},
cluster: kube._Object("ceph.rook.io/v1", "CephCluster", name) {
@@ -431,7 +391,7 @@
metadata+: cluster.metadata,
spec: {
ports: [
- { name: "dashboard", port: 80, targetPort: 8080, protocol: "TCP" },
+ { name: "dashboard", port: 80, targetPort: 8080, protocol: "TCP" },
],
selector: {
app: "rook-ceph-mgr",
@@ -466,7 +426,259 @@
}
],
},
- }
+ },
+
+ # Benji is a backup tool, external to rook, that we use for backing up
+ # RBDs.
+ benji: {
+ sa: kube.ServiceAccount(cluster.name("benji")) {
+ metadata+: cluster.metadata,
+ },
+
+ cr: kube.ClusterRole(cluster.name("benji")) {
+ rules: [
+ {
+ apiGroups: [""],
+ resources: [
+ "persistentvolumes",
+ "persistentvolumeclaims"
+ ],
+ verbs: ["list", "get"],
+ },
+ {
+ apiGroups: [""],
+ resources: [
+ "events",
+ ],
+ verbs: ["create", "update"],
+ },
+ ],
+ },
+
+ crb: kube.ClusterRoleBinding(cluster.name("benji")) {
+ roleRef_: cluster.benji.cr,
+ subjects_: [cluster.benji.sa],
+ },
+
+ config: kube.Secret(cluster.name("benji-config")) {
+ metadata+: cluster.metadata,
+ data_: {
+ "benji.yaml": std.manifestJson({
+ configurationVersion: '1',
+ databaseEngine: 'sqlite:////data/benji.sqlite',
+ defaultStorage: 'wasabi',
+ storages: [
+ {
+ name: "wasabi",
+ storageId: 1,
+ module: "s3",
+ configuration: cluster.spec.benji.s3Configuration {
+ activeTransforms: ["encrypt"],
+ },
+ },
+ ],
+ transforms: [
+ {
+ name: "encrypt",
+ module: "aes_256_gcm",
+ configuration: {
+ # not secret.
+ kdfSalt: "T2huZzZpcGhhaWM3QWVwaDhybzRhaDNhbzFpc2VpOWFobDNSZWVQaGVvTWV1bmVaYWVsNHRoYWg5QWVENHNoYWg0ZGFoN3Rlb3NvcHVuZzNpZXZpMm9vTG9vbmc1YWlmb0RlZXAwYmFobDlab294b2hjaG9odjRhbzFsYWkwYWk=",
+ kdfIterations: 2137,
+ password: cluster.spec.benji.encryptionPassword,
+ },
+ },
+ ],
+ ios: [
+ { name: pool, module: "rbd" }
+ for pool in cluster.spec.benji.pools
+ ],
+ }),
+ },
+ },
+
+ # Yes, Benji keeps data (backup metadata) on the ceph cluster that
+ # it backs up. However:
+ # - we add a command to benji-k8s to also copy over the sqlite
+ # database over to s3
+ # - benji can, in a pinch, restore without a database if a version
+ # is known: https://benji-backup.me/restore.html#restoring-without-a-database
+ data: kube.PersistentVolumeClaim(cluster.name("benji-data")) {
+ metadata+: cluster.metadata,
+ spec+: {
+ storageClassName: cluster.spec.benji.metadataStorageClass,
+ accessModes: [ "ReadWriteOnce" ],
+ resources: {
+ requests: {
+ storage: "1Gi",
+ },
+ },
+ },
+ },
+
+ # Extra scripts.
+ extrabins: kube.ConfigMap(cluster.name("benji-extrabins")) {
+ metadata+: cluster.metadata,
+ data: {
+ "metabackup.sh" : |||
+ # Make backups of sqlite3 metadata used by Benji.
+ # The backups live in the same bucket as backups, and the metabackups
+ # are named `metabackup-0..10`, where 0 is the newest backup. Any time
+ # this script is called, backups get shifted one way to the left (9 to 10,
+ # 8 to 9, etc). This ensures we have at least 10 backup replicas.
+
+ set -e
+
+ which s3cmd || pip install --upgrade s3cmd
+
+ AWS_ACCESS_KEY_ID=$(jq -r .storages[0].configuration.awsAccessKeyId < /etc/benji/benji.yaml)
+ AWS_SECRET_ACCESS_KEY=$(jq -r .storages[0].configuration.awsSecretAccessKey < /etc/benji/benji.yaml)
+ BUCKET=$(jq -r .storages[0].configuration.bucketName < /etc/benji/benji.yaml)
+
+ s3() {
+ s3cmd --host=s3.wasabisys.com \
+ "--host-bucket=%(bucket)s.s3.wasabisys.com" \
+ --region=eu-central-1 \
+ --access_key=$AWS_ACCESS_KEY_ID \
+ --secret_key=$AWS_SECRET_ACCESS_KEY \
+ "$@"
+ }
+
+ # Copy over old backups, if they exist.
+ for i in `seq 9 -1 0`; do
+ from="s3://$BUCKET/metabackup-$i.sqlite3"
+ to="s3://$BUCKET/metabackup-$((i+1)).sqlite3"
+
+ if [[ $(s3 ls $from | wc -l) -eq 0 ]]; then
+ echo "$from does not exist, skipping shift."
+ continue
+ fi
+ echo "Moving $from to $to..."
+ s3 mv $from $to
+ done
+
+ # Make new metabackup.
+ s3 put /data/benji.sqlite s3://$BUCKET/metabackup-0.sqlite3
+
+ |||,
+ "get-rook-creds.sh": |||
+ # Based on the Rook Toolbox /usr/local/bin/toolbox.sh script.
+ # Copyright 2016 The Rook Authors. All rights reserved.
+
+ CEPH_CONFIG="/etc/ceph/ceph.conf"
+ MON_CONFIG="/etc/rook/mon-endpoints"
+ KEYRING_FILE="/etc/ceph/keyring"
+
+ # create a ceph config file in its default location so ceph/rados tools can be used
+ # without specifying any arguments
+ write_endpoints() {
+ endpoints=$(cat ${MON_CONFIG})
+
+ # filter out the mon names
+ mon_endpoints=$(echo ${endpoints} | sed 's/[a-z]\+=//g')
+
+ # filter out the legacy mon names
+ mon_endpoints=$(echo ${mon_endpoints} | sed 's/rook-ceph-mon[0-9]\+=//g')
+
+ DATE=$(date)
+ echo "$DATE writing mon endpoints to ${CEPH_CONFIG}: ${endpoints}"
+ cat <<EOF > ${CEPH_CONFIG}
+ [global]
+ mon_host = ${mon_endpoints}
+
+ [client.admin]
+ keyring = ${KEYRING_FILE}
+ EOF
+ }
+
+ # watch the endpoints config file and update if the mon endpoints ever change
+ watch_endpoints() {
+ # get the timestamp for the target of the soft link
+ real_path=$(realpath ${MON_CONFIG})
+ initial_time=$(stat -c %Z ${real_path})
+ while true; do
+ real_path=$(realpath ${MON_CONFIG})
+ latest_time=$(stat -c %Z ${real_path})
+
+ if [[ "${latest_time}" != "${initial_time}" ]]; then
+ write_endpoints
+ initial_time=${latest_time}
+ fi
+ sleep 10
+ done
+ }
+
+ # create the keyring file
+ cat <<EOF > ${KEYRING_FILE}
+ [client.admin]
+ key = ${ROOK_ADMIN_SECRET}
+ EOF
+
+ # write the initial config file
+ write_endpoints
+
+ # continuously update the mon endpoints if they fail over
+ watch_endpoints &
+ |||
+ },
+ },
+
+ cronjob: kube.CronJob(cluster.name("benji")) {
+ metadata+: cluster.metadata,
+ spec+: { # CronJob Spec
+ schedule: "42 * * * *", # Hourly at 42 minute past.
+ jobTemplate+: {
+ spec+: { # Job Spec
+ selector:: null,
+ template+: {
+ spec+: { # PodSpec
+ serviceAccountName: cluster.benji.sa.metadata.name,
+ containers_: {
+ benji: kube.Container(cluster.name("benji")) {
+ # TODO(q3k): switch back to upstream after pull/52 goes in.
+ # Currently this is being built from github.com/q3k/benji.
+ # https://github.com/elemental-lf/benji/pull/52
+ image: "registry.k0.hswaw.net/q3k/benji-k8s:20190831-1351",
+ volumeMounts_: {
+ extrabins: { mountPath: "/usr/local/extrabins" },
+ monendpoints: { mountPath: "/etc/rook" },
+ benjiconfig: { mountPath: "/etc/benji" },
+ data: { mountPath: "/data" },
+ },
+ env_: {
+ ROOK_ADMIN_SECRET: { secretKeyRef: { name: "rook-ceph-mon", key: "admin-secret" }},
+ },
+ command: [
+ "bash", "-c", |||
+ bash /usr/local/extrabins/get-rook-creds.sh
+ benji-backup-pvc
+ benji-command enforce latest3,hours48,days7,months12
+ benji-command cleanup
+ bash /usr/local/extrabins/metabackup.sh
+ |||,
+ ],
+ },
+ },
+ volumes_: {
+ data: kube.PersistentVolumeClaimVolume(cluster.benji.data),
+ benjiconfig: kube.SecretVolume(cluster.benji.config),
+ extrabins: kube.ConfigMapVolume(cluster.benji.extrabins),
+ monendpoints: {
+ configMap: {
+ name: "rook-ceph-mon-endpoints",
+ items: [
+ { key: "data", path: "mon-endpoints" },
+ ],
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ },
},
ReplicatedBlockPool(cluster, name):: {