# Deploy the Rook/Ceph operator, and define Ceph clusters, block pools and S3
# object stores managed by it.

local kube = import "../../../kube/kube.libsonnet";

{
    Operator: {
        local env = self,
        local cfg = env.cfg,
        // Hidden configuration read by the fields of this object.
        cfg:: {
            // Namespace that the operator's namespaced objects are created in.
            namespace: "rook-ceph-system",
            // Container image used by the operator Deployment.
            image: "rook/ceph:v1.0.5",
        },

        // Hidden metadata fragment mixed into the operator's objects.
        metadata:: {
            labels: {
                "storage-backend": "ceph",
                operator: "rook",
            },
            namespace: env.cfg.namespace,
        },

        // Namespace object named after cfg.namespace.
        namespace: kube.Namespace(env.cfg.namespace),

        // CustomResourceDefinitions in the ceph.rook.io and rook.io API groups.
        crds: {
            // Constructor for CRDs in the ceph.rook.io/v1 group.
            local cephCRD(kind) = kube.CustomResourceDefinition("ceph.rook.io", "v1", kind),

            // Builds a string-typed printer column.
            local stringColumn(name, description, path) = {
                name: name,
                type: "string",
                description: description,
                JSONPath: path,
            },
            // "Age" printer column, used by more than one CRD.
            local ageColumn = { name: "Age", type: "date", JSONPath: ".metadata.creationTimestamp" },

            // Leaf schema nodes used by the validation schema below.
            local boolType = { type: "boolean" },
            local strType = { type: "string" },
            local intType = { type: "integer" },

            cephclusters: cephCRD("CephCluster") {
                spec+: {
                    additionalPrinterColumns: [
                        stringColumn("DataDirHostPath", "Directory used on the K8s nodes", ".spec.dataDirHostPath"),
                        stringColumn("MonCount", "Number of MONs", ".spec.mon.count"),
                        ageColumn,
                        stringColumn("State", "Current State", ".status.state"),
                    ],
                    // Schema converted from the official operator YAML.
                    validation: {
                        openAPIV3Schema: {
                            properties: {
                                spec: {
                                    required: ["mon"],
                                    properties: {
                                        cephVersion: {
                                            properties: {
                                                allowUnsupported: boolType,
                                                image: strType,
                                                name: strType { pattern: "^(luminous|mimic|nautilus)$" },
                                            },
                                        },
                                        dashboard: {
                                            properties: {
                                                enabled: boolType,
                                                urlPrefix: strType,
                                                port: intType,
                                            },
                                        },
                                        dataDirHostPath: strType { pattern: "^/(\\S+)" },
                                        mon: {
                                            required: ["count"],
                                            properties: {
                                                allowMultiplePerNode: boolType,
                                                count: intType { minimum: 1, maximum: 9 },
                                                preferredCount: intType { minimum: 0, maximum: 9 },
                                            },
                                        },
                                        network: {
                                            properties: {
                                                hostNetwork: boolType,
                                            },
                                        },
                                        storage: {
                                            properties: {
                                                nodes: { type: "array", items: {} },
                                                useAllDevices: {},
                                                useAllNodes: boolType,
                                            },
                                        },
                                    },
                                },
                            },
                        },
                    },
                },
            },

            cephfilesystems: cephCRD("CephFilesystem") {
                spec+: {
                    additionalPrinterColumns: [
                        stringColumn("MdsCount", "Number of MDs", ".spec.metadataServer.activeCount"),
                        ageColumn,
                    ],
                },
            },

            // Sets the plural name and a short name explicitly.
            cephnfses: cephCRD("CephNFS") {
                spec+: {
                    names+: { plural: "cephnfses", shortNames: ["nfs"] },
                },
            },

            cephobjectstores: cephCRD("CephObjectStore"),
            cephobjectstoreusers: cephCRD("CephObjectStoreUser"),
            cephblockpools: cephCRD("CephBlockPool"),

            // The only CRD here outside of the ceph.rook.io group.
            volumes: kube.CustomResourceDefinition("rook.io", "v1alpha2", "Volume") {
                spec+: {
                    names+: { shortNames: ["rv"] },
                },
            },
        },

        // Verb sets shared by the RBAC rules below.
        local readVerbs = ["get", "list", "watch"],
        local writeVerbs = readVerbs + ["create", "update", "delete"],
        local patchVerbs = readVerbs + ["patch", "create", "update", "delete"],

        // env.metadata with the namespace field hidden, used for the
        // ClusterRoles and the ClusterRoleBinding.
        local clusterScoped = env.metadata { namespace:: null },

        // ServiceAccount referenced by the operator Deployment and bound to
        // the roles below.
        sa: kube.ServiceAccount("rook-ceph-system") { metadata+: env.metadata },

        // ClusterRoles. `global` is bound here via `crb`; `clusterMgmt` and
        // `mgrCluster` are bound per Ceph cluster by Cluster.rbs.
        crs: {
            clusterMgmt: kube.ClusterRole("rook-ceph-cluster-mgmt") {
                metadata+: clusterScoped,
                rules: [
                    { apiGroups: [""], resources: ["secrets", "pods", "pods/log", "services", "configmaps"], verbs: patchVerbs },
                    { apiGroups: ["apps"], resources: ["deployments", "daemonsets", "replicasets"], verbs: writeVerbs },
                ],
            },

            global: kube.ClusterRole("rook-ceph-global") {
                metadata+: clusterScoped,
                rules: [
                    { apiGroups: [""], resources: ["pods", "nodes", "nodes/proxy"], verbs: readVerbs },
                    { apiGroups: [""], resources: ["events", "persistentvolumes", "persistentvolumeclaims", "endpoints"], verbs: patchVerbs },
                    { apiGroups: ["storage.k8s.io"], resources: ["storageclasses"], verbs: writeVerbs },
                    { apiGroups: ["batch"], resources: ["jobs"], verbs: writeVerbs },
                ] + [
                    // Full access to everything in the Rook API groups.
                    { apiGroups: [group], resources: ["*"], verbs: ["*"] }
                    for group in ["ceph.rook.io", "rook.io"]
                ],
            },

            mgrCluster: kube.ClusterRole("rook-ceph-mgr-cluster") {
                metadata+: clusterScoped,
                rules: [
                    { apiGroups: [""], resources: ["configmaps", "nodes", "nodes/proxy"], verbs: readVerbs },
                ],
            },
        },

        // Grants the `global` ClusterRole to the operator ServiceAccount.
        crb: kube.ClusterRoleBinding("ceph-rook-global") {
            metadata+: clusterScoped,
            subjects_: [env.sa],
            roleRef_: env.crs.global,
        },

        // Namespaced permissions inside the operator's own namespace.
        role: kube.Role("ceph-rook-system") {
            metadata+: env.metadata,
            rules: [
                { apiGroups: [""], resources: ["pods", "configmaps"], verbs: patchVerbs },
                { apiGroups: ["apps"], resources: ["daemonsets"], verbs: writeVerbs },
            ],
        },

        // Grants `role` to the operator ServiceAccount.
        rb: kube.RoleBinding("ceph-rook-system") {
            metadata+: env.metadata,
            subjects_: [env.sa],
            roleRef_: env.role,
        },

        // Deployment running the Rook operator container.
        operator: kube.Deployment("rook-ceph-operator") {
            // Both volumes of the pod are emptyDirs.
            local scratch = { emptyDir: {} },

            local operatorContainer = kube.Container("rook-ceph-operator") {
                image: cfg.image,
                args: ["ceph", "operator"],
                env_: {
                    // Pod identity, injected through field references.
                    NODE_NAME: kube.FieldRef("spec.nodeName"),
                    POD_NAME: kube.FieldRef("metadata.name"),
                    POD_NAMESPACE: kube.FieldRef("metadata.namespace"),

                    // Operator settings.
                    LIB_MODULES_DIR_PATH: "/run/current-system/kernel-modules/lib/modules/",
                    ROOK_LOG_LEVEL: "INFO",
                    ROOK_ALLOW_MULTIPLE_FILESYSTEMS: "false",
                    ROOK_MON_HEALTHCHECK_INTERVAL: "45s",
                    ROOK_MON_OUT_TIMEOUT: "600s",
                    ROOK_DISCOVER_DEVICES_INTERVAL: "60m",
                    ROOK_HOSTPATH_REQUIRES_PRIVILEGED: "false",
                    ROOK_ENABLE_SELINUX_RELABELING: "true",
                    ROOK_ENABLE_FSGROUP: "true",
                },
                volumeMounts_: {
                    "default-config-dir": { mountPath: "/etc/ceph" },
                    "rook-config": { mountPath: "/var/lib/rook" },
                },
            },

            metadata+: env.metadata,
            spec+: {
                template+: {
                    spec+: {
                        serviceAccountName: env.sa.metadata.name,
                        containers_: { operator: operatorContainer },
                        volumes_: {
                            "default-config-dir": scratch,
                            "rook-config": scratch,
                        },
                    },
                },
            },
        },
    },

    // Create a new Ceph cluster in a new namespace.
    Cluster(operator, name):: {
        local cluster = self,
        // Caller-supplied spec. It is merged over the defaults of the
        // CephCluster object, and its `benji` member is read by the backup
        // objects (cluster.spec.benji.*).
        // NOTE(review): since the whole spec is merged into the CephCluster,
        // callers presumably declare `benji` as a hidden field (`benji::`) to
        // keep it out of that manifest - confirm against callers.
        spec:: error "please define cluster spec",

        // Metadata fragment mixed into the namespaced objects of this
        // cluster; the namespace is named after the cluster itself.
        metadata:: { namespace: name },

        // Returns "<namespace>-<suffix>", used to name this cluster's objects.
        name(suffix):: "%s-%s" % [cluster.metadata.namespace, suffix],

        namespace: kube.Namespace(cluster.metadata.namespace),

        // Verb sets shared by the roles below.
        local readVerbs = ["get", "list", "watch"],
        local writeVerbs = readVerbs + ["create", "update", "delete"],

        // Builds a RoleBinding in the cluster namespace that grants `grant`
        // to the ServiceAccount `account`.
        local bind(suffix, grant, account) = kube.RoleBinding(cluster.name(suffix)) {
            metadata+: cluster.metadata,
            roleRef_: grant,
            subjects_: [account],
        },

        sa: {
            // service accounts need to be hardcoded, see operator source.
            osd: kube.ServiceAccount("rook-ceph-osd") { metadata+: cluster.metadata },
            mgr: kube.ServiceAccount("rook-ceph-mgr") { metadata+: cluster.metadata },
        },

        roles: {
            osd: kube.Role(cluster.name("osd")) {
                metadata+: cluster.metadata,
                rules: [
                    { apiGroups: [""], resources: ["configmaps"], verbs: writeVerbs },
                ],
            },

            mgr: kube.Role(cluster.name("mgr")) {
                metadata+: cluster.metadata,
                rules: [
                    { apiGroups: [""], resources: ["pods", "services"], verbs: readVerbs },
                    { apiGroups: ["batch"], resources: ["jobs"], verbs: writeVerbs },
                    { apiGroups: ["ceph.rook.io"], resources: ["*"], verbs: ["*"] },
                ],
            },

            // A ClusterRole, unlike its siblings, so its namespace is hidden.
            // It is bound in the operator namespace by mgrSystemRB.
            mgrSystem: kube.ClusterRole(cluster.name("mgr-system")) {
                metadata+: cluster.metadata { namespace:: null },
                rules: [
                    { apiGroups: [""], resources: ["configmaps"], verbs: readVerbs },
                ],
            },
        },

        // RoleBindings created in the cluster namespace.
        rbs: [
            // Allow Operator SA to perform Cluster Mgmt in this namespace.
            bind("cluster-mgmt", operator.crs.clusterMgmt, operator.sa),
            bind("osd", cluster.roles.osd, cluster.sa.osd),
            bind("mgr", cluster.roles.mgr, cluster.sa.mgr),
            bind("mgr-cluster", operator.crs.mgrCluster, cluster.sa.mgr),
        ],

        // RoleBinding in the operator's namespace (not the cluster's) that
        // grants the mgrSystem ClusterRole to the mgr ServiceAccount.
        mgrSystemRB: kube.RoleBinding(cluster.name("mgr-system")) {
            metadata+: { namespace: operator.cfg.namespace },
            subjects_: [cluster.sa.mgr],
            roleRef_: cluster.roles.mgrSystem,
        },

        // The CephCluster custom resource itself.
        cluster: kube._Object("ceph.rook.io/v1", "CephCluster", name) {
            // Defaults; top-level keys present in cluster.spec replace them.
            local defaults = {
                cephVersion: {
                    // https://github.com/rook/rook/issues/2945#issuecomment-483964014
                    // Previously used image: "ceph/ceph:v13.2.5-20190319"
                    image: "ceph/ceph:v14.2.2-20190830",
                    allowUnsupported: true,
                },
                dashboard: {
                    enabled: true,
                    port: 8080,
                    ssl: false,
                },
                dataDirHostPath: "/var/lib/rook",
            },

            metadata+: cluster.metadata,
            spec: defaults + cluster.spec,
        },

        // ClusterIP Service forwarding port 80 to port 8080 of pods labelled
        // app=rook-ceph-mgr for this cluster.
        dashboardService: kube.Service(cluster.name("dashboard")) {
            metadata+: cluster.metadata,
            spec: {
                type: "ClusterIP",
                selector: {
                    rook_cluster: name,
                    app: "rook-ceph-mgr",
                },
                ports: [
                    { name: "dashboard", protocol: "TCP", port: 80, targetPort: 8080 },
                ],
            },
        },

        // Ingress exposing the dashboard Service at <name>.hswaw.net, with a
        // TLS secret named after the dashboard.
        dashboardIngress: kube.Ingress(cluster.name("dashboard")) {
            local host = "%s.hswaw.net" % name,

            metadata+: cluster.metadata {
                annotations+: {
                    "certmanager.k8s.io/cluster-issuer": "letsencrypt-prod",
                    "kubernetes.io/tls-acme": "true",
                },
            },
            spec+: {
                tls: [
                    { hosts: [host], secretName: cluster.name("dashboard") },
                ],
                rules: [
                    {
                        host: host,
                        http: {
                            paths: [
                                { path: "/", backend: cluster.dashboardService.name_port },
                            ],
                        },
                    },
                ],
            },
        },

        # Benji is a backup tool, external to rook, that we use for backing up
        # RBDs.
        benji: {
            # ServiceAccount used by the backup CronJob's pods.
            sa: kube.ServiceAccount(cluster.name("benji")) { metadata+: cluster.metadata },

            # Cluster-wide permissions: read PVs/PVCs, create/update events.
            cr: kube.ClusterRole(cluster.name("benji")) {
                rules: [
                    { apiGroups: [""], resources: ["persistentvolumes", "persistentvolumeclaims"], verbs: ["list", "get"] },
                    { apiGroups: [""], resources: ["events"], verbs: ["create", "update"] },
                ],
            },

            crb: kube.ClusterRoleBinding(cluster.name("benji")) {
                subjects_: [cluster.benji.sa],
                roleRef_: cluster.benji.cr,
            },

            # Benji configuration, rendered as JSON under the key benji.yaml.
            config: kube.Secret(cluster.name("benji-config")) {
                # Caller-provided benji settings.
                local settings = cluster.spec.benji,

                # The single S3 storage; its objects go through the
                # "encrypt" transform.
                local s3Storage = {
                    name: "wasabi",
                    storageId: 1,
                    module: "s3",
                    configuration: settings.s3Configuration {
                        activeTransforms: ["encrypt"],
                    },
                },

                local encryptTransform = {
                    name: "encrypt",
                    module: "aes_256_gcm",
                    configuration: {
                        # not secret.
                        kdfSalt: "T2huZzZpcGhhaWM3QWVwaDhybzRhaDNhbzFpc2VpOWFobDNSZWVQaGVvTWV1bmVaYWVsNHRoYWg5QWVENHNoYWg0ZGFoN3Rlb3NvcHVuZzNpZXZpMm9vTG9vbmc1YWlmb0RlZXAwYmFobDlab294b2hjaG9odjRhbzFsYWkwYWk=",
                        kdfIterations: 2137,
                        password: settings.encryptionPassword,
                    },
                },

                metadata+: cluster.metadata,
                data_: {
                    "benji.yaml": std.manifestJson({
                        configurationVersion: "1",
                        databaseEngine: "sqlite:////data/benji.sqlite",
                        defaultStorage: "wasabi",
                        storages: [s3Storage],
                        transforms: [encryptTransform],
                        # One rbd IO definition per configured pool.
                        ios: [
                            { name: rbdPool, module: "rbd" }
                            for rbdPool in settings.pools
                        ],
                    }),
                },
            },

            # Yes, Benji keeps data (backup metadata) on the ceph cluster that
            # it backs up. However:
            #  - we add a command to benji-k8s to also copy over the sqlite
            #    database over to s3
            #  - benji can, in a pinch, restore without a database if a version
            #    is known: https://benji-backup.me/restore.html#restoring-without-a-database
            data: kube.PersistentVolumeClaim(cluster.name("benji-data")) {
                metadata+: cluster.metadata,
                spec+: {
                    accessModes: ["ReadWriteOnce"],
                    storageClassName: cluster.spec.benji.metadataStorageClass,
                    resources: {
                        requests: { storage: "1Gi" },
                    },
                },
            },

            # Extra scripts.
            # ConfigMap holding two bash scripts. The cronjob below mounts it
            # at /usr/local/extrabins and runs both scripts from there.
            extrabins: kube.ConfigMap(cluster.name("benji-extrabins")) {
                metadata+: cluster.metadata,
                data: {
                    # Reads the S3 credentials and bucket name with jq from
                    # /etc/benji/benji.yaml, moves each existing
                    # metabackup-N.sqlite3 object to metabackup-(N+1).sqlite3
                    # for N = 9 down to 0, then uploads /data/benji.sqlite as
                    # metabackup-0.sqlite3. Installs s3cmd with pip if it is
                    # not already on the PATH.
                    "metabackup.sh" : |||
                        # Make backups of sqlite3 metadata used by Benji.
                        # The backups live in the same bucket as backups, and the metabackups
                        # are named `metabackup-0..10`, where 0 is the newest backup. Any time
                        # this script is called, backups get shifted one way to the left (9 to 10,
                        # 8 to 9, etc). This ensures we have at least 10 backup replicas.

                        set -e

                        which s3cmd || pip install --upgrade s3cmd

                        AWS_ACCESS_KEY_ID=$(jq -r .storages[0].configuration.awsAccessKeyId < /etc/benji/benji.yaml)
                        AWS_SECRET_ACCESS_KEY=$(jq -r .storages[0].configuration.awsSecretAccessKey < /etc/benji/benji.yaml)
                        BUCKET=$(jq -r .storages[0].configuration.bucketName < /etc/benji/benji.yaml)

                        s3() {
                            s3cmd --host=s3.wasabisys.com \
                                "--host-bucket=%(bucket)s.s3.wasabisys.com" \
                                --region=eu-central-1 \
                                --access_key=$AWS_ACCESS_KEY_ID \
                                --secret_key=$AWS_SECRET_ACCESS_KEY \
                                "$@"
                        }

                        # Copy over old backups, if they exist.
                        for i in `seq 9 -1 0`; do
                            from="s3://$BUCKET/metabackup-$i.sqlite3"
                            to="s3://$BUCKET/metabackup-$((i+1)).sqlite3"

                            if [[ $(s3 ls $from | wc -l) -eq 0 ]]; then
                                echo "$from does not exist, skipping shift."
                                continue
                            fi
                            echo "Moving $from to $to..."
                            s3 mv $from $to
                        done

                        # Make new metabackup.
                        s3 put /data/benji.sqlite s3://$BUCKET/metabackup-0.sqlite3

                    |||,
                    # Writes /etc/ceph/keyring from $ROOK_ADMIN_SECRET and
                    # /etc/ceph/ceph.conf from /etc/rook/mon-endpoints, then
                    # starts a background loop that checks the endpoints file
                    # every 10 seconds and rewrites ceph.conf when the file's
                    # change time differs from the last one seen.
                    "get-rook-creds.sh": |||
                        # Based on the Rook Toolbox /usr/local/bin/toolbox.sh script.
                        # Copyright 2016 The Rook Authors. All rights reserved.

                        CEPH_CONFIG="/etc/ceph/ceph.conf"
                        MON_CONFIG="/etc/rook/mon-endpoints"
                        KEYRING_FILE="/etc/ceph/keyring"

                        # create a ceph config file in its default location so ceph/rados tools can be used
                        # without specifying any arguments
                        write_endpoints() {
                            endpoints=$(cat ${MON_CONFIG})

                            # filter out the mon names
                            mon_endpoints=$(echo ${endpoints} | sed 's/[a-z]\+=//g')

                            # filter out the legacy mon names
                            mon_endpoints=$(echo ${mon_endpoints} | sed 's/rook-ceph-mon[0-9]\+=//g')

                            DATE=$(date)
                            echo "$DATE writing mon endpoints to ${CEPH_CONFIG}: ${endpoints}"
                            cat <<EOF > ${CEPH_CONFIG}
                        [global]
                        mon_host = ${mon_endpoints}

                        [client.admin]
                        keyring = ${KEYRING_FILE}
                        EOF
                        }

                        # watch the endpoints config file and update if the mon endpoints ever change
                        watch_endpoints() {
                            # get the timestamp for the target of the soft link
                            real_path=$(realpath ${MON_CONFIG})
                            initial_time=$(stat -c %Z ${real_path})
                            while true; do
                               real_path=$(realpath ${MON_CONFIG})
                               latest_time=$(stat -c %Z ${real_path})

                               if [[ "${latest_time}" != "${initial_time}" ]]; then
                                 write_endpoints
                                 initial_time=${latest_time}
                               fi
                               sleep 10
                            done
                        }

                        # create the keyring file
                        cat <<EOF > ${KEYRING_FILE}
                        [client.admin]
                        key = ${ROOK_ADMIN_SECRET}
                        EOF

                        # write the initial config file
                        write_endpoints

                        # continuously update the mon endpoints if they fail over
                        watch_endpoints &
                    |||
                },
            },

            # CronJob that runs the backup sequence once an hour.
            cronjob: kube.CronJob(cluster.name("benji")) {
                # Steps executed by every run: write the Ceph credentials,
                # back up PVCs, enforce retention, clean up, and finally
                # upload a copy of Benji's sqlite database.
                local backupScript = |||
                    bash /usr/local/extrabins/get-rook-creds.sh
                    benji-backup-pvc
                    benji-command enforce latest3,hours48,days7,months12
                    benji-command cleanup
                    bash /usr/local/extrabins/metabackup.sh
                |||,

                local benjiContainer = kube.Container(cluster.name("benji")) {
                    # TODO(q3k): switch back to upstream after pull/52 goes in.
                    # Currently this is being built from github.com/q3k/benji.
                    # https://github.com/elemental-lf/benji/pull/52
                    image: "registry.k0.hswaw.net/q3k/benji-k8s:20190831-1351",
                    command: ["bash", "-c", backupScript],
                    env_: {
                        ROOK_ADMIN_SECRET: {
                            secretKeyRef: { name: "rook-ceph-mon", key: "admin-secret" },
                        },
                    },
                    volumeMounts_: {
                        benjiconfig: { mountPath: "/etc/benji" },
                        data: { mountPath: "/data" },
                        extrabins: { mountPath: "/usr/local/extrabins" },
                        monendpoints: { mountPath: "/etc/rook" },
                    },
                },

                # Volumes backing the mounts of the container above.
                local podVolumes = {
                    benjiconfig: kube.SecretVolume(cluster.benji.config),
                    data: kube.PersistentVolumeClaimVolume(cluster.benji.data),
                    extrabins: kube.ConfigMapVolume(cluster.benji.extrabins),
                    # Exposes the "data" key of the rook-ceph-mon-endpoints
                    # ConfigMap as the file mon-endpoints.
                    monendpoints: {
                        configMap: {
                            name: "rook-ceph-mon-endpoints",
                            items: [
                                { key: "data", path: "mon-endpoints" },
                            ],
                        },
                    },
                },

                metadata+: cluster.metadata,
                spec+: { # CronJob Spec
                    schedule: "42 * * * *", # Hourly at 42 minute past.
                    jobTemplate+: {
                        spec+: { # Job Spec
                            selector:: null,
                            template+: {
                                spec+: { # PodSpec
                                    serviceAccountName: cluster.benji.sa.metadata.name,
                                    containers_: { benji: benjiContainer },
                                    volumes_: podVolumes,
                                },
                            },
                        },
                    },
                },
            },
        },
    },

    // A CephBlockPool in the given cluster's namespace, plus a StorageClass
    // of the same name that provisions ext4 volumes from it.
    // The pool spec must be provided by the caller.
    ReplicatedBlockPool(cluster, name):: {
        local this = self,

        name:: name,
        spec:: error "spec must be specified",

        pool: kube._Object("ceph.rook.io/v1", "CephBlockPool", name) {
            spec: this.spec,
            metadata+: cluster.metadata,
        },

        storageClass: kube.StorageClass(name) {
            reclaimPolicy: "Retain",
            provisioner: "ceph.rook.io/block",
            parameters: {
                fstype: "ext4",
                clusterNamespace: this.pool.metadata.namespace,
                blockPool: this.pool.metadata.name,
            },
        },
    },

    // A caller-specified data pool (presumably erasure-coded, going by the
    // name) paired with a 3-way replicated "<name>-metadata" pool, plus a
    // StorageClass that uses the metadata pool as blockPool and the data pool
    // as dataBlockPool.
    ECBlockPool(cluster, name):: {
        local this = self,

        name:: name,
        spec:: error "spec must be specified",

        // Data pool, with the spec provided by the caller.
        pool: kube._Object("ceph.rook.io/v1", "CephBlockPool", name) {
            spec: this.spec,
            metadata+: cluster.metadata,
        },

        // Companion pool with a fixed spec.
        metapool: kube._Object("ceph.rook.io/v1", "CephBlockPool", name + "-metadata") {
            metadata+: cluster.metadata,
            spec: {
                replicated: { size: 3 },
                failureDomain: "host",
            },
        },

        storageClass: kube.StorageClass(name) {
            reclaimPolicy: "Retain",
            provisioner: "ceph.rook.io/block",
            parameters: {
                fstype: "ext4",
                clusterNamespace: this.pool.metadata.namespace,
                dataBlockPool: this.pool.metadata.name,
                blockPool: this.metapool.metadata.name,
            },
        },
    },

    // A CephObjectStore with an S3 gateway on port 80, plus an Ingress that
    // serves it at object.<cluster namespace>.hswaw.net.
    // The gateway block below replaces any `gateway` in the caller's spec.
    S3ObjectStore(cluster, name):: {
        local this = self,
        // Public host name; it depends on the cluster namespace, not on the
        // object store name.
        local host = "object.%s.hswaw.net" % [cluster.metadata.namespace],

        spec:: error "spec must be specified",

        objectStore: kube._Object("ceph.rook.io/v1", "CephObjectStore", name) {
            metadata+: cluster.metadata,
            spec: this.spec {
                gateway: {
                    type: "s3",
                    port: 80,
                    instances: 1,
                    allNodes: false,
                },
            },
        },

        objectIngress: kube.Ingress(name) {
            metadata+: cluster.metadata {
                annotations+: {
                    "nginx.ingress.kubernetes.io/proxy-body-size": "0",
                    "certmanager.k8s.io/cluster-issuer": "letsencrypt-prod",
                    "kubernetes.io/tls-acme": "true",
                },
            },
            spec+: {
                tls: [
                    { hosts: [host], secretName: "%s-tls" % [name] },
                ],
                rules: [
                    {
                        host: host,
                        http: {
                            paths: [
                                {
                                    path: "/",
                                    backend: {
                                        serviceName: "rook-ceph-rgw-%s" % [name],
                                        servicePort: 80,
                                    },
                                },
                            ],
                        },
                    },
                ],
            },
        },
    },
}
