# Deploy Rook/Ceph Operator

local kube = import "../../../kube/kube.libsonnet";
local policies = import "../../../kube/policies.libsonnet";

local oa = kube.OpenAPI;

{
    Operator: {
        // `env` is the (late-bound) Operator object itself and `cfg` its
        // configuration; the fields below refer to both, so overriding
        // `cfg+::` or `metadata+::` on Operator propagates to every resource.
        local env = self,
        local cfg = env.cfg,
        // User-tunable settings (hidden field, not emitted as a resource).
        cfg:: {
            // Container image used by the operator Deployment.
            image: "rook/ceph:v1.6.9",
            // Namespace in which all namespaced operator resources live.
            namespace: "rook-ceph-system",
        },

        // Metadata mixed into every resource defined on Operator (hidden
        // field). Cluster-scoped resources hide `namespace` again when mixing
        // this in.
        metadata:: {
            namespace: cfg.namespace,
            labels: {
                "operator": "rook",
                "storage-backend": "ceph",
            },
        },

        // The operator namespace itself.
        namespace: kube.Namespace(cfg.namespace),

        // NOTE(review): presumably relaxes pod security restrictions for the
        // operator namespace; confirm against policies.libsonnet.
        policyInsecure: policies.AllowNamespaceInsecure(cfg.namespace),

        // CRDs, taken from the upstream YAML manifest (rook-crds.yaml).
        //
        // Every parsed document is overlaid with `scrub`, which:
        //  - hides metadata.annotations, metadata.creationTimestamp and
        //    status, fields that kubebuilder (used upstream) added;
        //  - forces spec.preserveUnknownFields to false, as some older
        //    deployments apparently set it to true, which doesn't work for
        //    new CRDs that have default values.
        // Null entries in the parsed list are skipped.
        crds:
            local scrub = {
                metadata+: {
                    annotations:: null,
                    creationTimestamp:: null,
                },
                status:: null,
                spec+: { preserveUnknownFields: false },
            };
            local upstream = std.native("parseYaml")(importstr "rook-crds.yaml");
            [doc + scrub for doc in upstream if doc != null],

        // ServiceAccounts in the operator namespace. `system` is the account
        // the operator Deployment runs as; the CSI accounts are only bound to
        // roles in this file.
        sa: {
            // A ServiceAccount carrying the shared operator metadata.
            local account(saName) = kube.ServiceAccount(saName) {
                metadata+: env.metadata,
            },

            system: account("rook-ceph-system"),
            csiCephfsPlugin: account("rook-csi-cephfs-plugin-sa"),
            csiCephfsProvisioner: account("rook-csi-cephfs-provisioner-sa"),
            csiRbdPlugin: account("rook-csi-rbd-plugin-sa"),
            csiRbdProvisioner: account("rook-csi-rbd-provisioner-sa"),
        },

        // ClusterRoles. `global`, `objectBucket` and the four CSI roles are
        // bound cluster-wide in `crbs`; `clusterMgmt` and `mgrCluster` are
        // bound per Ceph cluster namespace by Cluster.rbs.
        crs: {
            // Builds a single RBAC rule; keeps the long rule lists compact.
            local rule(groups, resources, verbs) = {
                apiGroups: groups,
                resources: resources,
                verbs: verbs,
            },
            // Shared operator metadata with the namespace hidden, since
            // ClusterRoles are not namespaced.
            local clusterMeta = env.metadata { namespace:: null },

            clusterMgmt: kube.ClusterRole("rook-ceph-cluster-mgmt") {
                metadata+: clusterMeta,
                rules: [
                    rule(
                        ["", "apps", "extensions"],
                        ["secrets", "pods", "pods/log", "services", "configmaps", "deployments", "daemonsets"],
                        ["get", "list", "watch", "patch", "create", "update", "delete"],
                    ),
                ],
            },

            global: kube.ClusterRole("rook-ceph-global") {
                metadata+: clusterMeta {
                    labels+: {
                        "rbac.ceph.rook.io/aggregate-to-rook-ceph-global": "true",
                    },
                },
                rules: [
                    rule([""], ["pods", "nodes", "nodes/proxy", "services"], ["get", "list", "watch"]),
                    rule(
                        [""],
                        ["events", "persistentvolumes", "persistentvolumeclaims", "endpoints"],
                        ["get", "list", "watch", "patch", "create", "update", "delete"],
                    ),
                    rule(["storage.k8s.io"], ["storageclasses"], ["get", "list", "watch"]),
                    rule(["batch"], ["jobs", "cronjobs"], ["get", "list", "watch", "create", "update", "delete"]),
                    rule(["ceph.rook.io"], ["*"], ["*"]),
                    rule(["rook.io"], ["*"], ["*"]),
                    rule(["policy", "apps", "extensions"], ["poddisruptionbudgets", "deployments", "replicasets"], ["*"]),
                    rule(
                        ["healthchecking.openshift.io"],
                        ["machinedisruptionbudgets"],
                        ["get", "list", "watch", "create", "update", "delete"],
                    ),
                    rule(["machine.openshift.io"], ["machines"], ["get", "list", "watch", "create", "update", "delete"]),
                    rule(["storage.k8s.io"], ["csidrivers"], ["create", "delete", "get", "update"]),
                    rule(["k8s.cni.cncf.io"], ["network-attachment-definitions"], ["get"]),
                ],
            },

            mgrCluster: kube.ClusterRole("rook-ceph-mgr-cluster") {
                metadata+: clusterMeta,
                rules: [
                    rule([""], ["configmaps", "nodes", "nodes/proxy"], ["get", "list", "watch"]),
                    rule([""], ["events"], ["create", "patch", "list", "get", "watch"]),
                ],
            },

            objectBucket: kube.ClusterRole("rook-ceph-object-bucket") {
                metadata+: clusterMeta,
                rules: [
                    rule([""], ["secrets", "configmaps"], ["*"]),
                    rule(["storage.k8s.io"], ["storageclasses"], ["get", "list", "watch"]),
                    rule(["objectbucket.io"], ["*"], ["*"]),
                ],
            },

            cephfsCSINodeplugin: kube.ClusterRole("cephfs-csi-nodeplugin") {
                metadata+: clusterMeta,
                rules: [
                    rule([""], ["nodes"], ["get", "list", "update"]),
                    rule([""], ["namespaces"], ["get", "list"]),
                    rule([""], ["persistentvolumes"], ["get", "list", "watch", "update"]),
                    rule(["storage.k8s.io"], ["volumeattachments"], ["get", "list", "watch", "update"]),
                    rule([""], ["configmaps"], ["get", "list"]),
                ],
            },

            cephfsExternalProvisionerRunner: kube.ClusterRole("cephfs-external-provisioner-runner") {
                metadata+: clusterMeta {
                    labels+: {
                        "rbac.ceph.rook.io/aggregate-to-cephfs-external-provisioner-runner": "true",
                    },
                },
                rules: [
                    rule([""], ["secrets"], ["get", "list"]),
                    rule([""], ["persistentvolumes"], ["get", "list", "watch", "create", "delete", "update", "patch"]),
                    rule([""], ["persistentvolumeclaims"], ["get", "list", "watch", "update"]),
                    rule(["storage.k8s.io"], ["storageclasses"], ["get", "list", "watch"]),
                    rule([""], ["events"], ["list", "watch", "create", "update", "patch"]),
                    rule(["snapshot.storage.k8s.io"], ["volumesnapshots"], ["get", "list", "watch", "update"]),
                    rule(
                        ["snapshot.storage.k8s.io"],
                        ["volumesnapshotcontents"],
                        ["create", "get", "list", "watch", "update", "delete"],
                    ),
                    rule(["snapshot.storage.k8s.io"], ["volumesnapshotclasses"], ["get", "list", "watch"]),
                    rule(["snapshot.storage.k8s.io"], ["volumesnapshotcontents/status"], ["update"]),
                    rule(
                        ["apiextensions.k8s.io"],
                        ["customresourcedefinitions"],
                        ["create", "list", "watch", "delete", "get", "update"],
                    ),
                    rule(["snapshot.storage.k8s.io"], ["volumesnapshots/status"], ["update"]),
                    rule(["storage.k8s.io"], ["volumeattachments"], ["get", "list", "watch", "update", "patch"]),
                    rule(["storage.k8s.io"], ["volumeattachments/status"], ["patch"]),
                    rule([""], ["nodes"], ["get", "list", "watch"]),
                    rule([""], ["persistentvolumeclaims/status"], ["update", "patch"]),
                ],
            },

            rbdCSINodeplugin: kube.ClusterRole("rbd-csi-nodeplugin") {
                metadata+: clusterMeta,
                rules: [
                    rule([""], ["secrets"], ["get", "list"]),
                    rule([""], ["nodes"], ["get", "list", "update"]),
                    rule([""], ["namespaces"], ["get", "list"]),
                    rule([""], ["persistentvolumes"], ["get", "list", "watch", "update"]),
                    rule(["storage.k8s.io"], ["volumeattachments"], ["get", "list", "watch", "update"]),
                    rule([""], ["configmaps"], ["get", "list"]),
                ],
            },

            rbdExternalProvisionerRunner: kube.ClusterRole("rbd-external-provisioner-runner") {
                metadata+: clusterMeta {
                    labels+: {
                        "rbac.ceph.rook.io/aggregate-to-rbd-external-provisioner-runner": "true",
                    },
                },
                rules: [
                    rule([""], ["secrets"], ["get", "list", "watch"]),
                    rule([""], ["persistentvolumes"], ["get", "list", "watch", "create", "delete", "update", "patch"]),
                    rule([""], ["persistentvolumeclaims"], ["get", "list", "watch", "update"]),
                    rule(["storage.k8s.io"], ["volumeattachments"], ["get", "list", "watch", "update", "patch"]),
                    rule(["storage.k8s.io"], ["volumeattachments/status"], ["patch"]),
                    rule([""], ["nodes"], ["get", "list", "watch"]),
                    rule(["storage.k8s.io"], ["storageclasses"], ["get", "list", "watch"]),
                    rule([""], ["events"], ["list", "watch", "create", "update", "patch"]),
                    rule(["snapshot.storage.k8s.io"], ["volumesnapshots"], ["get", "list", "watch", "update"]),
                    rule(
                        ["snapshot.storage.k8s.io"],
                        ["volumesnapshotcontents"],
                        ["create", "get", "list", "watch", "update", "delete"],
                    ),
                    rule(["snapshot.storage.k8s.io"], ["volumesnapshotclasses"], ["get", "list", "watch"]),
                    rule(["snapshot.storage.k8s.io"], ["volumesnapshotcontents/status"], ["update"]),
                    rule(
                        ["apiextensions.k8s.io"],
                        ["customresourcedefinitions"],
                        ["create", "list", "watch", "delete", "get", "update"],
                    ),
                    rule(["snapshot.storage.k8s.io"], ["volumesnapshots/status"], ["update"]),
                    rule([""], ["persistentvolumeclaims/status"], ["update", "patch"]),
                    rule([""], ["configmaps"], ["get"]),
                    rule(
                        ["replication.storage.openshift.io"],
                        ["volumereplications", "volumereplicationclasses"],
                        ["create", "delete", "get", "list", "patch", "update", "watch"],
                    ),
                    rule(["replication.storage.openshift.io"], ["volumereplications/finalizers"], ["update"]),
                    rule(["replication.storage.openshift.io"], ["volumereplications/status"], ["get", "patch", "update"]),
                    rule(["replication.storage.openshift.io"], ["volumereplicationclasses/status"], ["get"]),
                ],
            },
        },

        // ClusterRoleBindings: each grants one ClusterRole from `crs` to one
        // ServiceAccount from `sa`.
        crbs: {
            // A ClusterRoleBinding of `role` to the single subject `account`,
            // with the shared metadata minus the namespace (the binding is
            // cluster-scoped).
            local bind(name, role, account) = kube.ClusterRoleBinding(name) {
                metadata+: env.metadata { namespace:: null },
                roleRef_: role,
                subjects_: [account],
            },

            global: bind("ceph-rook-global", env.crs.global, env.sa.system),
            objectBucket: bind("rook-ceph-object-bucket", env.crs.objectBucket, env.sa.system),
            cephfsCSINodeplugin: bind("cephfs-csi-nodeplugin", env.crs.cephfsCSINodeplugin, env.sa.csiCephfsPlugin),
            cephfsCSIProvisioner: bind(
                "cephfs-csi-provisioner",
                env.crs.cephfsExternalProvisionerRunner,
                env.sa.csiCephfsProvisioner,
            ),
            rbdCSINodeplugin: bind("rbd-csi-nodeplugin", env.crs.rbdCSINodeplugin, env.sa.csiRbdPlugin),
            rbdCSIProvisioner: bind(
                "rbd-csi-provisioner",
                env.crs.rbdExternalProvisionerRunner,
                env.sa.csiRbdProvisioner,
            ),
        },

        // Namespaced Roles, created in the operator namespace and bound to
        // ServiceAccounts in `rbs`.
        roles: {
            // Builds a single RBAC rule.
            local rule(groups, resources, verbs) = {
                apiGroups: groups,
                resources: resources,
                verbs: verbs,
            },
            // Verb list shared by the endpoints and leases rules of both
            // external-provisioner roles.
            local manageVerbs = ["get", "watch", "list", "delete", "update", "create"],

            system: kube.Role("ceph-rook-system") {
                metadata+: env.metadata,
                rules: [
                    rule(
                        [""],
                        ["pods", "configmaps", "services"],
                        ["get", "list", "watch", "patch", "create", "update", "delete"],
                    ),
                    rule(
                        ["apps"],
                        ["daemonsets", "statefulsets", "deployments"],
                        ["get", "list", "watch", "create", "update", "delete"],
                    ),
                    rule(["k8s.cni.cncf.io"], ["network-attachment-definitions"], ["get"]),
                ],
            },

            cephfsExternalProvisioner: kube.Role("cephfs-external-provisioner-cfg") {
                metadata+: env.metadata,
                rules: [
                    rule([""], ["endpoints"], manageVerbs),
                    rule([""], ["configmaps"], ["get", "list", "create", "delete"]),
                    rule(["coordination.k8s.io"], ["leases"], manageVerbs),
                ],
            },

            rbdExternalProvisioner: kube.Role("rbd-external-provisioner-cfg") {
                metadata+: env.metadata,
                rules: [
                    rule([""], ["endpoints"], manageVerbs),
                    rule([""], ["configmaps"], ["get", "list", "watch", "create", "delete", "update"]),
                    rule(["coordination.k8s.io"], ["leases"], manageVerbs),
                ],
            },
        },

        // RoleBindings in the operator namespace: each grants one Role from
        // `roles` to one ServiceAccount from `sa`.
        rbs: {
            // A RoleBinding of `role` to the single subject `account`,
            // carrying the shared operator metadata.
            local bind(name, role, account) = kube.RoleBinding(name) {
                metadata+: env.metadata,
                roleRef_: role,
                subjects_: [account],
            },

            system: bind("ceph-rook-system", env.roles.system, env.sa.system),
            cephfsCSIProvisioner: bind(
                "cephfs-csi-provisioner-role-cfg",
                env.roles.cephfsExternalProvisioner,
                env.sa.csiCephfsProvisioner,
            ),
            rbdCSIProvisioner: bind(
                "rbd-csi-provisioner-role-cfg",
                env.roles.rbdExternalProvisioner,
                env.sa.csiRbdProvisioner,
            ),
        },

        // The operator Deployment. It declares one container, started from
        // cfg.image with args `ceph operator`, running as the `system`
        // ServiceAccount.
        operator: kube.Deployment("rook-ceph-operator") {
            // Volume name -> mount path. Every entry becomes an emptyDir
            // volume on the pod and a mount in the operator container.
            local scratchDirs = {
                "rook-config": "/var/lib/rook",
                "default-config-dir": "/etc/ceph",
            },
            // Environment handed to the operator container. The last three
            // plain-string entries aside, NODE_NAME / POD_NAME / POD_NAMESPACE
            // are filled in from the pod's own fields.
            local settings = {
                LIB_MODULES_DIR_PATH: "/run/current-system/kernel-modules/lib/modules/",
                ROOK_ALLOW_MULTIPLE_FILESYSTEMS: "false",
                ROOK_LOG_LEVEL: "INFO",
                ROOK_MON_HEALTHCHECK_INTERVAL: "45s",
                ROOK_MON_OUT_TIMEOUT: "600s",
                ROOK_DISCOVER_DEVICES_INTERVAL: "60m",
                ROOK_HOSTPATH_REQUIRES_PRIVILEGED: "false",
                ROOK_ENABLE_SELINUX_RELABELING: "true",
                ROOK_ENABLE_FSGROUP: "true",
                NODE_NAME: kube.FieldRef("spec.nodeName"),
                POD_NAME: kube.FieldRef("metadata.name"),
                POD_NAMESPACE: kube.FieldRef("metadata.namespace"),
                ROOK_CSI_KUBELET_DIR_PATH: "/var/lib/kubernetes",
                ROOK_ENABLE_FLEX_DRIVER: "true",
            },

            metadata+: env.metadata,
            spec+: {
                template+: {
                    spec+: {
                        serviceAccountName: env.sa.system.metadata.name,
                        volumes_: {
                            [vol]: { emptyDir: {} }
                            for vol in std.objectFields(scratchDirs)
                        },
                        containers_: {
                            operator: kube.Container("rook-ceph-operator") {
                                image: cfg.image,
                                args: ["ceph", "operator"],
                                env_: settings,
                                volumeMounts_: {
                                    [vol]: { mountPath: scratchDirs[vol] }
                                    for vol in std.objectFields(scratchDirs)
                                },
                            },
                        },
                    },
                },
            },
        },
    },

    // Create a new Ceph cluster in a new namespace.
    Cluster(operator, name):: {
        local cluster = self,
        spec:: error "please define cluster spec",


        metadata:: {
            namespace: name,
        },

        name(suffix):: cluster.metadata.namespace + "-" + suffix,

        namespace: kube.Namespace(cluster.metadata.namespace),

        sa: {
            // service accounts need to be hardcoded, see operator source.
            osd: kube.ServiceAccount("rook-ceph-osd") {
                metadata+: cluster.metadata,
            },
            mgr: kube.ServiceAccount("rook-ceph-mgr") {
                metadata+: cluster.metadata,
            },
            cmdReporter: kube.ServiceAccount("rook-ceph-cmd-reporter") {
                metadata+: cluster.metadata,
            },
        },

        roles: {
            osd: kube.Role(cluster.name("osd")) {
                metadata+: cluster.metadata,
                rules: [
                    {
                        apiGroups: [""],
                        resources: ["configmaps"],
                        verbs: ["get", "list", "watch", "create", "update", "delete"],
                    },
                ],
            },
            osdCluster: kube.ClusterRole(cluster.name("osd-cluster")) {
                metadata+: cluster.metadata { namespace:: null },
                rules: [
                    {
                        apiGroups: [""],
                        resources: ["nodes"],
                        verbs: ["get", "list"],
                    },
                ],
            },
            mgr: kube.Role(cluster.name("mgr")) {
                metadata+: cluster.metadata,
                rules: [
                    {
                        apiGroups: [""],
                        resources: ["pods", "services"],
                        verbs: ["get", "list", "watch"],
                    },
                    {
                        apiGroups: ["batch"],
                        resources: ["jobs"],
                        verbs: ["get", "list", "watch", "create", "update", "delete"],
                    },
                    {
                        apiGroups: ["ceph.rook.io"],
                        resources: ["*"],
                        verbs: ["*"],
                    },
                ],
            },
            cmdReporter: kube.Role(cluster.name("cmd-reporter")) {
                metadata+: cluster.metadata,
                rules: [
                    {
                        apiGroups: [""],
                        resources: ["pods", "configmaps"],
                        verbs: ["get", "list", "watch", "create", "update", "delete"],
                    },
                ],
            },
            mgrSystem: kube.ClusterRole(cluster.name("mgr-system")) {
                metadata+: cluster.metadata { namespace:: null },
                rules: [
                    {
                        apiGroups: [""],
                        resources: ["configmaps"],
                        verbs: ["get", "list", "watch"],
                    }
                ],
            },
        },

        rbs: [
            kube.RoleBinding(cluster.name(el.name)) {
                metadata+: cluster.metadata,
                roleRef_: el.role,
                subjects_: [el.sa],
            },
            for el in [
                // Allow Operator SA to perform Cluster Mgmt in this namespace.
                { name: "cluster-mgmt", role: operator.crs.clusterMgmt, sa: operator.sa.system },
                { name: "osd", role: cluster.roles.osd, sa: cluster.sa.osd },
                { name: "mgr", role: cluster.roles.mgr, sa: cluster.sa.mgr },
                { name: "cmd-reporter", role: cluster.roles.cmdReporter, sa: cluster.sa.cmdReporter },
                { name: "mgr-cluster", role: operator.crs.mgrCluster, sa: cluster.sa.mgr },
            ]
        ],

        mgrSystemRB: kube.RoleBinding(cluster.name("mgr-system")) {
            metadata+: {
                namespace: operator.cfg.namespace,
            },
            roleRef_: cluster.roles.mgrSystem,
            subjects_: [cluster.sa.mgr],
        },

        osdClusterRB: kube.ClusterRoleBinding(cluster.name("osd-cluster")) {
            metadata+: {
                namespace:: null,
            },
            roleRef_: cluster.roles.osdCluster,
            subjects_: [cluster.sa.osd],
        },


        cluster: kube._Object("ceph.rook.io/v1", "CephCluster", name) {
            metadata+: cluster.metadata,
            spec: {
                cephVersion: {
                    image: "quay.io/ceph/ceph:v16.2.5",
                    allowUnsupported: true,
                },
                dataDirHostPath: if name == "ceph-waw2" then "/var/lib/rook" else "/var/lib/rook-%s" % [name],
                dashboard: {
                    ssl: false,
                    enabled: true,
                    port: 8080,
                },
            } + cluster.spec,
        },

        dashboardService: kube.Service(cluster.name("dashboard")) {
            metadata+: cluster.metadata,
            spec: {
                ports: [
                    { name: "dashboard", port: 80, targetPort: 8080, protocol: "TCP" },
                ],
                selector: {
                    app: "rook-ceph-mgr",
                    rook_cluster: name,
                },
                type: "ClusterIP",
            },
        },

        dashboardIngress: kube.Ingress(cluster.name("dashboard")) {
            metadata+: cluster.metadata {
                annotations+: {
                    "kubernetes.io/tls-acme": "true",
                    "certmanager.k8s.io/cluster-issuer": "letsencrypt-prod",
                },
            },
            spec+: {
                tls: [
                    {
                        hosts: ["%s.hswaw.net" % name],
                        secretName: cluster.name("dashboard"),
                    },
                ],
                rules: [
                    {
                        host: "%s.hswaw.net" % name,
                        http: {
                            paths: [
                                { path: "/", backend: cluster.dashboardService.name_port },
                            ]
                        },
                    }
                ],
            },
        },

        # Benji is a backup tool, external to rook, that we use for backing up
        # RBDs.
        benji: {
            sa: kube.ServiceAccount(cluster.name("benji")) {
                metadata+: cluster.metadata,
            },

            # Cluster-wide permissions for Benji: list/get on PersistentVolumes
            # and PersistentVolumeClaims, create/update on Events.
            cr: kube.ClusterRole(cluster.name("benji")) {
                # Builds one RBAC rule in the core ("") API group.
                local coreRule(resources, verbs) = {
                    apiGroups: [""],
                    resources: resources,
                    verbs: verbs,
                },

                rules: [
                    coreRule(["persistentvolumes", "persistentvolumeclaims"], ["list", "get"]),
                    coreRule(["events"], ["create", "update"]),
                ],
            },

            # Binds the benji ClusterRole to the benji ServiceAccount.
            crb: kube.ClusterRoleBinding(cluster.name("benji")) {
                subjects_: [cluster.benji.sa],
                roleRef_: cluster.benji.cr,
            },

            # Benji configuration, kept in a Secret as it embeds the S3
            # configuration and the encryption password. The document is
            # serialized as JSON under a .yaml file name; metabackup.sh reads it
            # back with jq.
            config: kube.Secret(cluster.name("benji-config")) {
                local benjiSpec = cluster.spec.benji,

                metadata+: cluster.metadata,
                data_: {
                    "benji.yaml": std.manifestJson({
                        configurationVersion: "1",
                        databaseEngine: "sqlite:////data/benji.sqlite",
                        defaultStorage: "wasabi",
                        storages: [
                            {
                                name: "wasabi",
                                storageId: 1,
                                module: "s3",
                                # Caller-provided S3 settings, always with the
                                # "encrypt" transform (defined below) enabled.
                                configuration: benjiSpec.s3Configuration {
                                    activeTransforms: ["encrypt"],
                                },
                            },
                        ],
                        transforms: [
                            {
                                name: "encrypt",
                                module: "aes_256_gcm",
                                configuration: {
                                    # not secret.
                                    kdfSalt: "T2huZzZpcGhhaWM3QWVwaDhybzRhaDNhbzFpc2VpOWFobDNSZWVQaGVvTWV1bmVaYWVsNHRoYWg5QWVENHNoYWg0ZGFoN3Rlb3NvcHVuZzNpZXZpMm9vTG9vbmc1YWlmb0RlZXAwYmFobDlab294b2hjaG9odjRhbzFsYWkwYWk=",
                                    kdfIterations: 2137,
                                    password: benjiSpec.encryptionPassword,
                                },
                            },
                        ],
                        # One "rbd" IO entry per configured pool.
                        ios: [
                            { module: "rbd", name: poolName }
                            for poolName in benjiSpec.pools
                        ],
                    }),
                },
            },

            # PVC holding Benji's backup metadata (mounted at /data by the
            # CronJob). Yes, this means the metadata lives on the very Ceph
            # cluster that Benji backs up. However:
            #  - metabackup.sh additionally copies the sqlite database over to
            #    s3
            #  - benji can, in a pinch, restore without a database if a version
            #    is known: https://benji-backup.me/restore.html#restoring-without-a-database
            data: kube.PersistentVolumeClaim(cluster.name("benji-data")) {
                metadata+: cluster.metadata,
                spec+: {
                    accessModes: ["ReadWriteOnce"],
                    resources: { requests: { storage: "1Gi" } },
                    storageClassName: cluster.spec.benji.metadataStorageClass,
                },
            },

            # Extra scripts, shipped as a ConfigMap and mounted by the CronJob
            # at /usr/local/extrabins.
            #
            # NOTE: anything inside the ||| text blocks is shell script content,
            # not jsonnet; jsonnet strips the indentation of each block's first
            # line from every line, so heredoc bodies and their EOF terminators
            # must stay at the block's base indentation to end up at column 0.
            extrabins: kube.ConfigMap(cluster.name("benji-extrabins")) {
                metadata+: cluster.metadata,
                data: {
                    # Rotates metabackup-N.sqlite3 objects in the backup bucket
                    # and uploads /data/benji.sqlite as metabackup-0.sqlite3.
                    # Credentials and bucket name are read with jq from
                    # /etc/benji/benji.yaml. This block is not %-formatted, so
                    # "%(bucket)s" is passed to s3cmd literally.
                    "metabackup.sh" : |||
                        # Make backups of sqlite3 metadata used by Benji.
                        # The backups live in the same bucket as backups, and the metabackups
                        # are named `metabackup-0..10`, where 0 is the newest backup. Any time
                        # this script is called, backups get shifted one way to the left (9 to 10,
                        # 8 to 9, etc). This ensures we have at least 10 backup replicas.

                        set -e

                        which s3cmd || pip install --upgrade s3cmd

                        AWS_ACCESS_KEY_ID=$(jq -r .storages[0].configuration.awsAccessKeyId < /etc/benji/benji.yaml)
                        AWS_SECRET_ACCESS_KEY=$(jq -r .storages[0].configuration.awsSecretAccessKey < /etc/benji/benji.yaml)
                        BUCKET=$(jq -r .storages[0].configuration.bucketName < /etc/benji/benji.yaml)

                        s3() {
                            s3cmd --host=s3.wasabisys.com \
                                "--host-bucket=%(bucket)s.s3.wasabisys.com" \
                                --region=eu-central-1 \
                                --access_key=$AWS_ACCESS_KEY_ID \
                                --secret_key=$AWS_SECRET_ACCESS_KEY \
                                "$@"
                        }

                        # Copy over old backups, if they exist.
                        for i in `seq 9 -1 0`; do
                            from="s3://$BUCKET/metabackup-$i.sqlite3"
                            to="s3://$BUCKET/metabackup-$((i+1)).sqlite3"

                            if [[ $(s3 ls $from | wc -l) -eq 0 ]]; then
                                echo "$from does not exist, skipping shift."
                                continue
                            fi
                            echo "Moving $from to $to..."
                            s3 mv $from $to
                        done

                        # Make new metabackup.
                        s3 put /data/benji.sqlite s3://$BUCKET/metabackup-0.sqlite3

                    |||,
                    # Writes /etc/ceph/keyring from $ROOK_ADMIN_SECRET and
                    # /etc/ceph/ceph.conf from /etc/rook/mon-endpoints, then
                    # leaves a background loop that polls the mon-endpoints
                    # file every 10 seconds and rewrites the config when its
                    # change time differs.
                    "get-rook-creds.sh": |||
                        # Based on the Rook Toolbox /usr/local/bin/toolbox.sh script.
                        # Copyright 2016 The Rook Authors. All rights reserved.

                        CEPH_CONFIG="/etc/ceph/ceph.conf"
                        MON_CONFIG="/etc/rook/mon-endpoints"
                        KEYRING_FILE="/etc/ceph/keyring"

                        # create a ceph config file in its default location so ceph/rados tools can be used
                        # without specifying any arguments
                        write_endpoints() {
                            endpoints=$(cat ${MON_CONFIG})

                            # filter out the mon names
                            mon_endpoints=$(echo ${endpoints} | sed 's/[a-z]\+=//g')

                            # filter out the legacy mon names
                            mon_endpoints=$(echo ${mon_endpoints} | sed 's/rook-ceph-mon[0-9]\+=//g')

                            DATE=$(date)
                            echo "$DATE writing mon endpoints to ${CEPH_CONFIG}: ${endpoints}"
                            cat <<EOF > ${CEPH_CONFIG}
                        [global]
                        mon_host = ${mon_endpoints}

                        [client.admin]
                        keyring = ${KEYRING_FILE}
                        EOF
                        }

                        # watch the endpoints config file and update if the mon endpoints ever change
                        watch_endpoints() {
                            # get the timestamp for the target of the soft link
                            real_path=$(realpath ${MON_CONFIG})
                            initial_time=$(stat -c %Z ${real_path})
                            while true; do
                               real_path=$(realpath ${MON_CONFIG})
                               latest_time=$(stat -c %Z ${real_path})

                               if [[ "${latest_time}" != "${initial_time}" ]]; then
                                 write_endpoints
                                 initial_time=${latest_time}
                               fi
                               sleep 10
                            done
                        }

                        # create the keyring file
                        cat <<EOF > ${KEYRING_FILE}
                        [client.admin]
                        key = ${ROOK_ADMIN_SECRET}
                        EOF

                        # write the initial config file
                        write_endpoints

                        # continuously update the mon endpoints if they fail over
                        watch_endpoints &
                    |||
                },
            },

            # Daily backup job. The container sets up Ceph credentials
            # (get-rook-creds.sh), runs benji-backup-pvc for the configured
            # pools, enforces the retention policy, runs cleanup, and finally
            # runs metabackup.sh.
            cronjob: kube.CronJob(cluster.name("benji")) {
                # Space-separated --pool-filter flags, one per configured pool.
                local poolFilters = std.join(" ", [
                    "--pool-filter=%s" % [poolName]
                    for poolName in cluster.spec.benji.pools
                ]),

                metadata+: cluster.metadata,
                # CronJob spec.
                spec+: {
                    schedule: "42 0 * * *",  # Every day at 00:42.
                    jobTemplate+: {
                        # Job spec.
                        spec+: {
                            # Hide any selector field coming from the library defaults.
                            selector:: null,
                            template+: {
                                # Pod spec.
                                spec+: {
                                    serviceAccountName: cluster.benji.sa.metadata.name,
                                    volumes_: {
                                        benjiconfig: kube.SecretVolume(cluster.benji.config),
                                        data: kube.PersistentVolumeClaimVolume(cluster.benji.data),
                                        extrabins: kube.ConfigMapVolume(cluster.benji.extrabins),
                                        # Exposes the "data" key of this ConfigMap as
                                        # /etc/rook/mon-endpoints (see the mount below).
                                        monendpoints: {
                                            configMap: {
                                                name: "rook-ceph-mon-endpoints",
                                                items: [{ key: "data", path: "mon-endpoints" }],
                                            },
                                        },
                                    },
                                    containers_: {
                                        benji: kube.Container(cluster.name("benji")) {
                                            # TODO(q3k): switch back to upstream after pull/65 goes in.
                                            # Currently this is being built from github.com/q3k/benji.
                                            # https://github.com/elemental-lf/benji/pull/65
                                            image: "registry.k0.hswaw.net/q3k/benji-k8s:20191221-2336",
                                            env_: {
                                                # Consumed by get-rook-creds.sh to write the keyring.
                                                ROOK_ADMIN_SECRET: {
                                                    secretKeyRef: { name: "rook-ceph-mon", key: "admin-secret" },
                                                },
                                            },
                                            volumeMounts_: {
                                                benjiconfig: { mountPath: "/etc/benji" },
                                                data: { mountPath: "/data" },
                                                extrabins: { mountPath: "/usr/local/extrabins" },
                                                monendpoints: { mountPath: "/etc/rook" },
                                            },
                                            command: [
                                                "bash",
                                                "-c",
                                                |||
                                                    bash /usr/local/extrabins/get-rook-creds.sh
                                                    benji-backup-pvc %s
                                                    benji-command enforce latest3,hours48,days7,months12
                                                    benji-command cleanup
                                                    bash /usr/local/extrabins/metabackup.sh
                                                ||| % [poolFilters],
                                            ],
                                        },
                                    },
                                },
                            },
                        },
                    },
                },
            },
        },
    },

    // A CephBlockPool called `name` in the cluster's namespace, together with
    // a StorageClass of the same name that provisions ext4 volumes from it.
    //
    // Users must override the hidden `spec` field; it becomes the
    // CephBlockPool's spec verbatim (going by the wrapper's name, it is
    // expected to describe a replicated pool).
    ReplicatedBlockPool(cluster, name):: {
        local this = self,
        name:: name,

        spec:: error "spec must be specified",

        pool: kube._Object("ceph.rook.io/v1", "CephBlockPool", name) {
            metadata+: cluster.metadata,
            spec: this.spec,
        },

        storageClass: kube.StorageClass(name) {
            local poolMeta = this.pool.metadata,

            provisioner: "ceph.rook.io/block",
            reclaimPolicy: "Retain",
            parameters: {
                blockPool: poolMeta.name,
                clusterNamespace: poolMeta.namespace,
                fstype: "ext4",
            },
        },
    },

    // A CephBlockPool called `name` (user-supplied spec; presumably
    // erasure-coded, going by the wrapper's name) paired with a replicated
    // `<name>-metadata` pool, plus a StorageClass that uses the former as its
    // dataBlockPool and the latter as its blockPool.
    //
    // Users must override the hidden `spec` field, and may override
    // `metadataReplicas`.
    ECBlockPool(cluster, name):: {
        local this = self,
        name:: name,
        // Replica count of the `<name>-metadata` pool.
        metadataReplicas:: 3,

        spec:: error "spec must be specified",

        pool: kube._Object("ceph.rook.io/v1", "CephBlockPool", name) {
            metadata+: cluster.metadata,
            spec: this.spec,
        },
        metapool: kube._Object("ceph.rook.io/v1", "CephBlockPool", name + "-metadata") {
            metadata+: cluster.metadata,
            spec: {
                failureDomain: "host",
                replicated: { size: this.metadataReplicas },
            },
        },

        storageClass: kube.StorageClass(name) {
            local dataMeta = this.pool.metadata,

            provisioner: "ceph.rook.io/block",
            reclaimPolicy: "Retain",
            parameters: {
                blockPool: this.metapool.metadata.name,
                dataBlockPool: dataMeta.name,
                clusterNamespace: dataMeta.namespace,
                fstype: "ext4",
            },
        },
    },

    // Wrapper around a rook CephObjectRealm, which corresponds to a radosgw
    // realm.
    //
    // A realm is a 'world' of radosgw user-facing metadata (credentials,
    // buckets) and of the underlying structures, zonegroups and zones, that it
    // contains. A single Ceph cluster can serve multiple realms by running
    // multiple radosgw instances.
    S3ObjectRealm(cluster, name):: {
        // Hidden back-reference; S3ObjectZoneGroup and S3ObjectStore reach the
        // cluster metadata through it.
        cluster:: cluster,

        realm: kube._Object("ceph.rook.io/v1", "CephObjectRealm", name) { metadata+: cluster.metadata },
    },

    // Wrapper around a rook CephObjectZoneGroup, which corresponds to a
    // radosgw zonegroup.
    //
    // A zonegroup contains zones. Zones within a zonegroup serve a concise
    // view of objects in buckets, and sync between each other to eventually
    // contain the same data.
    //
    // Exactly one zonegroup within a realm must be the 'master' zonegroup; it
    // holds and replicates the metadata of the realm. All realm operations via
    // radosgw-admin must be performed within the master zonegroup.
    S3ObjectZoneGroup(realm, name):: {
        local realmName = realm.realm.metadata.name,
        local clusterMeta = realm.cluster.metadata,

        // Hidden back-reference to the S3ObjectRealm this zonegroup is part of.
        realm:: realm,

        zonegroup: kube._Object("ceph.rook.io/v1", "CephObjectZoneGroup", name) {
            metadata+: clusterMeta,
            spec+: { realm: realmName },
        },
    },

    // This is a CephObjectZone but also a CephObjectStore.
    //
    // Rook attempts to hide away Ceph's radosgw multisite structures
    // (realm/zonegroup/zone) by presenting a single CRD named
    // 'CephObjectStore'. When such a resource is created, Rook will create a
    // realm, zonegroup and zone under the hood, as a radosgw zone is required
    // to serve data, and a radosgw zone cannot exist without a zonegroup, and
    // a radosgw zonegroup cannot exist without a realm.
    //
    // However, rook also exposes the lower-level API by letting the user
    // specify 'zone' in the ObjectStore's spec, which should point to a
    // CephObjectZone. Then, an entirely different reconciliation codepath is
    // taken and instead users are expected to manage
    // CephObject{Realm,ZoneGroup,Zone} manually at Ceph's native abstraction
    // level.
    //
    // CephObjectStore not only represents a Ceph zone (and possibly
    // zonegroup/realm), but also pods and services that are required to serve
    // radosgw data publicly. That's why S3ObjectStore takes parameters like
    // 'public port' and 'instance number'.
    //
    // To add to the confusion, our S3ObjectStore wrapper also sprinkles in an
    // Ingress with TLS to terminate the above service, and automatically
    // creates a CephObjectZone.
    //
    // This whole jsonnet abstraction basically forces users to manually create
    // realms and zonegroups, but makes it very easy to do so. By forcing these
    // to be explicitly created by rook objects, only the 'multi-site'
    // reconciliation codepath is taken in rook, making things a bit more
    // predictable.
    //
    // Parameters:
    //   zonegroup: an S3ObjectZoneGroup (its `zonegroup` object and its
    //              `realm.cluster.metadata` are used).
    //   name:      name of the zone, the object store and the ingress.
    // Users must override `spec.dataPool` and `spec.metadataPool`.
    S3ObjectStore(zonegroup, name):: {
        local store = self,
        // Spec of the CephObjectZone (the zoneGroup reference is added below).
        spec:: {
            dataPool: error "spec.dataPool must be specified",
            metadataPool: error "spec.metadataPool must be specified",
        },

        cfg:: {
            // We want to have each rgw run under a domain corresponding to the
            // zone it's running in, but also to the zonegroup it's running in.
            // This will allow us to DNS loadbalance a zonegroup to be backed
            // by multiple zone ingresses.
            domainParts: [
                // The zone is an object of this store (see `zone` below), not
                // of the zonegroup wrapper, which only carries `realm` and
                // `zonegroup` - hence store.zone, not zonegroup.zone.
                store.zone.metadata.name,
                zonegroup.zonegroup.metadata.name,
            ],
            domains: [
                "object.ceph-%s.hswaw.net" % [part]
                for part in cfg.domainParts
            ],
        },
        local cfg = self.cfg,

        // The radosgw zone backing this store, placed in the given zonegroup.
        zone: kube._Object("ceph.rook.io/v1", "CephObjectZone", name) {
            metadata+: zonegroup.realm.cluster.metadata,
            spec: store.spec {
                zoneGroup: zonegroup.zonegroup.metadata.name,
            },
        },

        // The object store proper; it points at the zone above by name, which
        // selects rook's multi-site codepath (see the header comment).
        objectStore: kube._Object("ceph.rook.io/v1", "CephObjectStore", name) {
            metadata+: zonegroup.realm.cluster.metadata,
            spec: {
                gateway: {
                    port: 80,
                    instances: 1,
                    allNodes: false,
                },
                zone: {
                    name: name,
                },
                preservePoolsOnDelete: true,
            },
        },

        // Public ingress with TLS, with one rule per domain in cfg.domains.
        objectIngress: kube.Ingress(name) {
            metadata+: zonegroup.realm.cluster.metadata {
                annotations+: {
                    "kubernetes.io/tls-acme": "true",
                    "certmanager.k8s.io/cluster-issuer": "letsencrypt-prod",
                    // NOTE(review): "0" presumably lifts nginx's request body
                    // size limit for large uploads - confirm against
                    // ingress-nginx docs.
                    "nginx.ingress.kubernetes.io/proxy-body-size": "0",
                },
            },

            spec+: {
                tls: [
                    {
                        hosts: cfg.domains,
                        secretName: "%s-tls" % [name],
                    },
                ],
                rules: [
                    {
                        host: domain,
                        http: {
                            paths: [
                                {
                                    path: "/",
                                    backend: {
                                        // Presumably the Service that rook
                                        // creates for this object store's
                                        // gateway (port 80, as set above).
                                        serviceName: "rook-ceph-rgw-%s" % [name],
                                        servicePort: 80,
                                    },
                                },
                            ]
                        },
                    }
                    for domain in cfg.domains
                ],
            },
        },
    },
}
