cluster: k0: move ceph-waw3 to proper realm/zonegroup
With this we can use Ceph's multi-site support to easily migrate to our
new k0 Ceph cluster.
This migration was done by using radosgw-admin to rename the existing
realm/zonegroup to the new names (hscloud and eu), and then reworking
the jsonnet so that the Rook operator would effectively do nothing.
It sounds weird that creating a bunch of CRs like
CephObject{Realm,ZoneGroup,Zone} would be a no-op for the operator,
but that's how Rook works - a CephObjectStore generally creates
everything that the above CRs would create too, but implicitly. Adding
the extra CRs just allows specifying extra settings, like names.
(it wasn't fully a no-op, as the rgw daemon is parametrized by
realm/zonegroup/zone names, so it had to be restarted)
We also make the radosgw serve under object.ceph-eu.hswaw.net, which
allows us to start using a zonegroup URL right away instead of the
zone-only URL.
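
For illustration, the reworked jsonnet composes roughly like this (a
sketch only: the import path, zone name and pool specs below are
illustrative, and 'cluster' stands for the existing ceph-waw3 Cluster
object):

    local rook = import "lib/rook.libsonnet";

    function(cluster) {
        realm: rook.S3ObjectRealm(cluster, "hscloud"),
        zonegroup: rook.S3ObjectZoneGroup(self.realm, "eu"),
        // Creates the CephObjectZone, the CephObjectStore serving it,
        // and a TLS-terminating Ingress in front of the rgw service.
        store: rook.S3ObjectStore(self.zonegroup, "waw3") {
            spec+: {
                dataPool: { failureDomain: "host", replicated: { size: 3 } },
                metadataPool: { failureDomain: "host", replicated: { size: 3 } },
            },
        },
    }
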
Change-Id: I4dca55a705edb3bd28e54f50982c85720a17b877
diff --git a/cluster/kube/lib/rook.libsonnet b/cluster/kube/lib/rook.libsonnet
index 6646df5..4acf80d 100644
--- a/cluster/kube/lib/rook.libsonnet
+++ b/cluster/kube/lib/rook.libsonnet
@@ -1087,38 +1087,133 @@
},
},
- S3ObjectStore(cluster, name):: {
- local store = self,
- spec:: error "spec must be specified",
- objectStore: kube._Object("ceph.rook.io/v1", "CephObjectStore", name) {
+ // This is a rook CephObjectRealm which corresponds to a radosgw realm.
+ //
+ // A realm is a 'world' of radosgw user-facing metadata, like credentials,
+ // buckets, and underlying structures like zones and zonegroups. A realm
+ // contains zonegroups and zones, but a single Ceph cluster can actually
+ // serve multiple realms, by running multiple radosgw instances.
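+ //
+ // Example (sketch; 'cluster' is a Cluster object from this library):
+ //   S3ObjectRealm(cluster, "hscloud")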
+ S3ObjectRealm(cluster, name):: {
+ cluster:: cluster,
+ realm: kube._Object("ceph.rook.io/v1", "CephObjectRealm", name) {
metadata+: cluster.metadata,
+ },
+ },
+
+ // This is a rook CephObjectZoneGroup which corresponds to a radosgw
+ // zonegroup.
+ //
+ // A zonegroup contains zones, and zones within a zonegroup will serve a
+ // consistent view of objects in buckets, and will sync with each other to
+ // eventually contain the same data.
+ //
+ // A single zonegroup within a realm must be a 'master' zonegroup, and will
+ // then hold and replicate the metadata of this realm. All realm operations
+ // via radosgw-admin must be performed within the master zonegroup.
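+ //
+ // Example (sketch, building on the realm above):
+ //   S3ObjectZoneGroup(S3ObjectRealm(cluster, "hscloud"), "eu")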
+ S3ObjectZoneGroup(realm, name):: {
+ realm:: realm,
+ zonegroup: kube._Object("ceph.rook.io/v1", "CephObjectZoneGroup", name) {
+ metadata+: realm.cluster.metadata,
+ spec+: {
+ realm: realm.realm.metadata.name,
+ },
+ },
+ },
+
+ // This is a CephObjectZone but also a CephObjectStore.
+ //
+ // Rook attempts to hide away Ceph's radosgw multisite structures
+ // (realm/zonegroup/zone) by presenting a single CRD named
+ // 'CephObjectStore'. When such a resource is created, Rook will create a
+ // realm, zonegroup and zone under the hood, as a radosgw zone is required
+ // to serve data, and a radosgw zone cannot exist without a zonegroup, and
+ // a radosgw zonegroup cannot exist without a realm.
+ //
+ // However, rook also exposes the lower-level API by letting the user
+ // specify 'zone' in the ObjectStore's spec, which should point to a
+ // CephObjectZone. Then, an entirely different reconciliation codepath is
+ // taken and instead users are expected to manage
+ // CephObject{Realm,ZoneGroup,Zone} manually at Ceph's native abstraction
+ // level.
+ //
+ // CephObjectStore not only represents a Ceph zone (and possibly
+ // zonegroup/realm), but also pods and services that are required to serve
+ // radosgw data publicly. That's why S3ObjectStore takes parameters like
+ // 'public port' and 'instance number'.
+ //
+ // To add to the confusion, our S3ObjectStore wrapper also sprinkles in an
+ // Ingress with TLS to terminate the above service, and automatically
+ // creates a CephObjectZone.
+ //
+ // This whole jsonnet abstraction basically forces users to manually create
+ // realms and zonegroups, but makes it very easy to do so. By forcing these
+ // to be explicitly created by rook objects, only the 'multi-site'
+ // reconciliation codepath is taken in rook, making things a bit more
+ // predictable.
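+ //
+ // Example (sketch; the zone name and pool specs are placeholders):
+ //   S3ObjectStore(zonegroup, "waw3") {
+ //     spec+: { dataPool: { /* ... */ }, metadataPool: { /* ... */ } },
+ //   }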
+ S3ObjectStore(zonegroup, name):: {
+ local store = self,
+ spec:: {
+ dataPool: error "spec.dataPool must be specified",
+ metadataPool: error "spec.metadataPool must be specified",
+ },
+
+ cfg:: {
+ // We want to have each rgw run under a domain corresponding to the
+ // zone it's running in, but also to the zonegroup it's running in.
+ // This will allow us to DNS loadbalance a zonegroup to be backed
+ // by multiple zone ingresses.
+ domainParts: [
+ store.zone.metadata.name,
+ zonegroup.zonegroup.metadata.name,
+ ],
+ domains: [
+ "object.ceph-%s.hswaw.net" % [part]
+ for part in cfg.domainParts
+ ],
+ },
+ local cfg = self.cfg,
+
+ zone: kube._Object("ceph.rook.io/v1", "CephObjectZone", name) {
+ metadata+: zonegroup.realm.cluster.metadata,
spec: store.spec {
+ zoneGroup: zonegroup.zonegroup.metadata.name,
+ },
+ },
+
+ objectStore: kube._Object("ceph.rook.io/v1", "CephObjectStore", name) {
+ metadata+: zonegroup.realm.cluster.metadata,
+ spec: {
gateway: {
port: 80,
instances: 1,
allNodes: false,
},
+ zone: {
+ name: name,
+ },
+ preservePoolsOnDelete: true,
},
},
objectIngress: kube.Ingress(name) {
- metadata+: cluster.metadata {
+ metadata+: zonegroup.realm.cluster.metadata {
annotations+: {
"kubernetes.io/tls-acme": "true",
"certmanager.k8s.io/cluster-issuer": "letsencrypt-prod",
"nginx.ingress.kubernetes.io/proxy-body-size": "0",
},
},
+
spec+: {
tls: [
{
- hosts: ["object.%s.hswaw.net" % [cluster.metadata.namespace]],
+ hosts: cfg.domains,
secretName: "%s-tls" % [name],
},
],
rules: [
{
- host: "object.%s.hswaw.net" % [cluster.metadata.namespace],
+ host: domain,
http: {
paths: [
{
@@ -1131,6 +1226,7 @@
]
},
}
+ for domain in cfg.domains
],
},
},