cluster/kube/lib/cockroachdb: use manual node pinning
We move away from the StatefulSet based deployment to manually starting
a deployment per intended node. This allows us to pin individual
instances of Cockroach to particular nodes, so that they stay
co-located with their data.
diff --git a/cluster/kube/lib/cockroachdb.libsonnet b/cluster/kube/lib/cockroachdb.libsonnet
index def9bcb..4ce2af7 100644
--- a/cluster/kube/lib/cockroachdb.libsonnet
+++ b/cluster/kube/lib/cockroachdb.libsonnet
@@ -1,9 +1,16 @@
-# Deploy a 3-node CockroachDB cluster in secure mode.
+# Deploy a CockroachDB cluster in secure mode.
+# This creates an N-node cluster based on a given static topology.
# Can be used either in own namespace or in an existing one:
# crdb: cockroachdb.Cluster("q3kdb") {
# cfg+: {
# namespace: "q3k", // if not given, will create 'q3kdb' namespace
+# topology: [
+# { name: "a", node: "bc01n01.hswaw.net", ip: "185.236.240.35" },
+# { name: "b", node: "bc01n02.hswaw.net", ip: "185.236.240.36" },
+# { name: "c", node: "bc01n03.hswaw.net", ip: "185.236.240.37" },
+# ],
+# hostPath: "/var/db/cockroach-q3k",
# },
#},
#
@@ -14,7 +21,7 @@
# Then, you can create some users and databases for applications:
# defaultdb> CREATE DATABASE wykop;
# defaultdb> CREATE USER bialkov PASSWORD hackme;
-# defaultdb> GRANT ALL ON DATABASE wykop to bialkov;
+# defaultdb> GRANT ALL ON DATABASE wykop TO bialkov;
#
# You are then ready to access the database via the public service from your application.
#
@@ -36,6 +43,13 @@
cfg:: {
image: "cockroachdb/cockroach:v19.1.0",
+
+ # Must be unique per cluster.
+ portServe: 26257,
+ portHttp: 8080,
+ hostPath: error "hostPath must be defined",
+ topology: error "topology must be defined",
+
namespace: null,
ownNamespace: cluster.cfg.namespace == null,
},
@@ -57,8 +71,6 @@
name(suffix):: if cluster.cfg.ownNamespace then suffix else name + "-" + suffix,
- hosts:: ["%s-%d.%s.cluster.local" % [cluster.statefulSet.metadata.name, n, cluster.internalService.host] for n in std.range(0, cluster.statefulSet.spec.replicas)],
-
pki: {
selfSignedIssuer: cm.Issuer(cluster.name("selfsigned")) {
metadata+: cluster.metadata,
@@ -99,14 +111,14 @@
},
commonName: "node",
dnsNames: [
- "localhost",
- "127.0.0.1",
cluster.publicService.metadata.name,
std.join(".", [cluster.publicService.metadata.name, cluster.metadata.namespace ]),
+ cluster.publicService.host,
std.join(".", [cluster.publicService.host, "cluster.local" ]),
- std.join(".", [ "*", cluster.internalService.metadata.name ]),
- std.join(".", [ "*", cluster.internalService.metadata.name, cluster.metadata.namespace ]),
- std.join(".", [ "*", cluster.internalService.host, "cluster.local" ]),
+ std.join(".", [cluster.publicService.metadata.name, cluster.metadata.namespace ]),
+ ] + [
+ "%s.cluster.local" % s.service.host
+ for s in cluster.servers
],
},
},
@@ -147,35 +159,15 @@
publicService: kube.Service(cluster.name("public")) {
metadata+: cluster.metadata,
- target_pod:: cluster.statefulSet.spec.template,
+ target_pod:: cluster.servers[0].deploy.spec.template,
spec+: {
ports: [
- { name: "grpc", port: 26257, targetPort: 26257 },
- { name: "http", port: 8080, targetPort: 8080 },
+ { name: "grpc", port: cluster.cfg.portServe, targetPort: cluster.cfg.portServe },
+ { name: "http", port: cluster.cfg.portHttp, targetPort: cluster.cfg.portHttp },
],
},
},
- internalService: kube.Service(cluster.name("internal")) {
- metadata+: cluster.metadata + {
- annotations+: {
- "service.alpha.kubernetes.io/tolerate-unready-endpoints": "true",
- "prometheus.io/scrape": "true",
- "prometheus.io/path": "_status/vars",
- "prometheus.io/port": "8080",
- },
- },
- target_pod:: cluster.statefulSet.spec.template,
- spec+: {
- ports: [
- { name: "grpc", port: 26257, targetPort: 26257 },
- { name: "http", port: 8080, targetPort: 8080 },
- ],
- publishNotReadyAddresses: true,
- clusterIP: "None",
- },
- },
-
podDisruptionBudget: kube.PodDisruptionBudget(cluster.name("pod")) {
metadata+: cluster.metadata,
spec: {
@@ -188,128 +180,137 @@
},
},
- statefulSet: kube.StatefulSet(cluster.name("cockroachdb")) {
- metadata+: cluster.metadata {
- labels+: {
- "app.kubernetes.io/component": "server",
- },
- },
- spec+: {
- serviceName: cluster.internalService.metadata.name,
- replicas: 3,
- template: {
- metadata: cluster.statefulSet.metadata,
+ servers: [
+ {
+ local server = self,
+ service: kube.Service(cluster.name("server-" + el.name)) {
+ metadata+: cluster.metadata + {
+ annotations+: {
+ "service.alpha.kubernetes.io/tolerate-unready-endpoints": "true",
+ "prometheus.io/scrape": "true",
+ "prometheus.io/path": "_status/vars",
+ "prometheus.io/port": std.toString(cluster.cfg.portHttp),
+ },
+ },
+ target_pod:: server.deploy.spec.template,
spec+: {
- dnsPolicy: "ClusterFirst",
- serviceAccountName: cluster.serviceAccount.metadata.name,
- affinity: {
- podAntiAffinity: {
- preferredDuringSchedulingIgnoredDuringExecution: [
- {
- weight: 100,
- podAffinityTerm: {
- labelSelector: {
- matchExpressions: [
- {
- key: "app.kubernetes.io/component",
- operator: "In",
- values: [ "cockroachdb" ],
- },
- ],
+ ports: [
+ { name: "grpc", port: cluster.cfg.portServe, targetPort: cluster.cfg.portServe },
+ { name: "http", port: cluster.cfg.portHttp, targetPort: cluster.cfg.portHttp },
+ ],
+ publishNotReadyAddresses: true,
+ clusterIP: "None",
+ },
+ },
+ deploy: kube.Deployment(cluster.name("server-" + el.name)) {
+ metadata+: cluster.metadata {
+ labels+: {
+ "app.kubernetes.io/component": "server",
+ "kubernetes.hackerspace.pl/cockroachdb-server": el.name,
+ },
+ },
+ spec+: {
+ template+: {
+ metadata: server.deploy.metadata,
+ spec+: {
+ dnsPolicy: "ClusterFirst",
+ serviceAccountName: cluster.serviceAccount.metadata.name,
+ nodeSelector: {
+ "kubernetes.io/hostname": el.node,
+ },
+ containers: [
+ kube.Container("cockroachdb") {
+ image: cluster.cfg.image,
+ imagePullPolicy: "IfNotPresent",
+ resources: {
+ requests: {
+ cpu: "2",
+ memory: "6Gi",
},
- topologyKey: "kubernetes.io/hostname",
+ limits: {
+ memory: "6Gi",
+ },
+ },
+ ports_: {
+ "grpc": { containerPort: cluster.cfg.portServe },
+ "http": { containerPort: cluster.cfg.portHttp },
+ },
+ livenessProbe: {
+ httpGet: {
+ path: "/health",
+ port: "http",
+ },
+ initialDelaySeconds: 30,
+ periodSeconds: 5,
+ },
+ readinessProbe: {
+ httpGet: {
+ path: "/health?ready=1",
+ port: "http",
+ },
+ initialDelaySeconds: 10,
+ periodSeconds: 5,
+ failureThreshold: 2,
+ },
+ volumeMounts: [
+ {
+ name: "datadir",
+ mountPath: "/cockroach/cockroach-data",
+ },
+ {
+ name: "certs",
+ mountPath: "/cockroach/cockroach-certs/node.crt",
+ subPath: "tls.crt",
+ },
+ {
+ name: "certs",
+ mountPath: "/cockroach/cockroach-certs/node.key",
+ subPath: "tls.key",
+ },
+ {
+ name: "certs",
+ mountPath: "/cockroach/cockroach-certs/ca.crt",
+ subPath: "ca.crt",
+ },
+ ],
+ env_: {
+ "COCKROACH_CERTS_DIR": "/cockroach/cockroach-certs",
+ },
+ command: [
+ "/cockroach/cockroach", "start",
+ "--logtostderr",
+ "--certs-dir", "/cockroach/cockroach-certs",
+ "--advertise-host", "%s.cluster.local" % server.service.host,
+ "--cache", "25%", "--max-sql-memory", "25%",
+ "--join", std.join(",", ["%s.cluster.local:%d" % [s.service.host, cluster.cfg.portServe] for s in cluster.servers]),
+ "--listen-addr=0.0.0.0:%d" % cluster.cfg.portServe,
+ "--http-addr=0.0.0.0:%d" % cluster.cfg.portHttp,
+ ],
+ },
+ ],
+ terminationGracePeriodSeconds: 60,
+ volumes: [
+ {
+ name: "datadir",
+ hostPath: {
+ path: cluster.cfg.hostPath,
+ },
+ },
+ {
+ name: "certs",
+ secret: {
+ secretName: cluster.pki.nodeCertificate.spec.secretName,
+ defaultMode: kube.parseOctal("400"),
},
},
],
},
},
- containers: [
- kube.Container("cockroachdb") {
- image: cluster.cfg.image,
- imagePullPolicy: "IfNotPresent",
- resources: {
- requests: {
- cpu: "2",
- memory: "6Gi",
- },
- limits: {
- memory: "6Gi",
- },
- },
- ports_: {
- "grpc": { containerPort: 26257 },
- "http": { containerPort: 8080 },
- },
- livenessProbe: {
- httpGet: {
- path: "/health",
- port: "http",
- },
- initialDelaySeconds: 30,
- periodSeconds: 5,
- },
- readinessProbe: {
- httpGet: {
- path: "/health?ready=1",
- port: "http",
- },
- initialDelaySeconds: 10,
- periodSeconds: 5,
- failureThreshold: 2,
- },
- volumeMounts: [
- {
- name: "datadir",
- mountPath: "/cockroach/cockroach-data",
- },
- {
- name: "certs",
- mountPath: "/cockroach/cockroach-certs/node.crt",
- subPath: "tls.crt",
- },
- {
- name: "certs",
- mountPath: "/cockroach/cockroach-certs/node.key",
- subPath: "tls.key",
- },
- {
- name: "certs",
- mountPath: "/cockroach/cockroach-certs/ca.crt",
- subPath: "ca.crt",
- },
- ],
- env_: {
- "COCKROACH_CERTS_DIR": "/cockroach/cockroach-certs",
- },
- command: [
- "/bin/bash",
- "-ecx",
- "exec /cockroach/cockroach start --logtostderr --certs-dir /cockroach/cockroach-certs --advertise-host $(hostname -f) --http-addr 0.0.0.0 --cache 25% --max-sql-memory 25% --join " + std.join(",", cluster.hosts),
- ],
- },
- ],
- terminationGracePeriodSeconds: 60,
- volumes: [
- {
- name: "datadir",
- emptyDir: {},
- },
- {
- name: "certs",
- secret: {
- secretName: cluster.pki.nodeCertificate.spec.secretName,
- defaultMode: kube.parseOctal("400"),
- },
- },
- ],
},
- },
- podManagementPolicy: "Parallel",
- updateStrategy: {
- type: "RollingUpdate",
- },
- },
- },
+ }
+ }
+ for el in cluster.cfg.topology
+ ],
initJob: kube.Job(cluster.name("init")) {
metadata+: cluster.metadata,
@@ -328,7 +329,7 @@
command: [
"/bin/bash",
"-ecx",
- "/cockroach/cockroach init --host=" + cluster.hosts[0],
+ "/cockroach/cockroach init --host=%s.cluster.local:%d" % [cluster.servers[0].service.host, cluster.cfg.portServe],
],
volumeMounts: [
{
@@ -377,7 +378,8 @@
image: cluster.cfg.image,
env_: {
"COCKROACH_CERTS_DIR": "/cockroach/cockroach-certs",
- "COCKROACH_HOST": cluster.hosts[0],
+ "COCKROACH_HOST": cluster.publicService.host,
+ "COCKROACH_PORT": std.toString(cluster.cfg.portServe),
},
command: ["sleep", "2147483648"], //(FIXME) keep the client pod running indefinitely
volumeMounts: [