blob: 7525911b6991798a6d3a072838f4179214d56ba2 [file] [log] [blame]
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +01001# Top level cluster configuration.
2
3local kube = import "../../kube/kube.libsonnet";
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +02004local policies = import "../../kube/policies.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +02005
Sergiusz Bazanskiaf3be422019-01-17 18:57:19 +01006local calico = import "lib/calico.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +02007local certmanager = import "lib/cert-manager.libsonnet";
8local cockroachdb = import "lib/cockroachdb.libsonnet";
9local coredns = import "lib/coredns.libsonnet";
Sergiusz Bazanski1e565dc2019-01-18 09:40:59 +010010local metallb = import "lib/metallb.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +020011local metrics = import "lib/metrics.libsonnet";
Sergiusz Bazanskia9c7e862019-04-01 17:56:28 +020012local nginx = import "lib/nginx.libsonnet";
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +020013local prodvider = import "lib/prodvider.libsonnet";
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +020014local registry = import "lib/registry.libsonnet";
Sergiusz Bazanskib7fcc672019-04-01 18:40:50 +020015local rook = import "lib/rook.libsonnet";
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020016local pki = import "lib/pki.libsonnet";
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010017
// Cluster(short, realm) returns the definition of one Kubernetes cluster:
// cluster-wide RBAC objects, pod security policies, and one instantiation of
// each imported component library (calico, coredns, metrics, metallb, nginx,
// cert-manager, rook, registry, pki, prodvider).
//
// Parameters:
//   short - short cluster name (eg. "k0").
//   realm - DNS realm the cluster lives in (eg. "hswaw.net").
//
// Hidden fields (`::`) are available to callers but are not manifested.
// Instantiations must override cfg.storageClassNameParanoid (via `cfg+:`);
// evaluating the default raises an error.
local Cluster(short, realm) = {
    local cluster = self,
    local cfg = cluster.cfg,

    short:: short,
    realm:: realm,
    // Fully qualified cluster name, "<short>.<realm>".
    fqdn:: "%s.%s" % [cluster.short, cluster.realm],

    cfg:: {
        // Storage class used for internal services (like registry). This must
        // be set to a valid storage class. This can either be a cloud provider class
        // (when running on GKE &co) or a storage class created using rook.
        //
        // This is the key that the registry environment below reads, so the
        // "must be set" sentinel has to carry the same name.
        storageClassNameParanoid: error "storageClassNameParanoid must be set",
    },

    // These are required to let the API Server contact kubelets.
    crAPIServerToKubelet: kube.ClusterRole("system:kube-apiserver-to-kubelet") {
        metadata+: {
            annotations+: {
                "rbac.authorization.kubernetes.io/autoupdate": "true",
            },
            labels+: {
                "kubernetes.io/bootstrapping": "rbac-defaults",
            },
        },
        rules: [
            {
                apiGroups: [""],
                // Expands to nodes/proxy, nodes/stats, nodes/log, nodes/spec, nodes/metrics.
                resources: ["nodes/%s" % r for r in [ "proxy", "stats", "log", "spec", "metrics" ]],
                verbs: ["*"],
            },
        ],
    },
    // Binds the ClusterRole above to the API Server's user identity.
    crbAPIServer: kube.ClusterRoleBinding("system:kube-apiserver") {
        roleRef: {
            apiGroup: "rbac.authorization.k8s.io",
            kind: "ClusterRole",
            name: cluster.crAPIServerToKubelet.metadata.name,
        },
        subjects: [
            {
                apiGroup: "rbac.authorization.k8s.io",
                kind: "User",
                // A cluster API Server authenticates with a certificate whose CN is equal to the FQDN of the cluster.
                name: cluster.fqdn,
            },
        ],
    },

    // This ClusterRole is bound to all humans that log in via prodaccess/prodvider/SSO.
    // It should allow viewing of non-sensitive data for debuggability and openness.
    crViewer: kube.ClusterRole("system:viewer") {
        rules: [
            {
                apiGroups: [""],
                resources: [
                    "nodes",
                    "namespaces",
                    "pods",
                    "configmaps",
                    "services",
                ],
                verbs: ["list"],
            },
            {
                apiGroups: ["metrics.k8s.io"],
                resources: [
                    "nodes",
                    "pods",
                ],
                verbs: ["list"],
            },
            {
                apiGroups: ["apps"],
                resources: [
                    "statefulsets",
                ],
                verbs: ["list"],
            },
            {
                apiGroups: ["extensions"],
                resources: [
                    "deployments",
                    "ingresses",
                ],
                verbs: ["list"],
            }
        ],
    },
    // This ClusterRole is applied (scoped to personal namespace) to all humans.
    crFullInNamespace: kube.ClusterRole("system:admin-namespace") {
        rules: [
            {
                apiGroups: ["*"],
                resources: ["*"],
                verbs: ["*"],
            },
        ],
    },
    // This ClusterRoleBinding allows root access to cluster admins.
    crbAdmins: kube.ClusterRoleBinding("system:admins") {
        roleRef: {
            apiGroup: "rbac.authorization.k8s.io",
            kind: "ClusterRole",
            name: "cluster-admin",
        },
        // One User subject per admin, named "<user>@hackerspace.pl".
        subjects: [
            {
                apiGroup: "rbac.authorization.k8s.io",
                kind: "User",
                name: user + "@hackerspace.pl",
            } for user in [
                "q3k",
                "implr",
                "informatic",
            ]
        ],
    },

    podSecurityPolicies: policies.Cluster {},

    // Namespaces passed to policies.AllowNamespaceInsecure.
    allowInsecureNamespaces: [
        policies.AllowNamespaceInsecure("kube-system"),
        policies.AllowNamespaceInsecure("metallb-system"),
        // TODO(q3k): fix this?
        policies.AllowNamespaceInsecure("ceph-waw2"),
        policies.AllowNamespaceInsecure("ceph-waw3"),
        policies.AllowNamespaceInsecure("matrix"),
        policies.AllowNamespaceInsecure("registry"),
        policies.AllowNamespaceInsecure("internet"),
    ],

    // Allow all service accounts (thus all controllers) to create secure pods.
    crbAllowServiceAccountsSecure: kube.ClusterRoleBinding("policy:allow-all-secure") {
        roleRef_: cluster.podSecurityPolicies.secureRole,
        subjects: [
            {
                kind: "Group",
                apiGroup: "rbac.authorization.k8s.io",
                name: "system:serviceaccounts",
            }
        ],
    },

    // Calico network fabric
    calico: calico.Environment {},
    // CoreDNS for this cluster.
    dns: coredns.Environment {
        cfg+: {
            cluster_domains: [
                "cluster.local",
                cluster.fqdn,
            ],
        },
    },
    // Metrics Server
    metrics: metrics.Environment {},
    // Metal Load Balancer
    metallb: metallb.Environment {
        cfg+: {
            peers: [
                {
                    "peer-address": "185.236.240.33",
                    "peer-asn": 65001,
                    "my-asn": 65002,
                },
            ],
            addressPools: [
                {
                    name: "public-v4-1",
                    protocol: "bgp",
                    addresses: [
                        "185.236.240.48/28",
                    ],
                },
                {
                    name: "public-v4-2",
                    protocol: "bgp",
                    addresses: [
                        "185.236.240.112/28"
                    ],
                },
            ],
        },
    },
    // Main nginx Ingress Controller
    nginx: nginx.Environment {},
    certmanager: certmanager.Environment {},
    // ACME ClusterIssuer pointing at the Let's Encrypt v02 directory, HTTP-01 challenge.
    issuer: kube.ClusterIssuer("letsencrypt-prod") {
        spec: {
            acme: {
                server: "https://acme-v02.api.letsencrypt.org/directory",
                email: "bofh@hackerspace.pl",
                privateKeySecretRef: {
                    name: "letsencrypt-prod"
                },
                http01: {},
            },
        },
    },

    // Rook Ceph storage
    rook: rook.Operator {
        operator+: {
            spec+: {
                // TODO(q3k): Bring up the operator again when stability gets fixed
                // See: https://github.com/rook/rook/issues/3059#issuecomment-492378873
                replicas: 1,
            },
        },
    },

    // Docker registry
    registry: registry.Environment {
        cfg+: {
            domain: "registry.%s" % [cluster.fqdn],
            storageClassName: cfg.storageClassNameParanoid,
            objectStorageName: "waw-hdd-redundant-2-object",
        },
    },

    // TLS PKI machinery
    pki: pki.Environment(cluster.short, cluster.realm),

    // Prodvider
    prodvider: prodvider.Environment {
        cfg+: {
            apiEndpoint: "kubernetes.default.svc.%s" % [cluster.fqdn],
        },
    },
};
249
Sergiusz Bazanski49b9a132019-01-14 00:02:59 +0100250
// Top-level output: the k0 cluster (k0.hswaw.net), together with its
// CockroachDB cluster, its rook-managed Ceph clusters/pools (waw2, waw3) and
// an object store user for nextcloud.
{
    k0: {
        local k0 = self,
        cluster: Cluster("k0", "hswaw.net") {
            cfg+: {
                // Internal services use the paranoid (3 replica) waw2 block pool.
                storageClassNameParanoid: k0.ceph.waw2Pools.blockParanoid.name,
            },
        },
        cockroach: {
            // NOTE(review): the field is called waw2 while the cluster name and
            // hostPath say crdb-waw1 - presumably historical; confirm before renaming.
            waw2: cockroachdb.Cluster("crdb-waw1") {
                cfg+: {
                    topology: [
                        { name: "bc01n01", node: "bc01n01.hswaw.net" },
                        { name: "bc01n02", node: "bc01n02.hswaw.net" },
                        { name: "bc01n03", node: "bc01n03.hswaw.net" },
                    ],
                    hostPath: "/var/db/crdb-waw1",
                },
            },
            clients: {
                cccampix: k0.cockroach.waw2.Client("cccampix"),
                cccampixDev: k0.cockroach.waw2.Client("cccampix-dev"),
                buglessDev: k0.cockroach.waw2.Client("bugless-dev"),
            },
        },
        ceph: {
            // waw1 cluster - dead as of 2019/08/06, data corruption
            // waw2 cluster
            waw2: rook.Cluster(k0.cluster.rook, "ceph-waw2") {
                spec: {
                    mon: {
                        count: 3,
                        allowMultiplePerNode: false,
                    },
                    storage: {
                        useAllNodes: false,
                        useAllDevices: false,
                        config: {
                            databaseSizeMB: "1024",
                            journalSizeMB: "1024",
                        },
                        nodes: [
                            {
                                name: "bc01n01.hswaw.net",
                                location: "rack=dcr01 chassis=bc01 host=bc01n01",
                                devices: [ { name: "sda" } ],
                            },
                            {
                                name: "bc01n02.hswaw.net",
                                location: "rack=dcr01 chassis=bc01 host=bc01n02",
                                devices: [ { name: "sda" } ],
                            },
                            {
                                name: "bc01n03.hswaw.net",
                                location: "rack=dcr01 chassis=bc01 host=bc01n03",
                                devices: [ { name: "sda" } ],
                            },
                        ],
                    },
                    // Hidden field (not manifested as part of spec). Secrets are
                    // read from files at evaluation time; only the first line of
                    // each file is used.
                    benji:: {
                        metadataStorageClass: "waw-hdd-paranoid-2",
                        encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
                        pools: [
                            "waw-hdd-redundant-2",
                            "waw-hdd-redundant-2-metadata",
                            "waw-hdd-paranoid-2",
                            "waw-hdd-yolo-2",
                        ],
                        s3Configuration: {
                            awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
                            awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
                            bucketName: "benji-k0-backups",
                            endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
                        },
                    }
                },
            },
            waw2Pools: {
                // redundant block storage
                blockRedundant: rook.ECBlockPool(k0.ceph.waw2, "waw-hdd-redundant-2") {
                    spec: {
                        failureDomain: "host",
                        erasureCoded: {
                            dataChunks: 2,
                            codingChunks: 1,
                        },
                    },
                },
                // paranoid block storage (3 replicas)
                blockParanoid: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-paranoid-2") {
                    spec: {
                        failureDomain: "host",
                        replicated: {
                            size: 3,
                        },
                    },
                },
                // yolo block storage (no replicas!)
                blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-yolo-2") {
                    spec: {
                        failureDomain: "host",
                        replicated: {
                            size: 1,
                        },
                    },
                },
                // S3 object store: replicated metadata pool, erasure coded data pool.
                objectRedundant: rook.S3ObjectStore(k0.ceph.waw2, "waw-hdd-redundant-2-object") {
                    spec: {
                        metadataPool: {
                            failureDomain: "host",
                            replicated: { size: 3 },
                        },
                        dataPool: {
                            failureDomain: "host",
                            erasureCoded: {
                                dataChunks: 2,
                                codingChunks: 1,
                            },
                        },
                    },
                },
            },
            // waw3 cluster
            waw3: rook.Cluster(k0.cluster.rook, "ceph-waw3") {
                spec: {
                    mon: {
                        count: 1,
                        allowMultiplePerNode: false,
                    },
                    storage: {
                        useAllNodes: false,
                        useAllDevices: false,
                        config: {
                            databaseSizeMB: "1024",
                            journalSizeMB: "1024",
                        },
                        nodes: [
                            {
                                name: "dcr01s22.hswaw.net",
                                location: "rack=dcr01 host=dcr01s22",
                                devices: [
                                    // https://github.com/rook/rook/issues/1228
                                    //{ name: "disk/by-id/wwan-0x" + wwan }
                                    //for wwan in [
                                    //    "5000c5008508c433",
                                    //    "5000c500850989cf",
                                    //    "5000c5008508f843",
                                    //    "5000c5008508baf7",
                                    //]
                                    { name: "sdn" },
                                    { name: "sda" },
                                    { name: "sdb" },
                                    { name: "sdc" },
                                ],
                            },
                            {
                                name: "dcr01s24.hswaw.net",
                                // The host component must name this node. It was
                                // previously a copy-paste of dcr01s22, which placed
                                // both nodes in the same host bucket even though the
                                // waw3 pools use failureDomain "host".
                                // NOTE(review): on a live cluster this moves the
                                // node's OSDs in the CRUSH map and will presumably
                                // trigger data movement - roll out deliberately.
                                location: "rack=dcr01 host=dcr01s24",
                                devices: [
                                    // https://github.com/rook/rook/issues/1228
                                    //{ name: "disk/by-id/wwan-0x" + wwan }
                                    //for wwan in [
                                    //    "5000c5008508ee03",
                                    //    "5000c5008508c9ef",
                                    //    "5000c5008508df33",
                                    //    "5000c5008508dd3b",
                                    //]
                                    { name: "sdm" },
                                    { name: "sda" },
                                    { name: "sdb" },
                                    { name: "sdc" },
                                ],
                            },
                        ],
                    },
                    // Hidden field (not manifested as part of spec). Uses the same
                    // secret files as waw2 but a separate bucket.
                    benji:: {
                        metadataStorageClass: "waw-hdd-redundant-3",
                        encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
                        pools: [
                            "waw-hdd-redundant-3",
                            "waw-hdd-redundant-3-metadata",
                            "waw-hdd-yolo-3",
                        ],
                        s3Configuration: {
                            awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
                            awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
                            bucketName: "benji-k0-backups-waw3",
                            endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
                        },
                    }
                },
            },
            waw3Pools: {
                // redundant block storage
                // NOTE(review): built with ECBlockPool but the spec is replicated
                // (size 2), not erasure coded - confirm this is intended.
                blockRedundant: rook.ECBlockPool(k0.ceph.waw3, "waw-hdd-redundant-3") {
                    metadataReplicas: 2,
                    spec: {
                        failureDomain: "host",
                        replicated: {
                            size: 2,
                        },
                    },
                },
                // yolo block storage (low usage, no host redundancy)
                // NOTE(review): built with ReplicatedBlockPool but the spec is
                // erasure coded (12+4 across OSDs) - confirm this is intended.
                blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw3, "waw-hdd-yolo-3") {
                    spec: {
                        failureDomain: "osd",
                        erasureCoded: {
                            dataChunks: 12,
                            codingChunks: 4,
                        },
                    },
                },
                // S3 object store: both metadata and data pools replicated (size 2).
                objectRedundant: rook.S3ObjectStore(k0.ceph.waw3, "waw-hdd-redundant-3-object") {
                    spec: {
                        metadataPool: {
                            failureDomain: "host",
                            replicated: { size: 2 },
                        },
                        dataPool: {
                            failureDomain: "host",
                            replicated: { size: 2 },
                        },
                    },
                },
            },
        },

        // Used for owncloud.hackerspace.pl, which for now lives on boston-packets.hackerspace.pl.
        nextcloudWaw3: kube.CephObjectStoreUser("nextcloud") {
            metadata+: {
                namespace: "ceph-waw3",
            },
            spec: {
                store: "waw-hdd-redundant-3-object",
                displayName: "nextcloud",
            },
        },
    },
}