blob: f781d1f9dd62c5634db8db8820ddaedb9ad00cad [file] [log] [blame]
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +01001# Top level cluster configuration.
2
3local kube = import "../../kube/kube.libsonnet";
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +02004local policies = import "../../kube/policies.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +02005
Sergiusz Bazanskiaf3be422019-01-17 18:57:19 +01006local calico = import "lib/calico.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +02007local certmanager = import "lib/cert-manager.libsonnet";
8local cockroachdb = import "lib/cockroachdb.libsonnet";
9local coredns = import "lib/coredns.libsonnet";
Sergiusz Bazanski1e565dc2019-01-18 09:40:59 +010010local metallb = import "lib/metallb.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +020011local metrics = import "lib/metrics.libsonnet";
Sergiusz Bazanskia9c7e862019-04-01 17:56:28 +020012local nginx = import "lib/nginx.libsonnet";
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +020013local prodvider = import "lib/prodvider.libsonnet";
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +020014local registry = import "lib/registry.libsonnet";
Sergiusz Bazanskib7fcc672019-04-01 18:40:50 +020015local rook = import "lib/rook.libsonnet";
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020016local pki = import "lib/pki.libsonnet";
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010017
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020018local Cluster(short, realm) = {
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010019 local cluster = self,
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +020020 local cfg = cluster.cfg,
21
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020022 short:: short,
23 realm:: realm,
24 fqdn:: "%s.%s" % [cluster.short, cluster.realm],
25
cfg:: {
    // Storage class used for internal services (like registry). This must
    // be set to a valid storage class. This can either be a cloud provider class
    // (when running on GKE &co) or a storage class created using rook.
    //
    // This is the field that the registry environment in this file reads
    // (cfg.storageClassNameParanoid), so it is the one that must be declared
    // as required: a cluster that forgets to override it then fails with this
    // message instead of a generic "field does not exist" error.
    storageClassNameParanoid: error "storageClassNameParanoid must be set",

    // Kept for backward compatibility with callers that may still reference
    // it. Nothing in this file reads it, and because evaluation is lazy the
    // error only fires if the field is actually accessed without an override.
    storageClassNameRedundant: error "storageClassNameRedundant must be set",
},
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010032
// These are required to let the API Server contact kubelets.
crAPIServerToKubelet: kube.ClusterRole("system:kube-apiserver-to-kubelet") {
    // Subresources of `nodes` that this role grants access to.
    local nodeSubresources = ["proxy", "stats", "log", "spec", "metrics"],

    metadata+: {
        annotations+: { "rbac.authorization.kubernetes.io/autoupdate": "true" },
        labels+: { "kubernetes.io/bootstrapping": "rbac-defaults" },
    },
    rules: [
        {
            apiGroups: [""],
            // Expands to "nodes/proxy", "nodes/stats", ...
            resources: std.map(function(sub) "nodes/" + sub, nodeSubresources),
            verbs: ["*"],
        },
    ],
},
Sergiusz Bazanski5bebbeb2019-01-13 22:08:05 +010051 crbAPIServer: kube.ClusterRoleBinding("system:kube-apiserver") {
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010052 roleRef: {
53 apiGroup: "rbac.authorization.k8s.io",
54 kind: "ClusterRole",
55 name: cluster.crAPIServerToKubelet.metadata.name,
56 },
57 subjects: [
58 {
59 apiGroup: "rbac.authorization.k8s.io",
60 kind: "User",
61 # A cluster API Server authenticates with a certificate whose CN is == to the FQDN of the cluster.
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020062 name: cluster.fqdn,
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010063 },
64 ],
Sergiusz Bazanski49b9a132019-01-14 00:02:59 +010065 },
66
// This ClusterRole is bound to all humans that log in via prodaccess/prodvider/SSO.
// It should allow viewing of non-sensitive data for debuggability and openness.
crViewer: kube.ClusterRole("system:viewer") {
    // Builds a rule granting only the "list" verb on `resources` within a
    // single API group.
    local listOnly(group, resources) = {
        apiGroups: [group],
        resources: resources,
        verbs: ["list"],
    },

    rules: [
        listOnly("", ["nodes", "namespaces", "pods", "configmaps", "services"]),
        listOnly("metrics.k8s.io", ["nodes", "pods"]),
        listOnly("apps", ["statefulsets"]),
        listOnly("extensions", ["deployments", "ingresses"]),
    ],
},
107 // This ClusterRole is applied (scoped to personal namespace) to all humans.
108 crFullInNamespace: kube.ClusterRole("system:admin-namespace") {
109 rules: [
110 {
Sergiusz Bazanskia168c502020-05-11 20:49:31 +0200111 apiGroups: ["", "extensions", "apps"],
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +0200112 resources: ["*"],
113 verbs: ["*"],
114 },
Sergiusz Bazanskia168c502020-05-11 20:49:31 +0200115 {
116 apiGroups: ["batch"],
117 resources: ["jobs", "cronjobs"],
118 verbs: ["*"],
119 },
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +0200120 ],
121 },
// This ClusterRoleBinding allows root access to cluster admins.
crbAdmins: kube.ClusterRoleBinding("system:admins") {
    // Local parts of the admin usernames; each is bound as
    // <name>@hackerspace.pl below.
    local admins = ["q3k", "implr", "informatic"],

    roleRef: {
        apiGroup: "rbac.authorization.k8s.io",
        kind: "ClusterRole",
        name: "cluster-admin",
    },
    subjects: std.map(
        function(admin) {
            apiGroup: "rbac.authorization.k8s.io",
            kind: "User",
            name: "%s@hackerspace.pl" % admin,
        },
        admins
    ),
},
141
142 podSecurityPolicies: policies.Cluster {},
143
// One policies.AllowNamespaceInsecure(...) entry per namespace listed below,
// in the same order as the names appear.
allowInsecureNamespaces: [
    policies.AllowNamespaceInsecure(ns)
    for ns in [
        "kube-system",
        "metallb-system",
        # TODO(q3k): fix this?
        "ceph-waw2",
        "ceph-waw3",
        "matrix",
        "registry",
        "internet",
        # TODO(implr): restricted policy with CAP_NET_ADMIN and tuntap, but no full root
        "implr-vpn",
    ]
],
156
157 // Allow all service accounts (thus all controllers) to create secure pods.
158 crbAllowServiceAccountsSecure: kube.ClusterRoleBinding("policy:allow-all-secure") {
159 roleRef_: cluster.podSecurityPolicies.secureRole,
160 subjects: [
161 {
162 kind: "Group",
163 apiGroup: "rbac.authorization.k8s.io",
164 name: "system:serviceaccounts",
165 }
166 ],
167 },
168
Sergiusz Bazanskiaf3be422019-01-17 18:57:19 +0100169 // Calico network fabric
170 calico: calico.Environment {},
Sergiusz Bazanski49b9a132019-01-14 00:02:59 +0100171 // CoreDNS for this cluster.
Sergiusz Bazanski54490d32019-10-02 20:47:18 +0200172 dns: coredns.Environment {
173 cfg+: {
174 cluster_domains: [
175 "cluster.local",
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +0200176 cluster.fqdn,
Sergiusz Bazanski54490d32019-10-02 20:47:18 +0200177 ],
178 },
179 },
Sergiusz Bazanskiaf3be422019-01-17 18:57:19 +0100180 // Metrics Server
181 metrics: metrics.Environment {},
Sergiusz Bazanski1e565dc2019-01-18 09:40:59 +0100182 // Metal Load Balancer
Sergiusz Bazanski14cbacb2019-04-01 18:00:44 +0200183 metallb: metallb.Environment {
184 cfg+: {
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100185 peers: [
186 {
187 "peer-address": "185.236.240.33",
188 "peer-asn": 65001,
189 "my-asn": 65002,
190 },
191 ],
Sergiusz Bazanski14cbacb2019-04-01 18:00:44 +0200192 addressPools: [
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100193 {
194 name: "public-v4-1",
195 protocol: "bgp",
196 addresses: [
197 "185.236.240.48/28",
198 ],
199 },
200 {
201 name: "public-v4-2",
202 protocol: "bgp",
203 addresses: [
204 "185.236.240.112/28"
205 ],
206 },
Sergiusz Bazanski14cbacb2019-04-01 18:00:44 +0200207 ],
208 },
209 },
Sergiusz Bazanskia9c7e862019-04-01 17:56:28 +0200210 // Main nginx Ingress Controller
211 nginx: nginx.Environment {},
Piotr Dobrowolski79ddbc52019-04-02 13:20:15 +0200212 certmanager: certmanager.Environment {},
Sergiusz Bazanskie31d64f2019-10-02 20:59:26 +0200213 issuer: kube.ClusterIssuer("letsencrypt-prod") {
Piotr Dobrowolski3187c592019-04-02 14:44:04 +0200214 spec: {
215 acme: {
216 server: "https://acme-v02.api.letsencrypt.org/directory",
217 email: "bofh@hackerspace.pl",
218 privateKeySecretRef: {
219 name: "letsencrypt-prod"
220 },
221 http01: {},
222 },
223 },
224 },
Sergiusz Bazanskic6da1272019-04-02 00:06:13 +0200225
Sergiusz Bazanskib7fcc672019-04-01 18:40:50 +0200226 // Rook Ceph storage
Sergiusz Bazanskic3b0f762019-06-20 16:42:19 +0200227 rook: rook.Operator {
228 operator+: {
229 spec+: {
230 // TODO(q3k): Bring up the operator again when stability gets fixed
231 // See: https://github.com/rook/rook/issues/3059#issuecomment-492378873
Sergiusz Bazanskid07861b2019-08-08 17:48:25 +0200232 replicas: 1,
Sergiusz Bazanskic3b0f762019-06-20 16:42:19 +0200233 },
234 },
235 },
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +0200236
237 // Docker registry
238 registry: registry.Environment {
239 cfg+: {
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +0200240 domain: "registry.%s" % [cluster.fqdn],
Sergiusz Bazanskid07861b2019-08-08 17:48:25 +0200241 storageClassName: cfg.storageClassNameParanoid,
242 objectStorageName: "waw-hdd-redundant-2-object",
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +0200243 },
244 },
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +0200245
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +0200246 // TLS PKI machinery
247 pki: pki.Environment(cluster.short, cluster.realm),
248
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +0200249 // Prodvider
Sergiusz Bazanskid186e942019-10-04 13:46:39 +0200250 prodvider: prodvider.Environment {
251 cfg+: {
252 apiEndpoint: "kubernetes.default.svc.%s" % [cluster.fqdn],
253 },
254 },
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +0100255};
256
Sergiusz Bazanski49b9a132019-01-14 00:02:59 +0100257
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +0100258{
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200259 k0: {
260 local k0 = self,
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +0200261 cluster: Cluster("k0", "hswaw.net") {
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +0200262 cfg+: {
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100263 storageClassNameParanoid: k0.ceph.waw2Pools.blockParanoid.name,
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +0200264 },
265 },
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200266 cockroach: {
Sergiusz Bazanskid5338922019-08-09 14:13:50 +0200267 waw2: cockroachdb.Cluster("crdb-waw1") {
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200268 cfg+: {
269 topology: [
Sergiusz Bazanski184678b2019-06-22 02:07:41 +0200270 { name: "bc01n01", node: "bc01n01.hswaw.net" },
271 { name: "bc01n02", node: "bc01n02.hswaw.net" },
272 { name: "bc01n03", node: "bc01n03.hswaw.net" },
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200273 ],
Sergiusz Bazanskid5338922019-08-09 14:13:50 +0200274 hostPath: "/var/db/crdb-waw1",
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200275 },
276 },
Sergiusz Bazanski1fad2e52019-08-01 20:16:27 +0200277 clients: {
278 cccampix: k0.cockroach.waw2.Client("cccampix"),
279 cccampixDev: k0.cockroach.waw2.Client("cccampix-dev"),
Sergiusz Bazanski90e8e682020-03-25 10:55:05 +0100280 buglessDev: k0.cockroach.waw2.Client("bugless-dev"),
Sergiusz Bazanski1fad2e52019-08-01 20:16:27 +0200281 },
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200282 },
283 ceph: {
Sergiusz Bazanskid07861b2019-08-08 17:48:25 +0200284 // waw1 cluster - dead as of 2019/08/06, data corruption
285 // waw2 cluster
286 waw2: rook.Cluster(k0.cluster.rook, "ceph-waw2") {
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200287 spec: {
288 mon: {
289 count: 3,
290 allowMultiplePerNode: false,
291 },
292 storage: {
293 useAllNodes: false,
294 useAllDevices: false,
295 config: {
296 databaseSizeMB: "1024",
297 journalSizeMB: "1024",
298 },
299 nodes: [
300 {
301 name: "bc01n01.hswaw.net",
302 location: "rack=dcr01 chassis=bc01 host=bc01n01",
303 devices: [ { name: "sda" } ],
304 },
305 {
306 name: "bc01n02.hswaw.net",
307 location: "rack=dcr01 chassis=bc01 host=bc01n02",
308 devices: [ { name: "sda" } ],
309 },
310 {
311 name: "bc01n03.hswaw.net",
312 location: "rack=dcr01 chassis=bc01 host=bc01n03",
313 devices: [ { name: "sda" } ],
314 },
315 ],
316 },
Sergiusz Bazanski13bb1bf2019-08-31 16:33:29 +0200317 benji:: {
318 metadataStorageClass: "waw-hdd-paranoid-2",
319 encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
320 pools: [
321 "waw-hdd-redundant-2",
322 "waw-hdd-redundant-2-metadata",
323 "waw-hdd-paranoid-2",
324 "waw-hdd-yolo-2",
325 ],
326 s3Configuration: {
327 awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
328 awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
329 bucketName: "benji-k0-backups",
330 endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
331 },
332 }
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200333 },
334 },
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100335 waw2Pools: {
336 // redundant block storage
337 blockRedundant: rook.ECBlockPool(k0.ceph.waw2, "waw-hdd-redundant-2") {
338 spec: {
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200339 failureDomain: "host",
340 erasureCoded: {
341 dataChunks: 2,
342 codingChunks: 1,
343 },
344 },
345 },
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100346 // paranoid block storage (3 replicas)
347 blockParanoid: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-paranoid-2") {
348 spec: {
349 failureDomain: "host",
350 replicated: {
351 size: 3,
352 },
353 },
354 },
355 // yolo block storage (no replicas!)
356 blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-yolo-2") {
357 spec: {
358 failureDomain: "host",
359 replicated: {
360 size: 1,
361 },
362 },
363 },
364 objectRedundant: rook.S3ObjectStore(k0.ceph.waw2, "waw-hdd-redundant-2-object") {
365 spec: {
366 metadataPool: {
367 failureDomain: "host",
368 replicated: { size: 3 },
369 },
370 dataPool: {
371 failureDomain: "host",
372 erasureCoded: {
373 dataChunks: 2,
374 codingChunks: 1,
375 },
376 },
377 },
378 },
379 },
380 waw3: rook.Cluster(k0.cluster.rook, "ceph-waw3") {
381 spec: {
382 mon: {
Sergiusz Bazanski0dcc7022020-03-28 17:58:19 +0100383 count: 3,
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100384 allowMultiplePerNode: false,
385 },
386 storage: {
387 useAllNodes: false,
388 useAllDevices: false,
389 config: {
390 databaseSizeMB: "1024",
391 journalSizeMB: "1024",
392 },
393 nodes: [
394 {
395 name: "dcr01s22.hswaw.net",
396 location: "rack=dcr01 host=dcr01s22",
397 devices: [
398 // https://github.com/rook/rook/issues/1228
399 //{ name: "disk/by-id/wwan-0x" + wwan }
400 //for wwan in [
401 // "5000c5008508c433",
402 // "5000c500850989cf",
403 // "5000c5008508f843",
404 // "5000c5008508baf7",
405 //]
406 { name: "sdn" },
407 { name: "sda" },
408 { name: "sdb" },
409 { name: "sdc" },
410 ],
411 },
{
    name: "dcr01s24.hswaw.net",
    // The host= label must name this node. It previously read
    // host=dcr01s22, duplicating the label of the dcr01s22 entry in this
    // same list, while the waw3 pools in this file use
    // failureDomain: "host".
    // NOTE(review): changing the location of an already-deployed node may
    // cause Ceph to move data - confirm against the live CRUSH map before
    // applying.
    location: "rack=dcr01 host=dcr01s24",
    devices: [
        // https://github.com/rook/rook/issues/1228
        //{ name: "disk/by-id/wwan-0x" + wwan }
        //for wwan in [
        //    "5000c5008508ee03",
        //    "5000c5008508c9ef",
        //    "5000c5008508df33",
        //    "5000c5008508dd3b",
        //]
        { name: "sdm" },
        { name: "sda" },
        { name: "sdb" },
        { name: "sdc" },
    ],
},
430 ],
431 },
432 benji:: {
Sergiusz Bazanski0c337ac2019-12-21 23:45:07 +0100433 metadataStorageClass: "waw-hdd-redundant-3",
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100434 encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
435 pools: [
Sergiusz Bazanski0c337ac2019-12-21 23:45:07 +0100436 "waw-hdd-redundant-3",
437 "waw-hdd-redundant-3-metadata",
438 "waw-hdd-yolo-3",
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100439 ],
440 s3Configuration: {
441 awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
442 awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
443 bucketName: "benji-k0-backups-waw3",
444 endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
445 },
446 }
447 },
448 },
449 waw3Pools: {
450 // redundant block storage
451 blockRedundant: rook.ECBlockPool(k0.ceph.waw3, "waw-hdd-redundant-3") {
452 metadataReplicas: 2,
453 spec: {
454 failureDomain: "host",
455 replicated: {
456 size: 2,
457 },
458 },
459 },
460 // yolo block storage (low usage, no host redundancy)
461 blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw3, "waw-hdd-yolo-3") {
462 spec: {
463 failureDomain: "osd",
464 erasureCoded: {
465 dataChunks: 12,
466 codingChunks: 4,
467 },
468 },
469 },
470 objectRedundant: rook.S3ObjectStore(k0.ceph.waw3, "waw-hdd-redundant-3-object") {
471 spec: {
472 metadataPool: {
473 failureDomain: "host",
474 replicated: { size: 2 },
475 },
476 dataPool: {
477 failureDomain: "host",
478 replicated: { size: 2 },
479 },
480 },
481 },
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200482 },
483 },
Sergiusz Bazanski9496d992019-09-02 16:32:40 +0200484
# Used for owncloud.hackerspace.pl, which for now lives on boston-packets.hackerspace.pl.
Sergiusz Bazanski114edc22020-02-18 22:54:18 +0100486 nextcloudWaw3: kube.CephObjectStoreUser("nextcloud") {
Sergiusz Bazanskifd323a02019-11-17 19:49:04 +0100487 metadata+: {
488 namespace: "ceph-waw3",
489 },
490 spec: {
491 store: "waw-hdd-redundant-3-object",
492 displayName: "nextcloud",
493 },
494 },
Sergiusz Bazanski741c08f2020-05-14 20:11:58 +0200495
# nuke@hackerspace.pl's personal storage.
# Object store user named "nuke-personal" in the ceph-waw3 namespace, on the
# "waw-hdd-redundant-3-object" store. Uses kube.CephObjectStoreUser, spelled
# exactly as in the nextcloudWaw3 definition in this file (Jsonnet field
# lookups are case-sensitive).
nukePersonalWaw3: kube.CephObjectStoreUser("nuke-personal") {
    metadata+: {
        namespace: "ceph-waw3",
    },
    spec: {
        store: "waw-hdd-redundant-3-object",
        displayName: "nuke-personal",
    },
},
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200506 },
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +0100507}