blob: 02099cdf8e8d05dae942d381c377037ff32d7840 [file] [log] [blame]
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +01001# Top level cluster configuration.
2
3local kube = import "../../kube/kube.libsonnet";
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +02004local policies = import "../../kube/policies.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +02005
Sergiusz Bazanskiaf3be422019-01-17 18:57:19 +01006local calico = import "lib/calico.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +02007local certmanager = import "lib/cert-manager.libsonnet";
8local cockroachdb = import "lib/cockroachdb.libsonnet";
9local coredns = import "lib/coredns.libsonnet";
Sergiusz Bazanski1e565dc2019-01-18 09:40:59 +010010local metallb = import "lib/metallb.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +020011local metrics = import "lib/metrics.libsonnet";
Sergiusz Bazanskia9c7e862019-04-01 17:56:28 +020012local nginx = import "lib/nginx.libsonnet";
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +020013local prodvider = import "lib/prodvider.libsonnet";
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +020014local registry = import "lib/registry.libsonnet";
Sergiusz Bazanskib7fcc672019-04-01 18:40:50 +020015local rook = import "lib/rook.libsonnet";
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020016local pki = import "lib/pki.libsonnet";
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010017
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020018local Cluster(short, realm) = {
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010019 local cluster = self,
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +020020 local cfg = cluster.cfg,
21
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020022 short:: short,
23 realm:: realm,
24 fqdn:: "%s.%s" % [cluster.short, cluster.realm],
25
cfg:: {
    // Storage class used for internal services (like registry). This must
    // be set to a valid storage class. This can either be a cloud provider class
    // (when running on GKE &co) or a storage class created using rook.
    //
    // The guarded field name has to match what consumers in this object
    // actually read (the registry reads cfg.storageClassNameParanoid), so
    // that an unset value fails with this message instead of a generic
    // missing-field error.
    storageClassNameParanoid: error "storageClassNameParanoid must be set",
},
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010032
33 // These are required to let the API Server contact kubelets.
34 crAPIServerToKubelet: kube.ClusterRole("system:kube-apiserver-to-kubelet") {
35 metadata+: {
36 annotations+: {
37 "rbac.authorization.kubernetes.io/autoupdate": "true",
38 },
39 labels+: {
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +020040 "kubernetes.io/bootstrapping": "rbac-defaults",
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010041 },
42 },
43 rules: [
44 {
45 apiGroups: [""],
46 resources: ["nodes/%s" % r for r in [ "proxy", "stats", "log", "spec", "metrics" ]],
47 verbs: ["*"],
48 },
49 ],
50 },
Sergiusz Bazanski5bebbeb2019-01-13 22:08:05 +010051 crbAPIServer: kube.ClusterRoleBinding("system:kube-apiserver") {
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010052 roleRef: {
53 apiGroup: "rbac.authorization.k8s.io",
54 kind: "ClusterRole",
55 name: cluster.crAPIServerToKubelet.metadata.name,
56 },
57 subjects: [
58 {
59 apiGroup: "rbac.authorization.k8s.io",
60 kind: "User",
61 # A cluster API Server authenticates with a certificate whose CN is == to the FQDN of the cluster.
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020062 name: cluster.fqdn,
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010063 },
64 ],
Sergiusz Bazanski49b9a132019-01-14 00:02:59 +010065 },
66
// This ClusterRole is bound to all humans that log in via prodaccess/prodvider/SSO.
// It should allow viewing of non-sensitive data for debuggability and openness.
69 crViewer: kube.ClusterRole("system:viewer") {
70 rules: [
71 {
72 apiGroups: [""],
73 resources: [
74 "nodes",
75 "namespaces",
76 "pods",
77 "configmaps",
78 "services",
79 ],
80 verbs: ["list"],
81 },
82 {
83 apiGroups: ["metrics.k8s.io"],
84 resources: [
85 "nodes",
86 "pods",
87 ],
88 verbs: ["list"],
89 },
90 {
91 apiGroups: ["apps"],
92 resources: [
93 "statefulsets",
94 ],
95 verbs: ["list"],
96 },
97 {
98 apiGroups: ["extensions"],
99 resources: [
100 "deployments",
101 "ingresses",
102 ],
103 verbs: ["list"],
104 }
105 ],
106 },
107 // This ClusterRole is applied (scoped to personal namespace) to all humans.
108 crFullInNamespace: kube.ClusterRole("system:admin-namespace") {
109 rules: [
110 {
Sergiusz Bazanskia168c502020-05-11 20:49:31 +0200111 apiGroups: ["", "extensions", "apps"],
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +0200112 resources: ["*"],
113 verbs: ["*"],
114 },
Sergiusz Bazanskia168c502020-05-11 20:49:31 +0200115 {
116 apiGroups: ["batch"],
117 resources: ["jobs", "cronjobs"],
118 verbs: ["*"],
119 },
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +0200120 ],
121 },
// This ClusterRoleBinding allows root access to cluster admins.
123 crbAdmins: kube.ClusterRoleBinding("system:admins") {
124 roleRef: {
125 apiGroup: "rbac.authorization.k8s.io",
126 kind: "ClusterRole",
127 name: "cluster-admin",
128 },
129 subjects: [
130 {
131 apiGroup: "rbac.authorization.k8s.io",
132 kind: "User",
133 name: user + "@hackerspace.pl",
134 } for user in [
135 "q3k",
136 "implr",
137 "informatic",
138 ]
139 ],
140 },
141
142 podSecurityPolicies: policies.Cluster {},
143
144 allowInsecureNamespaces: [
145 policies.AllowNamespaceInsecure("kube-system"),
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100146 policies.AllowNamespaceInsecure("metallb-system"),
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +0200147 # TODO(q3k): fix this?
148 policies.AllowNamespaceInsecure("ceph-waw2"),
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100149 policies.AllowNamespaceInsecure("ceph-waw3"),
Sergiusz Bazanski5f3a5e02019-09-25 02:51:51 +0200150 policies.AllowNamespaceInsecure("matrix"),
151 policies.AllowNamespaceInsecure("registry"),
152 policies.AllowNamespaceInsecure("internet"),
Bartosz Stebel98ef1512020-04-23 23:30:23 +0200153 # TODO(implr): restricted policy with CAP_NET_ADMIN and tuntap, but no full root
154 policies.AllowNamespaceInsecure("implr-vpn"),
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +0200155 ],
156
157 // Allow all service accounts (thus all controllers) to create secure pods.
158 crbAllowServiceAccountsSecure: kube.ClusterRoleBinding("policy:allow-all-secure") {
159 roleRef_: cluster.podSecurityPolicies.secureRole,
160 subjects: [
161 {
162 kind: "Group",
163 apiGroup: "rbac.authorization.k8s.io",
164 name: "system:serviceaccounts",
165 }
166 ],
167 },
168
Sergiusz Bazanskiaf3be422019-01-17 18:57:19 +0100169 // Calico network fabric
170 calico: calico.Environment {},
Sergiusz Bazanski49b9a132019-01-14 00:02:59 +0100171 // CoreDNS for this cluster.
Sergiusz Bazanski54490d32019-10-02 20:47:18 +0200172 dns: coredns.Environment {
173 cfg+: {
174 cluster_domains: [
175 "cluster.local",
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +0200176 cluster.fqdn,
Sergiusz Bazanski54490d32019-10-02 20:47:18 +0200177 ],
178 },
179 },
Sergiusz Bazanskiaf3be422019-01-17 18:57:19 +0100180 // Metrics Server
181 metrics: metrics.Environment {},
Sergiusz Bazanski1e565dc2019-01-18 09:40:59 +0100182 // Metal Load Balancer
Sergiusz Bazanski14cbacb2019-04-01 18:00:44 +0200183 metallb: metallb.Environment {
184 cfg+: {
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100185 peers: [
186 {
187 "peer-address": "185.236.240.33",
188 "peer-asn": 65001,
189 "my-asn": 65002,
190 },
191 ],
Sergiusz Bazanski14cbacb2019-04-01 18:00:44 +0200192 addressPools: [
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100193 {
194 name: "public-v4-1",
195 protocol: "bgp",
196 addresses: [
197 "185.236.240.48/28",
198 ],
199 },
200 {
201 name: "public-v4-2",
202 protocol: "bgp",
203 addresses: [
204 "185.236.240.112/28"
205 ],
206 },
Sergiusz Bazanski14cbacb2019-04-01 18:00:44 +0200207 ],
208 },
209 },
Sergiusz Bazanskia9c7e862019-04-01 17:56:28 +0200210 // Main nginx Ingress Controller
211 nginx: nginx.Environment {},
Piotr Dobrowolski79ddbc52019-04-02 13:20:15 +0200212 certmanager: certmanager.Environment {},
Sergiusz Bazanskie31d64f2019-10-02 20:59:26 +0200213 issuer: kube.ClusterIssuer("letsencrypt-prod") {
Piotr Dobrowolski3187c592019-04-02 14:44:04 +0200214 spec: {
215 acme: {
216 server: "https://acme-v02.api.letsencrypt.org/directory",
217 email: "bofh@hackerspace.pl",
218 privateKeySecretRef: {
219 name: "letsencrypt-prod"
220 },
221 http01: {},
222 },
223 },
224 },
Sergiusz Bazanskic6da1272019-04-02 00:06:13 +0200225
Sergiusz Bazanskib7fcc672019-04-01 18:40:50 +0200226 // Rook Ceph storage
Sergiusz Bazanskic3b0f762019-06-20 16:42:19 +0200227 rook: rook.Operator {
228 operator+: {
229 spec+: {
230 // TODO(q3k): Bring up the operator again when stability gets fixed
231 // See: https://github.com/rook/rook/issues/3059#issuecomment-492378873
Sergiusz Bazanskid07861b2019-08-08 17:48:25 +0200232 replicas: 1,
Sergiusz Bazanskic3b0f762019-06-20 16:42:19 +0200233 },
234 },
235 },
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +0200236
237 // Docker registry
238 registry: registry.Environment {
239 cfg+: {
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +0200240 domain: "registry.%s" % [cluster.fqdn],
Sergiusz Bazanskid07861b2019-08-08 17:48:25 +0200241 storageClassName: cfg.storageClassNameParanoid,
242 objectStorageName: "waw-hdd-redundant-2-object",
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +0200243 },
244 },
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +0200245
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +0200246 // TLS PKI machinery
247 pki: pki.Environment(cluster.short, cluster.realm),
248
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +0200249 // Prodvider
Sergiusz Bazanskid186e942019-10-04 13:46:39 +0200250 prodvider: prodvider.Environment {
251 cfg+: {
252 apiEndpoint: "kubernetes.default.svc.%s" % [cluster.fqdn],
253 },
254 },
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +0100255};
256
Sergiusz Bazanski49b9a132019-01-14 00:02:59 +0100257
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +0100258{
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200259 k0: {
260 local k0 = self,
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +0200261 cluster: Cluster("k0", "hswaw.net") {
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +0200262 cfg+: {
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100263 storageClassNameParanoid: k0.ceph.waw2Pools.blockParanoid.name,
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +0200264 },
265 },
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200266 cockroach: {
Sergiusz Bazanskid5338922019-08-09 14:13:50 +0200267 waw2: cockroachdb.Cluster("crdb-waw1") {
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200268 cfg+: {
269 topology: [
Sergiusz Bazanski184678b2019-06-22 02:07:41 +0200270 { name: "bc01n01", node: "bc01n01.hswaw.net" },
271 { name: "bc01n02", node: "bc01n02.hswaw.net" },
272 { name: "bc01n03", node: "bc01n03.hswaw.net" },
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200273 ],
Sergiusz Bazanskid5338922019-08-09 14:13:50 +0200274 hostPath: "/var/db/crdb-waw1",
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200275 },
276 },
Sergiusz Bazanski1fad2e52019-08-01 20:16:27 +0200277 clients: {
278 cccampix: k0.cockroach.waw2.Client("cccampix"),
279 cccampixDev: k0.cockroach.waw2.Client("cccampix-dev"),
Sergiusz Bazanski90e8e682020-03-25 10:55:05 +0100280 buglessDev: k0.cockroach.waw2.Client("bugless-dev"),
Piotr Dobrowolski42da0e92020-05-30 14:32:27 +0200281 sso: k0.cockroach.waw2.Client("sso"),
Sergiusz Bazanski1fad2e52019-08-01 20:16:27 +0200282 },
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200283 },
284 ceph: {
Sergiusz Bazanskid07861b2019-08-08 17:48:25 +0200285 // waw1 cluster - dead as of 2019/08/06, data corruption
286 // waw2 cluster
287 waw2: rook.Cluster(k0.cluster.rook, "ceph-waw2") {
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200288 spec: {
289 mon: {
290 count: 3,
291 allowMultiplePerNode: false,
292 },
293 storage: {
294 useAllNodes: false,
295 useAllDevices: false,
296 config: {
297 databaseSizeMB: "1024",
298 journalSizeMB: "1024",
299 },
300 nodes: [
301 {
302 name: "bc01n01.hswaw.net",
303 location: "rack=dcr01 chassis=bc01 host=bc01n01",
304 devices: [ { name: "sda" } ],
305 },
306 {
307 name: "bc01n02.hswaw.net",
308 location: "rack=dcr01 chassis=bc01 host=bc01n02",
309 devices: [ { name: "sda" } ],
310 },
311 {
312 name: "bc01n03.hswaw.net",
313 location: "rack=dcr01 chassis=bc01 host=bc01n03",
314 devices: [ { name: "sda" } ],
315 },
316 ],
317 },
Sergiusz Bazanski13bb1bf2019-08-31 16:33:29 +0200318 benji:: {
319 metadataStorageClass: "waw-hdd-paranoid-2",
320 encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
321 pools: [
322 "waw-hdd-redundant-2",
323 "waw-hdd-redundant-2-metadata",
324 "waw-hdd-paranoid-2",
325 "waw-hdd-yolo-2",
326 ],
327 s3Configuration: {
328 awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
329 awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
330 bucketName: "benji-k0-backups",
331 endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
332 },
333 }
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200334 },
335 },
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100336 waw2Pools: {
337 // redundant block storage
338 blockRedundant: rook.ECBlockPool(k0.ceph.waw2, "waw-hdd-redundant-2") {
339 spec: {
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200340 failureDomain: "host",
341 erasureCoded: {
342 dataChunks: 2,
343 codingChunks: 1,
344 },
345 },
346 },
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100347 // paranoid block storage (3 replicas)
348 blockParanoid: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-paranoid-2") {
349 spec: {
350 failureDomain: "host",
351 replicated: {
352 size: 3,
353 },
354 },
355 },
356 // yolo block storage (no replicas!)
357 blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-yolo-2") {
358 spec: {
359 failureDomain: "host",
360 replicated: {
361 size: 1,
362 },
363 },
364 },
365 objectRedundant: rook.S3ObjectStore(k0.ceph.waw2, "waw-hdd-redundant-2-object") {
366 spec: {
367 metadataPool: {
368 failureDomain: "host",
369 replicated: { size: 3 },
370 },
371 dataPool: {
372 failureDomain: "host",
373 erasureCoded: {
374 dataChunks: 2,
375 codingChunks: 1,
376 },
377 },
378 },
379 },
380 },
381 waw3: rook.Cluster(k0.cluster.rook, "ceph-waw3") {
382 spec: {
383 mon: {
Sergiusz Bazanski0dcc7022020-03-28 17:58:19 +0100384 count: 3,
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100385 allowMultiplePerNode: false,
386 },
387 storage: {
388 useAllNodes: false,
389 useAllDevices: false,
390 config: {
391 databaseSizeMB: "1024",
392 journalSizeMB: "1024",
393 },
394 nodes: [
395 {
396 name: "dcr01s22.hswaw.net",
397 location: "rack=dcr01 host=dcr01s22",
398 devices: [
399 // https://github.com/rook/rook/issues/1228
400 //{ name: "disk/by-id/wwan-0x" + wwan }
401 //for wwan in [
402 // "5000c5008508c433",
403 // "5000c500850989cf",
404 // "5000c5008508f843",
405 // "5000c5008508baf7",
406 //]
407 { name: "sdn" },
408 { name: "sda" },
409 { name: "sdb" },
410 { name: "sdc" },
411 ],
412 },
{
    name: "dcr01s24.hswaw.net",
    // The CRUSH location must name this node's own host bucket. It was
    // previously copy-pasted as host=dcr01s22, which put both storage
    // nodes under the same host even though the waw3 pools use
    // failureDomain: "host".
    // NOTE(review): changing the location on a live cluster presumably
    // triggers data rebalancing - confirm before applying.
    location: "rack=dcr01 host=dcr01s24",
    devices: [
        // https://github.com/rook/rook/issues/1228
        //{ name: "disk/by-id/wwan-0x" + wwan }
        //for wwan in [
        //    "5000c5008508ee03",
        //    "5000c5008508c9ef",
        //    "5000c5008508df33",
        //    "5000c5008508dd3b",
        //]
        { name: "sdm" },
        { name: "sda" },
        { name: "sdb" },
        { name: "sdc" },
    ],
},
431 ],
432 },
433 benji:: {
Sergiusz Bazanski0c337ac2019-12-21 23:45:07 +0100434 metadataStorageClass: "waw-hdd-redundant-3",
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100435 encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
436 pools: [
Sergiusz Bazanski0c337ac2019-12-21 23:45:07 +0100437 "waw-hdd-redundant-3",
438 "waw-hdd-redundant-3-metadata",
439 "waw-hdd-yolo-3",
Serge Bazanskic33ebcc2019-11-01 18:43:45 +0100440 ],
441 s3Configuration: {
442 awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
443 awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
444 bucketName: "benji-k0-backups-waw3",
445 endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
446 },
447 }
448 },
449 },
450 waw3Pools: {
451 // redundant block storage
452 blockRedundant: rook.ECBlockPool(k0.ceph.waw3, "waw-hdd-redundant-3") {
453 metadataReplicas: 2,
454 spec: {
455 failureDomain: "host",
456 replicated: {
457 size: 2,
458 },
459 },
460 },
461 // yolo block storage (low usage, no host redundancy)
462 blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw3, "waw-hdd-yolo-3") {
463 spec: {
464 failureDomain: "osd",
465 erasureCoded: {
466 dataChunks: 12,
467 codingChunks: 4,
468 },
469 },
470 },
471 objectRedundant: rook.S3ObjectStore(k0.ceph.waw3, "waw-hdd-redundant-3-object") {
472 spec: {
473 metadataPool: {
474 failureDomain: "host",
475 replicated: { size: 2 },
476 },
477 dataPool: {
478 failureDomain: "host",
479 replicated: { size: 2 },
480 },
481 },
482 },
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200483 },
484 },
Sergiusz Bazanski9496d992019-09-02 16:32:40 +0200485
# Used for owncloud.hackerspace.pl, which for now lives on boston-packets.hackerspace.pl.
Sergiusz Bazanski114edc22020-02-18 22:54:18 +0100487 nextcloudWaw3: kube.CephObjectStoreUser("nextcloud") {
Sergiusz Bazanskifd323a02019-11-17 19:49:04 +0100488 metadata+: {
489 namespace: "ceph-waw3",
490 },
491 spec: {
492 store: "waw-hdd-redundant-3-object",
493 displayName: "nextcloud",
494 },
495 },
Sergiusz Bazanski741c08f2020-05-14 20:11:58 +0200496
497 # nuke@hackerspace.pl's personal storage.
Sergiusz Bazanski1223cde2020-05-16 17:38:23 +0200498 nukePersonalWaw3: kube.CephObjectStoreUser("nuke-personal") {
Sergiusz Bazanski741c08f2020-05-14 20:11:58 +0200499 metadata+: {
500 namespace: "ceph-waw3",
501 },
502 spec: {
503 store: "waw-hdd-redundant-3-object",
504 displayName: "nuke-personal",
505 },
506 },
Patryk Jakuszewc4104322020-06-04 18:38:34 +0200507
508 # patryk@hackerspace.pl's ArmA3 mod bucket.
509 cz2ArmaModsWaw3: kube.CephObjectStoreUser("cz2-arma3mods") {
510 metadata+: {
511 namespace: "ceph-waw3",
512 },
513 spec: {
514 store: "waw-hdd-redundant-3-object",
515 displayName: "cz2-arma3mods",
516 },
517 },
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +0200518 },
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +0100519}