blob: 0706d31eae79182ae617cf4bcdb66fe746f4c370 [file] [log] [blame]
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +01001# Top level cluster configuration.
2
3local kube = import "../../kube/kube.libsonnet";
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +02004local policies = import "../../kube/policies.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +02005
Sergiusz Bazanskiaf3be422019-01-17 18:57:19 +01006local calico = import "lib/calico.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +02007local certmanager = import "lib/cert-manager.libsonnet";
8local cockroachdb = import "lib/cockroachdb.libsonnet";
9local coredns = import "lib/coredns.libsonnet";
Sergiusz Bazanski1e565dc2019-01-18 09:40:59 +010010local metallb = import "lib/metallb.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +020011local metrics = import "lib/metrics.libsonnet";
Sergiusz Bazanskia9c7e862019-04-01 17:56:28 +020012local nginx = import "lib/nginx.libsonnet";
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +020013local prodvider = import "lib/prodvider.libsonnet";
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +020014local registry = import "lib/registry.libsonnet";
Sergiusz Bazanskib7fcc672019-04-01 18:40:50 +020015local rook = import "lib/rook.libsonnet";
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020016local pki = import "lib/pki.libsonnet";
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010017
// Cluster builds the base set of Kubernetes objects for a single cluster:
// RBAC roles and bindings, pod security policies, Calico, CoreDNS, the
// metrics server, MetalLB, the nginx ingress controller, cert-manager and its
// ClusterIssuer, the Rook operator, the Docker registry, PKI and prodvider.
//
// Parameters:
//   short: short name of the cluster (eg. "k0").
//   realm: DNS realm of the cluster (eg. "hswaw.net").
//
// The cluster FQDN is derived as "<short>.<realm>". Users of Cluster must
// override cfg.storageClassNameParanoid (via cfg+: { ... }).
local Cluster(short, realm) = {
    local cluster = self,
    local cfg = cluster.cfg,

    short:: short,
    realm:: realm,
    fqdn:: "%s.%s" % [cluster.short, cluster.realm],

    cfg:: {
        // Storage class used for internal services (like registry). This must
        // be set to a valid storage class. This can either be a cloud provider class
        // (when running on GKE &co) or a storage class created using rook.
        //
        // The field name matches what the registry below reads
        // (cfg.storageClassNameParanoid), so that forgetting to set it yields
        // this explicit error instead of a generic missing-field error.
        storageClassNameParanoid: error "storageClassNameParanoid must be set",
    },

    // These are required to let the API Server contact kubelets.
    crAPIServerToKubelet: kube.ClusterRole("system:kube-apiserver-to-kubelet") {
        metadata+: {
            annotations+: {
                "rbac.authorization.kubernetes.io/autoupdate": "true",
            },
            labels+: {
                "kubernetes.io/bootstrapping": "rbac-defaults",
            },
        },
        rules: [
            {
                apiGroups: [""],
                // Expands to nodes/proxy, nodes/stats, nodes/log, nodes/spec, nodes/metrics.
                resources: ["nodes/%s" % r for r in [ "proxy", "stats", "log", "spec", "metrics" ]],
                verbs: ["*"],
            },
        ],
    },
    // Binds the above ClusterRole to the user the API Server authenticates as.
    crbAPIServer: kube.ClusterRoleBinding("system:kube-apiserver") {
        roleRef: {
            apiGroup: "rbac.authorization.k8s.io",
            kind: "ClusterRole",
            name: cluster.crAPIServerToKubelet.metadata.name,
        },
        subjects: [
            {
                apiGroup: "rbac.authorization.k8s.io",
                kind: "User",
                # A cluster API Server authenticates with a certificate whose CN is == to the FQDN of the cluster.
                name: cluster.fqdn,
            },
        ],
    },

    // This ClusterRole is bound to all humans that log in via prodaccess/prodvider/SSO.
    // It should allow viewing of non-sensitive data for debuggability and openness.
    crViewer: kube.ClusterRole("system:viewer") {
        rules: [
            {
                apiGroups: [""],
                resources: [
                    "nodes",
                    "namespaces",
                    "pods",
                    "configmaps",
                    "services",
                ],
                verbs: ["list"],
            },
            {
                apiGroups: ["metrics.k8s.io"],
                resources: [
                    "nodes",
                    "pods",
                ],
                verbs: ["list"],
            },
            {
                apiGroups: ["apps"],
                resources: [
                    "statefulsets",
                ],
                verbs: ["list"],
            },
            {
                apiGroups: ["extensions"],
                resources: [
                    "deployments",
                    "ingresses",
                ],
                verbs: ["list"],
            }
        ],
    },
    // This ClusterRole is applied (scoped to personal namespace) to all humans.
    crFullInNamespace: kube.ClusterRole("system:admin-namespace") {
        rules: [
            {
                apiGroups: ["*"],
                resources: ["*"],
                verbs: ["*"],
            },
        ],
    },
    // This ClusterRoleBinding grants root access (cluster-admin) to cluster admins.
    crbAdmins: kube.ClusterRoleBinding("system:admins") {
        roleRef: {
            apiGroup: "rbac.authorization.k8s.io",
            kind: "ClusterRole",
            name: "cluster-admin",
        },
        // One User subject per admin, named <user>@hackerspace.pl.
        subjects: [
            {
                apiGroup: "rbac.authorization.k8s.io",
                kind: "User",
                name: user + "@hackerspace.pl",
            } for user in [
                "q3k",
                "implr",
                "informatic",
            ]
        ],
    },

    podSecurityPolicies: policies.Cluster {},

    // Namespaces that are allowed to run insecure pods.
    allowInsecureNamespaces: [
        policies.AllowNamespaceInsecure("kube-system"),
        policies.AllowNamespaceInsecure("metallb-system"),
        # TODO(q3k): fix this?
        policies.AllowNamespaceInsecure("ceph-waw2"),
        policies.AllowNamespaceInsecure("ceph-waw3"),
        policies.AllowNamespaceInsecure("matrix"),
        policies.AllowNamespaceInsecure("registry"),
        policies.AllowNamespaceInsecure("internet"),
    ],

    // Allow all service accounts (thus all controllers) to create secure pods.
    crbAllowServiceAccountsSecure: kube.ClusterRoleBinding("policy:allow-all-secure") {
        roleRef_: cluster.podSecurityPolicies.secureRole,
        subjects: [
            {
                kind: "Group",
                apiGroup: "rbac.authorization.k8s.io",
                name: "system:serviceaccounts",
            }
        ],
    },

    // Calico network fabric
    calico: calico.Environment {},
    // CoreDNS for this cluster.
    dns: coredns.Environment {
        cfg+: {
            // Serve both the default cluster domain and the cluster's own FQDN.
            cluster_domains: [
                "cluster.local",
                cluster.fqdn,
            ],
        },
    },
    // Metrics Server
    metrics: metrics.Environment {},
    // Metal Load Balancer
    metallb: metallb.Environment {
        cfg+: {
            peers: [
                {
                    "peer-address": "185.236.240.33",
                    "peer-asn": 65001,
                    "my-asn": 65002,
                },
            ],
            addressPools: [
                {
                    name: "public-v4-1",
                    protocol: "bgp",
                    addresses: [
                        "185.236.240.48/28",
                    ],
                },
                {
                    name: "public-v4-2",
                    protocol: "bgp",
                    addresses: [
                        "185.236.240.112/28"
                    ],
                },
            ],
        },
    },
    // Main nginx Ingress Controller
    nginx: nginx.Environment {},
    certmanager: certmanager.Environment {},
    // Let's Encrypt production ACME issuer, using HTTP-01 challenges.
    issuer: kube.ClusterIssuer("letsencrypt-prod") {
        spec: {
            acme: {
                server: "https://acme-v02.api.letsencrypt.org/directory",
                email: "bofh@hackerspace.pl",
                privateKeySecretRef: {
                    name: "letsencrypt-prod"
                },
                http01: {},
            },
        },
    },

    // Rook Ceph storage
    rook: rook.Operator {
        operator+: {
            spec+: {
                // TODO(q3k): Bring up the operator again when stability gets fixed
                // See: https://github.com/rook/rook/issues/3059#issuecomment-492378873
                replicas: 1,
            },
        },
    },

    // Docker registry
    registry: registry.Environment {
        cfg+: {
            domain: "registry.%s" % [cluster.fqdn],
            storageClassName: cfg.storageClassNameParanoid,
            objectStorageName: "waw-hdd-redundant-2-object",
        },
    },

    // TLS PKI machinery
    pki: pki.Environment(cluster.short, cluster.realm),

    // Prodvider
    prodvider: prodvider.Environment {
        cfg+: {
            apiEndpoint: "kubernetes.default.svc.%s" % [cluster.fqdn],
        },
    },
};
249
Sergiusz Bazanski49b9a132019-01-14 00:02:59 +0100250
// Top-level output: one field per cluster. Each cluster combines the base
// Cluster() objects with cluster-specific CockroachDB and Rook/Ceph resources.
{
    k0: {
        local k0 = self,
        cluster: Cluster("k0", "hswaw.net") {
            cfg+: {
                // Internal services use the 3-replica pool of the waw2 Ceph cluster.
                storageClassNameParanoid: k0.ceph.waw2Pools.blockParanoid.name,
            },
        },
        cockroach: {
            waw2: cockroachdb.Cluster("crdb-waw1") {
                cfg+: {
                    topology: [
                        { name: "bc01n01", node: "bc01n01.hswaw.net" },
                        { name: "bc01n02", node: "bc01n02.hswaw.net" },
                        { name: "bc01n03", node: "bc01n03.hswaw.net" },
                    ],
                    hostPath: "/var/db/crdb-waw1",
                },
            },
            clients: {
                cccampix: k0.cockroach.waw2.Client("cccampix"),
                cccampixDev: k0.cockroach.waw2.Client("cccampix-dev"),
            },
        },
        ceph: {
            // waw1 cluster - dead as of 2019/08/06, data corruption
            // waw2 cluster
            waw2: rook.Cluster(k0.cluster.rook, "ceph-waw2") {
                spec: {
                    mon: {
                        count: 3,
                        allowMultiplePerNode: false,
                    },
                    storage: {
                        useAllNodes: false,
                        useAllDevices: false,
                        config: {
                            databaseSizeMB: "1024",
                            journalSizeMB: "1024",
                        },
                        nodes: [
                            {
                                name: "bc01n01.hswaw.net",
                                location: "rack=dcr01 chassis=bc01 host=bc01n01",
                                devices: [ { name: "sda" } ],
                            },
                            {
                                name: "bc01n02.hswaw.net",
                                location: "rack=dcr01 chassis=bc01 host=bc01n02",
                                devices: [ { name: "sda" } ],
                            },
                            {
                                name: "bc01n03.hswaw.net",
                                location: "rack=dcr01 chassis=bc01 host=bc01n03",
                                devices: [ { name: "sda" } ],
                            },
                        ],
                    },
                    // Hidden field: backup (benji) configuration for this cluster.
                    benji:: {
                        metadataStorageClass: "waw-hdd-paranoid-2",
                        // First line of the secret file (strips the trailing newline).
                        encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
                        pools: [
                            "waw-hdd-redundant-2",
                            "waw-hdd-redundant-2-metadata",
                            "waw-hdd-paranoid-2",
                            "waw-hdd-yolo-2",
                        ],
                        s3Configuration: {
                            awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
                            // First line of the secret file (strips the trailing newline).
                            awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
                            bucketName: "benji-k0-backups",
                            endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
                        },
                    }
                },
            },
            waw2Pools: {
                // redundant block storage
                blockRedundant: rook.ECBlockPool(k0.ceph.waw2, "waw-hdd-redundant-2") {
                    spec: {
                        failureDomain: "host",
                        erasureCoded: {
                            dataChunks: 2,
                            codingChunks: 1,
                        },
                    },
                },
                // paranoid block storage (3 replicas)
                blockParanoid: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-paranoid-2") {
                    spec: {
                        failureDomain: "host",
                        replicated: {
                            size: 3,
                        },
                    },
                },
                // yolo block storage (no replicas!)
                blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-yolo-2") {
                    spec: {
                        failureDomain: "host",
                        replicated: {
                            size: 1,
                        },
                    },
                },
                objectRedundant: rook.S3ObjectStore(k0.ceph.waw2, "waw-hdd-redundant-2-object") {
                    spec: {
                        metadataPool: {
                            failureDomain: "host",
                            replicated: { size: 3 },
                        },
                        dataPool: {
                            failureDomain: "host",
                            erasureCoded: {
                                dataChunks: 2,
                                codingChunks: 1,
                            },
                        },
                    },
                },
            },
            waw3: rook.Cluster(k0.cluster.rook, "ceph-waw3") {
                spec: {
                    mon: {
                        count: 3,
                        allowMultiplePerNode: false,
                    },
                    storage: {
                        useAllNodes: false,
                        useAllDevices: false,
                        config: {
                            databaseSizeMB: "1024",
                            journalSizeMB: "1024",
                        },
                        nodes: [
                            {
                                name: "dcr01s22.hswaw.net",
                                location: "rack=dcr01 host=dcr01s22",
                                devices: [
                                    // https://github.com/rook/rook/issues/1228
                                    //{ name: "disk/by-id/wwan-0x" + wwan }
                                    //for wwan in [
                                    //    "5000c5008508c433",
                                    //    "5000c500850989cf",
                                    //    "5000c5008508f843",
                                    //    "5000c5008508baf7",
                                    //]
                                    { name: "sdn" },
                                    { name: "sda" },
                                    { name: "sdb" },
                                    { name: "sdc" },
                                ],
                            },
                            {
                                name: "dcr01s24.hswaw.net",
                                // host= must name this node, like every other
                                // node entry in this file; it previously said
                                // dcr01s22 (copy-paste from the entry above).
                                location: "rack=dcr01 host=dcr01s24",
                                devices: [
                                    // https://github.com/rook/rook/issues/1228
                                    //{ name: "disk/by-id/wwan-0x" + wwan }
                                    //for wwan in [
                                    //    "5000c5008508ee03",
                                    //    "5000c5008508c9ef",
                                    //    "5000c5008508df33",
                                    //    "5000c5008508dd3b",
                                    //]
                                    { name: "sdm" },
                                    { name: "sda" },
                                    { name: "sdb" },
                                    { name: "sdc" },
                                ],
                            },
                        ],
                    },
                    // Hidden field: backup (benji) configuration for this cluster.
                    // No pools are listed here yet.
                    benji:: {
                        metadataStorageClass: "waw-hdd-paranoid-3",
                        // First line of the secret file (strips the trailing newline).
                        encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
                        pools: [
                        ],
                        s3Configuration: {
                            awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
                            // First line of the secret file (strips the trailing newline).
                            awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
                            bucketName: "benji-k0-backups-waw3",
                            endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
                        },
                    }
                },
            },
            // NOTE(review): unlike waw2Pools, the ECBlockPool below is given a
            // `replicated` spec and the ReplicatedBlockPool an `erasureCoded`
            // spec. This looks swapped, but may be intentional - confirm
            // against lib/rook.libsonnet before changing.
            waw3Pools: {
                // redundant block storage
                blockRedundant: rook.ECBlockPool(k0.ceph.waw3, "waw-hdd-redundant-3") {
                    metadataReplicas: 2,
                    spec: {
                        failureDomain: "host",
                        replicated: {
                            size: 2,
                        },
                    },
                },
                // yolo block storage (low usage, no host redundancy)
                blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw3, "waw-hdd-yolo-3") {
                    spec: {
                        failureDomain: "osd",
                        erasureCoded: {
                            dataChunks: 12,
                            codingChunks: 4,
                        },
                    },
                },
                objectRedundant: rook.S3ObjectStore(k0.ceph.waw3, "waw-hdd-redundant-3-object") {
                    spec: {
                        metadataPool: {
                            failureDomain: "host",
                            replicated: { size: 2 },
                        },
                        dataPool: {
                            failureDomain: "host",
                            replicated: { size: 2 },
                        },
                    },
                },
            },
        },

        # Used for owncloud.hackerspace.pl, which for now lives on boston-packets.hackerspace.pl.
        nextcloud: kube._Object("ceph.rook.io/v1", "CephObjectStoreUser", "nextcloud") {
            metadata+: {
                namespace: "ceph-waw2",
            },
            spec: {
                store: "waw-hdd-redundant-2-object",
                displayName: "nextcloud",
            },
        },
    },
}