blob: e89a8010327d486a13d7d10456ab6626145a20bc [file] [log] [blame]
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +01001# Top level cluster configuration.
2
3local kube = import "../../kube/kube.libsonnet";
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +02004local policies = import "../../kube/policies.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +02005
Sergiusz Bazanskiaf3be422019-01-17 18:57:19 +01006local calico = import "lib/calico.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +02007local certmanager = import "lib/cert-manager.libsonnet";
8local cockroachdb = import "lib/cockroachdb.libsonnet";
9local coredns = import "lib/coredns.libsonnet";
Sergiusz Bazanski1e565dc2019-01-18 09:40:59 +010010local metallb = import "lib/metallb.libsonnet";
Sergiusz Bazanskic7258f42019-06-21 00:24:09 +020011local metrics = import "lib/metrics.libsonnet";
Sergiusz Bazanskia9c7e862019-04-01 17:56:28 +020012local nginx = import "lib/nginx.libsonnet";
Sergiusz Bazanskib13b7ff2019-08-29 20:12:24 +020013local prodvider = import "lib/prodvider.libsonnet";
Sergiusz Bazanski4d61d202019-07-21 16:56:41 +020014local registry = import "lib/registry.libsonnet";
Sergiusz Bazanskib7fcc672019-04-01 18:40:50 +020015local rook = import "lib/rook.libsonnet";
Sergiusz Bazanski6f773e02019-10-02 20:46:48 +020016local pki = import "lib/pki.libsonnet";
Sergiusz Bazanski4d9e72c2019-01-13 22:06:33 +010017
// Cluster(short, realm) returns an object holding the cluster-wide Kubernetes
// resources (RBAC, pod security policies, networking, ingress, storage
// operator, registry, PKI, prodvider) for a single cluster.
//
// Arguments:
//   short: short name of the cluster (eg. "k0").
//   realm: DNS realm the cluster lives in (eg. "hswaw.net").
//
// The two are joined into `fqdn` ("<short>.<realm>"), which is used as the API
// server user name, as an extra CoreDNS cluster domain, and as the suffix of
// the registry and prodvider endpoints.
//
// Callers must override `cfg.storageClassNameParanoid` via `cfg+: { ... }`.
local Cluster(short, realm) = {
    local cluster = self,
    local cfg = cluster.cfg,

    short:: short,
    realm:: realm,
    fqdn:: "%s.%s" % [cluster.short, cluster.realm],

    cfg:: {
        // Storage class used for internal services (like registry). This must
        // be set to a valid storage class. This can either be a cloud provider class
        // (when running on GKE &co) or a storage class created using rook.
        //
        // This guard is named after the field that is actually read below
        // (registry.cfg.storageClassName), so that forgetting to set it yields
        // this message instead of a generic "field does not exist" error.
        storageClassNameParanoid: error "storageClassNameParanoid must be set",
    },

    // These are required to let the API Server contact kubelets.
    crAPIServerToKubelet: kube.ClusterRole("system:kube-apiserver-to-kubelet") {
        metadata+: {
            annotations+: {
                "rbac.authorization.kubernetes.io/autoupdate": "true",
            },
            labels+: {
                "kubernetes.io/bootstrapping": "rbac-defaults",
            },
        },
        rules: [
            {
                apiGroups: [""],
                // Expands to nodes/proxy, nodes/stats, nodes/log, nodes/spec, nodes/metrics.
                resources: ["nodes/%s" % r for r in [ "proxy", "stats", "log", "spec", "metrics" ]],
                verbs: ["*"],
            },
        ],
    },
    // Binds the role above to the API Server's user.
    crbAPIServer: kube.ClusterRoleBinding("system:kube-apiserver") {
        roleRef: {
            apiGroup: "rbac.authorization.k8s.io",
            kind: "ClusterRole",
            name: cluster.crAPIServerToKubelet.metadata.name,
        },
        subjects: [
            {
                apiGroup: "rbac.authorization.k8s.io",
                kind: "User",
                // A cluster API Server authenticates with a certificate whose CN is == to the FQDN of the cluster.
                name: cluster.fqdn,
            },
        ],
    },

    // This ClusterRole is bound to all humans that log in via prodaccess/prodvider/SSO.
    // It should allow viewing of non-sensitive data for debuggability and openness.
    // Note that only the "list" verb is granted on each resource.
    crViewer: kube.ClusterRole("system:viewer") {
        rules: [
            {
                apiGroups: [""],
                resources: [
                    "nodes",
                    "namespaces",
                    "pods",
                    "configmaps",
                    "services",
                ],
                verbs: ["list"],
            },
            {
                apiGroups: ["metrics.k8s.io"],
                resources: [
                    "nodes",
                    "pods",
                ],
                verbs: ["list"],
            },
            {
                apiGroups: ["apps"],
                resources: [
                    "statefulsets",
                ],
                verbs: ["list"],
            },
            {
                apiGroups: ["extensions"],
                resources: [
                    "deployments",
                    "ingresses",
                ],
                verbs: ["list"],
            },
        ],
    },
    // This ClusterRole is applied (scoped to personal namespace) to all humans.
    crFullInNamespace: kube.ClusterRole("system:admin-namespace") {
        rules: [
            {
                apiGroups: ["*"],
                resources: ["*"],
                verbs: ["*"],
            },
        ],
    },
    // This ClusterRoleBinding allows root access to cluster admins.
    crbAdmins: kube.ClusterRoleBinding("system:admins") {
        roleRef: {
            apiGroup: "rbac.authorization.k8s.io",
            kind: "ClusterRole",
            name: "cluster-admin",
        },
        // One User subject per admin, named <user>@hackerspace.pl.
        subjects: [
            {
                apiGroup: "rbac.authorization.k8s.io",
                kind: "User",
                name: user + "@hackerspace.pl",
            } for user in [
                "q3k",
                "implr",
                "informatic",
            ]
        ],
    },

    podSecurityPolicies: policies.Cluster {},

    // Namespaces that are allowed to run insecure pods.
    allowInsecureNamespaces: [
        policies.AllowNamespaceInsecure("kube-system"),
        // TODO(q3k): fix this?
        policies.AllowNamespaceInsecure("ceph-waw2"),
        policies.AllowNamespaceInsecure("matrix"),
        policies.AllowNamespaceInsecure("registry"),
        policies.AllowNamespaceInsecure("internet"),
    ],

    // Allow all service accounts (thus all controllers) to create secure pods.
    crbAllowServiceAccountsSecure: kube.ClusterRoleBinding("policy:allow-all-secure") {
        roleRef_: cluster.podSecurityPolicies.secureRole,
        subjects: [
            {
                kind: "Group",
                apiGroup: "rbac.authorization.k8s.io",
                name: "system:serviceaccounts",
            },
        ],
    },

    // Calico network fabric
    calico: calico.Environment {},
    // CoreDNS for this cluster.
    dns: coredns.Environment {
        cfg+: {
            cluster_domains: [
                "cluster.local",
                cluster.fqdn,
            ],
        },
    },
    // Metrics Server
    metrics: metrics.Environment {},
    // Metal Load Balancer
    metallb: metallb.Environment {
        cfg+: {
            addressPools: [
                { name: "public-v4-1", protocol: "layer2", addresses: ["185.236.240.50-185.236.240.63"] },
            ],
        },
    },
    // Main nginx Ingress Controller
    nginx: nginx.Environment {},
    certmanager: certmanager.Environment {},
    // ACME ClusterIssuer pointing at the Let's Encrypt v02 directory.
    issuer: kube.ClusterIssuer("letsencrypt-prod") {
        spec: {
            acme: {
                server: "https://acme-v02.api.letsencrypt.org/directory",
                email: "bofh@hackerspace.pl",
                privateKeySecretRef: {
                    name: "letsencrypt-prod",
                },
                http01: {},
            },
        },
    },

    // Rook Ceph storage
    rook: rook.Operator {
        operator+: {
            spec+: {
                // TODO(q3k): Bring up the operator again when stability gets fixed
                // See: https://github.com/rook/rook/issues/3059#issuecomment-492378873
                // NOTE(review): replicas is 1 here, so the TODO above may be stale - confirm.
                replicas: 1,
            },
        },
    },

    // Docker registry
    registry: registry.Environment {
        cfg+: {
            domain: "registry.%s" % [cluster.fqdn],
            storageClassName: cfg.storageClassNameParanoid,
            objectStorageName: "waw-hdd-redundant-2-object",
        },
    },

    // TLS PKI machinery
    pki: pki.Environment(cluster.short, cluster.realm),

    // Prodvider
    prodvider: prodvider.Environment {
        cfg+: {
            apiEndpoint: "kubernetes.default.svc.%s" % [cluster.fqdn],
        },
    },
};
227
Sergiusz Bazanski49b9a132019-01-14 00:02:59 +0100228
// Concrete cluster instantiations, keyed by cluster short name.
{
    k0: {
        local k0 = self,

        // Short names of the nodes that host both CockroachDB and Ceph.
        local hosts = ["bc01n01", "bc01n02", "bc01n03"],
        // Fully qualified node name for a short host name.
        local nodeName(host) = host + ".hswaw.net",
        // First line of a secret file's contents (drops anything after the first newline).
        local firstLine(text) = std.split(text, "\n")[0],

        cluster: Cluster("k0", "hswaw.net") {
            cfg+: {
                storageClassNameParanoid: k0.ceph.blockParanoid.name,
            },
        },

        cockroach: {
            // NOTE(review): the field is called waw2 while the cluster name and
            // host path say crdb-waw1 - presumably historical; confirm before renaming.
            waw2: cockroachdb.Cluster("crdb-waw1") {
                cfg+: {
                    // One topology entry per host.
                    topology: [
                        { name: host, node: nodeName(host) }
                        for host in hosts
                    ],
                    hostPath: "/var/db/crdb-waw1",
                },
            },
            clients: {
                cccampix: k0.cockroach.waw2.Client("cccampix"),
                cccampixDev: k0.cockroach.waw2.Client("cccampix-dev"),
            },
        },

        ceph: {
            // waw1 cluster - dead as of 2019/08/06, data corruption
            // waw2 cluster
            waw2: rook.Cluster(k0.cluster.rook, "ceph-waw2") {
                spec: {
                    mon: {
                        count: 3,
                        allowMultiplePerNode: false,
                    },
                    storage: {
                        // Nodes and devices are listed explicitly below.
                        useAllNodes: false,
                        useAllDevices: false,
                        config: {
                            databaseSizeMB: "1024",
                            journalSizeMB: "1024",
                        },
                        // Every host contributes its "sda" device.
                        nodes: [
                            {
                                name: nodeName(host),
                                location: "rack=dcr01 chassis=bc01 host=" + host,
                                devices: [{ name: "sda" }],
                            }
                            for host in hosts
                        ],
                    },
                    // Hidden field (::), so it is not manifested as part of spec.
                    benji:: {
                        metadataStorageClass: "waw-hdd-paranoid-2",
                        encryptionPassword: firstLine(importstr "../secrets/plain/k0-benji-encryption-password"),
                        pools: [
                            "waw-hdd-redundant-2",
                            "waw-hdd-redundant-2-metadata",
                            "waw-hdd-paranoid-2",
                            "waw-hdd-yolo-2",
                        ],
                        s3Configuration: {
                            awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
                            awsSecretAccessKey: firstLine(importstr "../secrets/plain/k0-benji-secret-access-key"),
                            bucketName: "benji-k0-backups",
                            endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
                        },
                    },
                },
            },

            // redundant block storage (erasure coded: 2 data chunks + 1 coding chunk)
            blockRedundant: rook.ECBlockPool(k0.ceph.waw2, "waw-hdd-redundant-2") {
                spec: {
                    failureDomain: "host",
                    erasureCoded: { dataChunks: 2, codingChunks: 1 },
                },
            },

            // paranoid block storage (3 replicas)
            blockParanoid: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-paranoid-2") {
                spec: {
                    failureDomain: "host",
                    replicated: { size: 3 },
                },
            },

            // yolo block storage (no replicas!)
            blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-yolo-2") {
                spec: {
                    failureDomain: "host",
                    replicated: { size: 1 },
                },
            },

            // S3 object store: replicated metadata pool, erasure coded data pool.
            objectRedundant: rook.S3ObjectStore(k0.ceph.waw2, "waw-hdd-redundant-2-object") {
                spec: {
                    metadataPool: {
                        failureDomain: "host",
                        replicated: { size: 3 },
                    },
                    dataPool: {
                        failureDomain: "host",
                        erasureCoded: { dataChunks: 2, codingChunks: 1 },
                    },
                },
            },
        },

        // Used for owncloud.hackerspace.pl, which for now lives on boston-packets.hackerspace.pl.
        nextcloud: kube._Object("ceph.rook.io/v1", "CephObjectStoreUser", "nextcloud") {
            metadata+: {
                namespace: "ceph-waw2",
            },
            spec: {
                store: "waw-hdd-redundant-2-object",
                displayName: "nextcloud",
            },
        },
    },
}