blob: f896695760e72e09cd8709d50946e24dfcb0f058 [file] [log] [blame]
# Vendored from nixpkgs git 44ad80ab1036c5cc83ada4bfa451dac9939f2a10
# Copyright (c) 2003-2023 Eelco Dolstra and the Nixpkgs/NixOS contributors
# SPDX-License-Identifier: MIT
#
# Same as upstream kubelet.nix module from nixpkgs, but with the following
# changes:
#  - cni tunables nuked and replaced with static host dirs, so that calico
#    running on k8s can drop CNI plugins there itself
#  - package configurable separately from rest of kubernetes
10
11{ config, lib, pkgs, ... }:
12
13with lib;
14
15let
16 top = config.services.kubernetes;
17 cfg = top.kubelet;
18
19 infraContainer = pkgs.dockerTools.buildImage {
20 name = "pause";
21 tag = "latest";
22 contents = top.package.pause;
Serge Bazanski765e3692021-02-13 17:42:48 +000023 config.Cmd = ["/bin/pause"];
Sergiusz Bazanskic78cc132020-02-02 22:31:53 +010024 };
25
26 kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig;
27
28 manifestPath = "kubernetes/manifests";
29
30 taintOptions = with lib.types; { name, ... }: {
31 options = {
32 key = mkOption {
33 description = "Key of taint.";
34 default = name;
35 type = str;
36 };
37 value = mkOption {
38 description = "Value of taint.";
39 type = str;
40 };
41 effect = mkOption {
42 description = "Effect of taint.";
43 example = "NoSchedule";
44 type = enum ["NoSchedule" "PreferNoSchedule" "NoExecute"];
45 };
46 };
47 };
48
49 taints = concatMapStringsSep "," (v: "${v.key}=${v.value}:${v.effect}") (mapAttrsToList (n: v: v) cfg.taints);
50in
51{
Sergiusz Bazanskic78cc132020-02-02 22:31:53 +010052 # services/cluster/kubernetes/default.nix still wants to poke flannel,
53 # but since we nuke that module we have to add a fake tunable for it.
54 options.services.kubernetes.flannel = {
55 enable = mkEnableOption "enable flannel networking";
56 };
57
58 ###### interface
59 options.services.kubernetes.kubelet = with lib.types; {
60
61 address = mkOption {
62 description = "Kubernetes kubelet info server listening address.";
63 default = "0.0.0.0";
64 type = str;
65 };
66
67 clusterDns = mkOption {
68 description = "Use alternative DNS.";
69 default = "10.1.0.1";
70 type = str;
71 };
72
73 clusterDomain = mkOption {
74 description = "Use alternative domain.";
75 default = config.services.kubernetes.addons.dns.clusterDomain;
76 type = str;
77 };
78
79 clientCaFile = mkOption {
80 description = "Kubernetes apiserver CA file for client authentication.";
81 default = top.caFile;
82 type = nullOr path;
83 };
84
85 enable = mkEnableOption "Kubernetes kubelet.";
86
87 extraOpts = mkOption {
88 description = "Kubernetes kubelet extra command line options.";
89 default = "";
90 type = str;
91 };
92
93 featureGates = mkOption {
94 description = "List set of feature gates";
95 default = top.featureGates;
96 type = listOf str;
97 };
98
99 healthz = {
100 bind = mkOption {
101 description = "Kubernetes kubelet healthz listening address.";
102 default = "127.0.0.1";
103 type = str;
104 };
105
106 port = mkOption {
107 description = "Kubernetes kubelet healthz port.";
108 default = 10248;
109 type = int;
110 };
111 };
112
113 hostname = mkOption {
114 description = "Kubernetes kubelet hostname override.";
115 default = config.networking.hostName;
116 type = str;
117 };
118
119 kubeconfig = top.lib.mkKubeConfigOptions "Kubelet";
120
121 manifests = mkOption {
122 description = "List of manifests to bootstrap with kubelet (only pods can be created as manifest entry)";
123 type = attrsOf attrs;
124 default = {};
125 };
126
127 networkPlugin = mkOption {
128 description = "Network plugin to use by Kubernetes.";
129 type = nullOr (enum ["cni" "kubenet"]);
130 default = "kubenet";
131 };
132
133 nodeIp = mkOption {
134 description = "IP address of the node. If set, kubelet will use this IP address for the node.";
135 default = null;
136 type = nullOr str;
137 };
138
139 registerNode = mkOption {
140 description = "Whether to auto register kubelet with API server.";
141 default = true;
142 type = bool;
143 };
144
145 package = mkOption {
146 description = "Kubernetes package to use.";
147 type = types.package;
148 default = pkgs.kubernetes;
149 defaultText = "pkgs.kubernetes";
150 };
151
152 port = mkOption {
153 description = "Kubernetes kubelet info server listening port.";
154 default = 10250;
155 type = int;
156 };
157
158 seedDockerImages = mkOption {
159 description = "List of docker images to preload on system";
160 default = [];
161 type = listOf package;
162 };
163
164 taints = mkOption {
165 description = "Node taints (https://kubernetes.io/docs/concepts/configuration/assign-pod-node/).";
166 default = {};
167 type = attrsOf (submodule [ taintOptions ]);
168 };
169
170 tlsCertFile = mkOption {
171 description = "File containing x509 Certificate for HTTPS.";
172 default = null;
173 type = nullOr path;
174 };
175
176 tlsKeyFile = mkOption {
177 description = "File containing x509 private key matching tlsCertFile.";
178 default = null;
179 type = nullOr path;
180 };
181
182 unschedulable = mkOption {
183 description = "Whether to set node taint to unschedulable=true as it is the case of node that has only master role.";
184 default = false;
185 type = bool;
186 };
187
188 verbosity = mkOption {
189 description = ''
190 Optional glog verbosity level for logging statements. See
191 <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/>
192 '';
193 default = null;
194 type = nullOr int;
195 };
196
197 };
198
199 ###### implementation
200 config = mkMerge [
201 (mkIf cfg.enable {
202 services.kubernetes.kubelet.seedDockerImages = [infraContainer];
203
Serge Bazanski765e3692021-02-13 17:42:48 +0000204 # Drop crictl into administrative command line.
205 environment.systemPackages = with pkgs; [ cri-tools ];
206
207 # Force disable Docker.
208 virtualisation.docker.enable = false;
209
210 # TODO(q3k): move to unified cgroups (cgroup v2) once we upgrade to
211 # Kubelet 1.19.
212 systemd.enableUnifiedCgroupHierarchy = false;
213
214 # Run containerd service. This is exposes the CRI API that is consumed by
215 # crictl and Kubelet.
216 systemd.services.containerd = {
217 description = "containerd container runtime";
218 wantedBy = [ "kubernetes.target" ];
219 after = [ "network.target" ];
220 path = with pkgs; [ runc iptables ];
221 serviceConfig = {
222 Delegate = "yes";
223 KillMode = "process";
224 Restart = "always";
225 RestartSec = "5";
226 LimitNPROC = "infinity";
227 LimitCORE = "infinity";
228 # https://github.com/coreos/fedora-coreos-tracker/issues/329
229 LimitNOFILE = "1048576";
230 TasksMax = "infinity";
231 OOMScoreAdjust = "-999";
232
Serge Bazanskief3aab62022-11-18 14:39:45 +0000233 ExecStart = "${pkgs.containerd}/bin/containerd -c ${../containerd.toml}";
Serge Bazanski765e3692021-02-13 17:42:48 +0000234 };
235 };
236
Sergiusz Bazanskic78cc132020-02-02 22:31:53 +0100237 systemd.services.kubelet = {
238 description = "Kubernetes Kubelet Service";
239 wantedBy = [ "kubernetes.target" ];
Serge Bazanski765e3692021-02-13 17:42:48 +0000240 after = [ "network.target" "containerd.service" "kube-apiserver.service" ];
241 path = with pkgs; [ gitMinimal openssh utillinux iproute ethtool thin-provisioning-tools iptables socat cri-tools containerd gzip ] ++ top.path;
242
243 # Mildly hacky - by moving over to OCI image build infrastructure in
244 # NixOS we should be able to get rid of the gunzip.
245 # TODO(q3k): figure this out, check if this is even being used by
246 # kubelet.
Sergiusz Bazanskic78cc132020-02-02 22:31:53 +0100247 preStart = ''
248 ${concatMapStrings (img: ''
Serge Bazanski765e3692021-02-13 17:42:48 +0000249 echo "Seeding OCI image: ${img}"
250 cp ${img} /tmp/image.tar.gz
251 rm -f /tmp/image.tar
252 gunzip /tmp/image.tar.gz
253 ctr -n=k8s.io images import /tmp/image.tar || true
254 rm /tmp/image.tar
Sergiusz Bazanskic78cc132020-02-02 22:31:53 +0100255 '') cfg.seedDockerImages}
256 '';
257 serviceConfig = {
258 Slice = "kubernetes.slice";
259 CPUAccounting = true;
260 MemoryAccounting = true;
261 Restart = "on-failure";
262 RestartSec = "1000ms";
263 ExecStart = ''${cfg.package}/bin/kubelet \
Serge Bazanski765e3692021-02-13 17:42:48 +0000264 --cgroup-driver=systemd \
265 --container-runtime=remote \
266 --container-runtime-endpoint=unix:///var/run/containerd/containerd.sock \
Sergiusz Bazanskic78cc132020-02-02 22:31:53 +0100267 --address=${cfg.address} \
268 --authentication-token-webhook \
269 --authentication-token-webhook-cache-ttl="10s" \
270 --authorization-mode=Webhook \
271 ${optionalString (cfg.clientCaFile != null)
272 "--client-ca-file=${cfg.clientCaFile}"} \
273 ${optionalString (cfg.clusterDns != "")
274 "--cluster-dns=${cfg.clusterDns}"} \
275 ${optionalString (cfg.clusterDomain != "")
276 "--cluster-domain=${cfg.clusterDomain}"} \
277 --cni-conf-dir=/opt/cni/conf \
278 --cni-bin-dir=/opt/cni/bin \
279 ${optionalString (cfg.featureGates != [])
280 "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \
281 --hairpin-mode=hairpin-veth \
282 --healthz-bind-address=${cfg.healthz.bind} \
283 --healthz-port=${toString cfg.healthz.port} \
284 --hostname-override=${cfg.hostname} \
285 --kubeconfig=${kubeconfig} \
286 ${optionalString (cfg.networkPlugin != null)
287 "--network-plugin=${cfg.networkPlugin}"} \
288 ${optionalString (cfg.nodeIp != null)
289 "--node-ip=${cfg.nodeIp}"} \
290 --pod-infra-container-image=pause \
291 ${optionalString (cfg.manifests != {})
292 "--pod-manifest-path=/etc/${manifestPath}"} \
293 --port=${toString cfg.port} \
294 --register-node=${boolToString cfg.registerNode} \
295 ${optionalString (taints != "")
296 "--register-with-taints=${taints}"} \
297 --root-dir=${top.dataDir} \
298 ${optionalString (cfg.tlsCertFile != null)
299 "--tls-cert-file=${cfg.tlsCertFile}"} \
300 ${optionalString (cfg.tlsKeyFile != null)
301 "--tls-private-key-file=${cfg.tlsKeyFile}"} \
302 ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
303 ${cfg.extraOpts}
304 '';
305 WorkingDirectory = top.dataDir;
306 };
307 };
308
Serge Bazanski765e3692021-02-13 17:42:48 +0000309 boot.kernelModules = [ "br_netfilter" "overlay" ];
Patryk Jakuszewa2bcfea2022-12-10 00:00:20 +0000310 boot.kernel.sysctl = {
311 "net.ipv4.ip_forward" = "1";
312 "vm.max_map_count" = "262144"; # Needed for running things such as ElasticSearch.
313 };
Sergiusz Bazanskic78cc132020-02-02 22:31:53 +0100314
315 services.kubernetes.kubelet.hostname = with config.networking;
316 mkDefault (hostName + optionalString (domain != null) ".${domain}");
317
318 services.kubernetes.pki.certs = with top.lib; {
319 kubelet = mkCert {
320 name = "kubelet";
321 CN = top.kubelet.hostname;
322 action = "systemctl restart kubelet.service";
323
324 };
325 kubeletClient = mkCert {
326 name = "kubelet-client";
327 CN = "system:node:${top.kubelet.hostname}";
328 fields = {
329 O = "system:nodes";
330 };
331 action = "systemctl restart kubelet.service";
332 };
333 };
334
335 services.kubernetes.kubelet.kubeconfig.server = mkDefault top.apiserverAddress;
336 })
337
338 (mkIf (cfg.enable && cfg.manifests != {}) {
339 environment.etc = mapAttrs' (name: manifest:
340 nameValuePair "${manifestPath}/${name}.json" {
341 text = builtins.toJSON manifest;
342 mode = "0755";
343 }
344 ) cfg.manifests;
345 })
346
347 (mkIf (cfg.unschedulable && cfg.enable) {
348 services.kubernetes.kubelet.taints.unschedulable = {
349 value = "true";
350 effect = "NoSchedule";
351 };
352 })
353
354 ];
355}