blob: 79da4cbc82441c9483969204260c0f2683cb8062 [file] [log] [blame]
# Same as upstream kubelet.nix module from nixpkgs, but with the following
# changes:
#  - cni tunables nuked and replaced with static host dirs, so that calico
#    running on k8s can drop CNI plugins there itself
#  - package configurable separately from rest of kubernetes
6
7{ config, lib, pkgs, ... }:
8
9with lib;
10
11let
# Shorthands: `top` is the cluster-wide kubernetes option tree, `cfg` the
# kubelet-specific subtree of it.
top = config.services.kubernetes;
cfg = top.kubelet;

# Image named "pause:latest", built from top.package.pause, whose command is
# /bin/pause. It is added to cfg.seedDockerImages when the kubelet is enabled.
infraContainer = pkgs.dockerTools.buildImage {
  name = "pause";
  tag = "latest";
  contents = top.package.pause;
  config = {
    Cmd = [ "/bin/pause" ];
  };
};

# Kubeconfig for the kubelet, produced by the shared helper from the
# cfg.kubeconfig option values.
kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig;

# Location of static pod manifests, relative to /etc.
manifestPath = "kubernetes/manifests";
25
# Submodule describing a single node taint. The taint key defaults to the
# attribute name under which the taint is declared.
taintOptions = { name, ... }: {
  options = {
    key = mkOption {
      type = types.str;
      default = name;
      description = "Key of taint.";
    };

    value = mkOption {
      type = types.str;
      description = "Value of taint.";
    };

    effect = mkOption {
      type = types.enum [ "NoSchedule" "PreferNoSchedule" "NoExecute" ];
      example = "NoSchedule";
      description = "Effect of taint.";
    };
  };
};
44
# All configured taints rendered as one comma-separated string of
# "key=value:effect" entries (empty string when no taints are set).
taints =
  let
    renderTaint = taint: "${taint.key}=${taint.value}:${taint.effect}";
  in
  concatMapStringsSep "," renderTaint (attrValues cfg.taints);
46in
47{
# services/cluster/kubernetes/default.nix still wants to poke flannel,
# but since we nuke that module we have to add a fake tunable for it.
options.services.kubernetes.flannel = {
  # mkEnableOption already prefixes the text with "Whether to enable", so the
  # argument must be just the feature name.
  enable = mkEnableOption "flannel networking";
};
53
54 ###### interface
# Option declarations for the kubelet. Most of these are rendered into kubelet
# command line flags by the systemd unit in the implementation section.
options.services.kubernetes.kubelet = with lib.types; {

  address = mkOption {
    description = "Kubernetes kubelet info server listening address.";
    default = "0.0.0.0";
    type = str;
  };

  clusterDns = mkOption {
    description = "Use alternative DNS.";
    default = "10.1.0.1";
    type = str;
  };

  clusterDomain = mkOption {
    description = "Use alternative domain.";
    default = config.services.kubernetes.addons.dns.clusterDomain;
    type = str;
  };

  clientCaFile = mkOption {
    description = "Kubernetes apiserver CA file for client authentication.";
    default = top.caFile;
    type = nullOr path;
  };

  # mkEnableOption wraps its argument as "Whether to enable <name>.", so the
  # name carries no trailing period of its own.
  enable = mkEnableOption "Kubernetes kubelet";

  extraOpts = mkOption {
    description = "Kubernetes kubelet extra command line options.";
    default = "";
    type = str;
  };

  featureGates = mkOption {
    description = "List set of feature gates";
    default = top.featureGates;
    type = listOf str;
  };

  healthz = {
    bind = mkOption {
      description = "Kubernetes kubelet healthz listening address.";
      default = "127.0.0.1";
      type = str;
    };

    port = mkOption {
      description = "Kubernetes kubelet healthz port.";
      default = 10248;
      # Reject values outside 0-65535 at evaluation time instead of letting
      # the kubelet fail at start-up.
      type = types.port;
    };
  };

  # Note: when the kubelet is enabled, the implementation section overrides
  # this default (via mkDefault) with the host name plus the domain, if any.
  hostname = mkOption {
    description = "Kubernetes kubelet hostname override.";
    default = config.networking.hostName;
    type = str;
  };

  kubeconfig = top.lib.mkKubeConfigOptions "Kubelet";

  manifests = mkOption {
    description = "List of manifests to bootstrap with kubelet (only pods can be created as manifest entry)";
    type = attrsOf attrs;
    default = {};
  };

  networkPlugin = mkOption {
    description = "Network plugin to use by Kubernetes.";
    type = nullOr (enum ["cni" "kubenet"]);
    default = "kubenet";
  };

  nodeIp = mkOption {
    description = "IP address of the node. If set, kubelet will use this IP address for the node.";
    default = null;
    type = nullOr str;
  };

  registerNode = mkOption {
    description = "Whether to auto register kubelet with API server.";
    default = true;
    type = bool;
  };

  package = mkOption {
    description = "Kubernetes package to use.";
    type = types.package;
    default = pkgs.kubernetes;
    defaultText = "pkgs.kubernetes";
  };

  port = mkOption {
    description = "Kubernetes kubelet info server listening port.";
    default = 10250;
    # Spelled `types.port` (not bare `port`) so it cannot be mistaken for
    # this option's own attribute name.
    type = types.port;
  };

  seedDockerImages = mkOption {
    description = "List of docker images to preload on system";
    default = [];
    type = listOf package;
  };

  taints = mkOption {
    description = "Node taints (https://kubernetes.io/docs/concepts/configuration/assign-pod-node/).";
    default = {};
    type = attrsOf (submodule [ taintOptions ]);
  };

  tlsCertFile = mkOption {
    description = "File containing x509 Certificate for HTTPS.";
    default = null;
    type = nullOr path;
  };

  tlsKeyFile = mkOption {
    description = "File containing x509 private key matching tlsCertFile.";
    default = null;
    type = nullOr path;
  };

  unschedulable = mkOption {
    description = "Whether to set node taint to unschedulable=true as it is the case of node that has only master role.";
    default = false;
    type = bool;
  };

  verbosity = mkOption {
    description = ''
      Optional glog verbosity level for logging statements. See
      <link xlink:href="https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md"/>
    '';
    default = null;
    type = nullOr int;
  };

};
194
195 ###### implementation
196 config = mkMerge [
197 (mkIf cfg.enable {
# Always seed the pause (pod infra) image built above.
services.kubernetes.kubelet.seedDockerImages = [ infraContainer ];

# Drop crictl into administrative command line.
environment.systemPackages = [ pkgs.cri-tools ];

# Force disable Docker.
virtualisation.docker = {
  enable = false;
};

# TODO(q3k): move to unified cgroups (cgroup v2) once we upgrade to
# Kubelet 1.19.
systemd = {
  enableUnifiedCgroupHierarchy = false;
};
209
# Run the containerd service. It exposes the CRI API that is consumed by
# crictl and Kubelet.
systemd.services.containerd = {
  description = "containerd container runtime";
  after = [ "network.target" ];
  wantedBy = [ "kubernetes.target" ];
  path = [ pkgs.runc pkgs.iptables ];

  serviceConfig = {
    ExecStart = "${pkgs.containerd}/bin/containerd -c ${./containerd.toml}";

    # Restart policy.
    Restart = "always";
    RestartSec = "5";
    KillMode = "process";
    Delegate = "yes";
    OOMScoreAdjust = "-999";

    # Resource limits.
    LimitNPROC = "infinity";
    LimitCORE = "infinity";
    # https://github.com/coreos/fedora-coreos-tracker/issues/329
    LimitNOFILE = "1048576";
    TasksMax = "infinity";
  };
};
232
# Kubelet unit. It is ordered after containerd (whose CRI socket it talks to)
# and after the apiserver unit.
systemd.services.kubelet = {
  description = "Kubernetes Kubelet Service";
  wantedBy = [ "kubernetes.target" ];
  after = [ "network.target" "containerd.service" "kube-apiserver.service" ];
  path = with pkgs; [ gitMinimal openssh utillinux iproute ethtool thin-provisioning-tools iptables socat cri-tools containerd gzip ] ++ top.path;

  # Seed every image from cfg.seedDockerImages into containerd's k8s.io
  # namespace before the kubelet starts.
  #
  # The gzipped tarball is streamed straight into `ctr images import -`
  # (reading from stdin). The previous version staged it through the fixed
  # paths /tmp/image.tar.gz and /tmp/image.tar; this script runs as root, so
  # those predictable names in a world-writable directory were exposed to
  # symlink pre-creation and collisions, and were left behind whenever a step
  # aborted.
  #
  # Seeding stays best-effort: a failed import does not block kubelet start.
  #
  # Mildly hacky - by moving over to OCI image build infrastructure in
  # NixOS we should be able to get rid of the gunzip.
  # TODO(q3k): figure this out, check if this is even being used by
  # kubelet.
  preStart = ''
    ${concatMapStrings (img: ''
      echo "Seeding OCI image: ${img}"
      gunzip -c "${img}" | ctr -n=k8s.io images import - || true
    '') cfg.seedDockerImages}
  '';
  serviceConfig = {
    Slice = "kubernetes.slice";
    CPUAccounting = true;
    MemoryAccounting = true;
    Restart = "on-failure";
    RestartSec = "1000ms";
    # Optional flags are emitted through optionalString, so an unset option
    # leaves only an empty continuation line in the command.
    ExecStart = ''${cfg.package}/bin/kubelet \
      --cgroup-driver=systemd \
      --container-runtime=remote \
      --container-runtime-endpoint=unix:///var/run/containerd/containerd.sock \
      --address=${cfg.address} \
      --authentication-token-webhook \
      --authentication-token-webhook-cache-ttl="10s" \
      --authorization-mode=Webhook \
      ${optionalString (cfg.clientCaFile != null)
        "--client-ca-file=${cfg.clientCaFile}"} \
      ${optionalString (cfg.clusterDns != "")
        "--cluster-dns=${cfg.clusterDns}"} \
      ${optionalString (cfg.clusterDomain != "")
        "--cluster-domain=${cfg.clusterDomain}"} \
      --cni-conf-dir=/opt/cni/conf \
      --cni-bin-dir=/opt/cni/bin \
      ${optionalString (cfg.featureGates != [])
        "--feature-gates=${concatMapStringsSep "," (feature: "${feature}=true") cfg.featureGates}"} \
      --hairpin-mode=hairpin-veth \
      --healthz-bind-address=${cfg.healthz.bind} \
      --healthz-port=${toString cfg.healthz.port} \
      --hostname-override=${cfg.hostname} \
      --kubeconfig=${kubeconfig} \
      ${optionalString (cfg.networkPlugin != null)
        "--network-plugin=${cfg.networkPlugin}"} \
      ${optionalString (cfg.nodeIp != null)
        "--node-ip=${cfg.nodeIp}"} \
      --pod-infra-container-image=pause \
      ${optionalString (cfg.manifests != {})
        "--pod-manifest-path=/etc/${manifestPath}"} \
      --port=${toString cfg.port} \
      --register-node=${boolToString cfg.registerNode} \
      ${optionalString (taints != "")
        "--register-with-taints=${taints}"} \
      --root-dir=${top.dataDir} \
      ${optionalString (cfg.tlsCertFile != null)
        "--tls-cert-file=${cfg.tlsCertFile}"} \
      ${optionalString (cfg.tlsKeyFile != null)
        "--tls-private-key-file=${cfg.tlsKeyFile}"} \
      ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
      ${cfg.extraOpts}
    '';
    WorkingDirectory = top.dataDir;
  };
};
304
# Kernel modules loaded at boot on kubelet hosts.
boot.kernelModules = [ "br_netfilter" "overlay" ];

boot.kernel.sysctl."net.ipv4.ip_forward" = "1";
# Needed for running things such as ElasticSearch.
boot.kernel.sysctl."vm.max_map_count" = "262144";

# Default the kubelet hostname to the host name, with ".<domain>" appended
# when a domain is configured.
services.kubernetes.kubelet.hostname =
  let
    net = config.networking;
    domainSuffix = optionalString (net.domain != null) ".${net.domain}";
  in
  mkDefault (net.hostName + domainSuffix);
313
# Certificates requested for the kubelet: one with the kubelet hostname as
# CN, and a client certificate identifying the node as a member of
# system:nodes. Both restart the kubelet as their action.
services.kubernetes.pki.certs =
  let
    restartKubelet = "systemctl restart kubelet.service";
  in
  {
    kubelet = top.lib.mkCert {
      name = "kubelet";
      CN = cfg.hostname;
      action = restartKubelet;
    };

    kubeletClient = top.lib.mkCert {
      name = "kubelet-client";
      CN = "system:node:${cfg.hostname}";
      fields.O = "system:nodes";
      action = restartKubelet;
    };
  };

# Point the kubelet's kubeconfig at the cluster apiserver unless overridden.
services.kubernetes.kubelet.kubeconfig.server = mkDefault top.apiserverAddress;
332 })
333
# When manifests are configured, write each one as
# /etc/<manifestPath>/<name>.json so the kubelet picks it up from its
# pod manifest path.
(mkIf (cfg.enable && cfg.manifests != {}) {
  environment.etc =
    let
      toEtcEntry = manifestName: body:
        nameValuePair "${manifestPath}/${manifestName}.json" {
          mode = "0755";
          text = builtins.toJSON body;
        };
    in
    mapAttrs' toEtcEntry cfg.manifests;
})
342
# An unschedulable node registers with an "unschedulable=true:NoSchedule"
# taint (the taint key defaults to the attribute name).
(mkIf (cfg.enable && cfg.unschedulable) {
  services.kubernetes.kubelet.taints.unschedulable.value = "true";
  services.kubernetes.kubelet.taints.unschedulable.effect = "NoSchedule";
})
349
350 ];
351}