| local kube = import "kube.libsonnet"; |
| |
| { |
| // Handle on the outermost object so nested objects can refer back to its fields. |
| local policies = self, |
| |
| // Names handed to kube.ClusterRole for the roles that permit `use` of the |
| // matching PodSecurityPolicy. |
| policyNameAllowInsecure: 'policy:allow-insecure', |
| policyNameAllowSecure: 'policy:allow-secure', |
| policyNameAllowMostlySecure: 'policy:allow-mostlysecure', |
| |
| // Comma-separated capability names, produced with: |
| // egrep 'define CAP_[A-Z_]+.+[0-9]+$' include/linux/capability.h | cut -d' ' -f 2 | tr '\n' ',' |
| local allCapsStr = 'CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_DAC_READ_SEARCH,CAP_FOWNER,CAP_FSETID,CAP_KILL,CAP_SETGID,CAP_SETUID,CAP_SETPCAP,CAP_LINUX_IMMUTABLE,CAP_NET_BIND_SERVICE,CAP_NET_BROADCAST,CAP_NET_ADMIN,CAP_NET_RAW,CAP_IPC_LOCK,CAP_IPC_OWNER,CAP_SYS_MODULE,CAP_SYS_RAWIO,CAP_SYS_CHROOT,CAP_SYS_PTRACE,CAP_SYS_PACCT,CAP_SYS_ADMIN,CAP_SYS_BOOT,CAP_SYS_NICE,CAP_SYS_RESOURCE,CAP_SYS_TIME,CAP_SYS_TTY_CONFIG,CAP_MKNOD,CAP_LEASE,CAP_AUDIT_WRITE,CAP_AUDIT_CONTROL,CAP_SETFCAP,CAP_MAC_OVERRIDE,CAP_MAC_ADMIN,CAP_SYSLOG,CAP_WAKE_ALARM,CAP_BLOCK_SUSPEND,CAP_AUDIT_READ', |
| // The same names as a set (sorted, de-duplicated array) with the leading |
| // four characters (`CAP_`) stripped from each entry. |
| local allCaps = std.set([ |
| std.substr(cap, 4, std.length(cap) - 4) |
| for cap in std.split(allCapsStr, ',') |
| ]), |
| |
| |
| Cluster: { |
| local cluster = self, |
| |
| // Insecure: whoever may create pods under this policy can take over the |
| // entire cluster. |
| insecure: kube._Object("policy/v1beta1", "PodSecurityPolicy", "insecure") { |
| spec: { |
| // Every strategy field below uses the same rule. |
| local unrestricted = { rule: 'RunAsAny' }, |
| |
| privileged: true, |
| allowPrivilegeEscalation: true, |
| allowedCapabilities: ['*'], |
| volumes: ['*'], |
| |
| // Host namespaces and host ports. |
| hostNetwork: true, |
| hostIPC: true, |
| hostPID: true, |
| hostPorts: [ |
| { min: 1, max: 40000 }, |
| ], |
| |
| // User, SELinux and group strategies. |
| runAsUser: unrestricted, |
| seLinux: unrestricted, |
| supplementalGroups: unrestricted, |
| fsGroup: unrestricted, |
| }, |
| }, |
| // ClusterRole whose single rule grants `use` on the PodSecurityPolicy |
| // named `insecure`. |
| insecureRole: kube.ClusterRole(policies.policyNameAllowInsecure) { |
| rules: [{ |
| apiGroups: ['policy'], |
| resources: ['podsecuritypolicies'], |
| resourceNames: ['insecure'], |
| verbs: ['use'], |
| }], |
| }, |
| |
| // Secure: a deliberately narrow policy; everyone is allowed to spawn |
| // containers of this kind. |
| secure: kube._Object("policy/v1beta1", "PodSecurityPolicy", "secure") { |
| spec: { |
| // Group ID range 1-65535: group 0 is excluded, which forbids adding |
| // the root group. |
| local nonRootGroups = { |
| rule: 'MustRunAs', |
| ranges: [ |
| { min: 1, max: 65535 }, |
| ], |
| }, |
| |
| privileged: false, |
| // Required to prevent escalations to root. |
| allowPrivilegeEscalation: false, |
| // Drop every capability. runAsUser below is RunAsAny, so a container |
| // may still start as UID 0; this is kept as defense in depth. |
| requiredDropCapabilities: ["ALL"], |
| readOnlyRootFilesystem: false, |
| |
| // No access to host namespaces. |
| hostNetwork: false, |
| hostIPC: false, |
| hostPID: false, |
| |
| // Allow core volume types. |
| volumes: [ |
| 'configMap', |
| 'emptyDir', |
| 'projected', |
| 'secret', |
| 'downwardAPI', |
| 'persistentVolumeClaim', |
| ], |
| |
| // Allow to run as root - docker, we trust you here. |
| runAsUser: { rule: 'RunAsAny' }, |
| seLinux: { rule: 'RunAsAny' }, |
| supplementalGroups: nonRootGroups, |
| fsGroup: nonRootGroups, |
| }, |
| }, |
| // ClusterRole whose single rule grants `use` on the PodSecurityPolicy |
| // named `secure`. |
| secureRole: kube.ClusterRole(policies.policyNameAllowSecure) { |
| rules: [{ |
| apiGroups: ['policy'], |
| resources: ['podsecuritypolicies'], |
| resourceNames: ['secure'], |
| verbs: ['use'], |
| }], |
| }, |
| |
| // MostlySecure: like secure, but allows for setuid inside containers |
| // and enough filesystem access to run apt. |
| // Built by extending cluster.secure: only the name and the spec fields |
| // listed here are overridden. |
| mostlySecure: cluster.secure { |
| metadata+: { |
| name: "mostlysecure", |
| }, |
| spec+: { |
| // Drop every capability listed in allCaps except the ones named here. |
| // std.setDiff expects both operands to be sets (sorted arrays without |
| // duplicates), so the keep-list is normalised with std.set instead of |
| // relying on the literal staying hand-sorted. |
| requiredDropCapabilities: std.setDiff(allCaps, std.set([ |
| "CHOWN", |
| "DAC_OVERRIDE", |
| "FOWNER", |
| "LEASE", |
| "SETGID", |
| "SETUID", |
| ])), |
| supplementalGroups: { |
| // Allow running as root gid - we allow running as root |
| // uid anyway, as we trust our container runtime. |
| rule: 'MustRunAs', |
| ranges: [ |
| { min: 0, max: 65535, }, |
| ], |
| }, |
| fsGroup: { |
| // Allow setting the fsGroup to 0, as all filesystem mounts |
| // are trusted anyway. |
| rule: 'MustRunAs', |
| ranges: [ |
| { min: 0, max: 65535, }, |
| ], |
| }, |
| }, |
| }, |
| // ClusterRole whose single rule grants `use` on the PodSecurityPolicy |
| // named `mostlysecure`. |
| mostlySecureRole: kube.ClusterRole(policies.policyNameAllowMostlySecure) { |
| rules: [{ |
| apiGroups: ['policy'], |
| resources: ['podsecuritypolicies'], |
| resourceNames: ['mostlysecure'], |
| verbs: ['use'], |
| }], |
| }, |
| }, |
| |
| // Returns an object whose `rb` field is a RoleBinding placed in `namespace` |
| // that ties the insecure ClusterRole to the `system:serviceaccounts` group. |
| AllowNamespaceInsecure(namespace): { |
| local bindingName = "policy:allow-insecure-in-" + namespace, |
| local serviceAccountsGroup = { |
| apiGroup: "rbac.authorization.k8s.io", |
| kind: "Group", |
| name: "system:serviceaccounts", |
| }, |
| |
| rb: kube.RoleBinding(bindingName) { |
| metadata+: { namespace: namespace }, |
| subjects: [serviceAccountsGroup], |
| roleRef_: policies.Cluster.insecureRole, |
| }, |
| }, |
| |
| // Returns an object whose `rb` field is a RoleBinding placed in `namespace` |
| // that ties the mostlysecure ClusterRole to the `system:serviceaccounts` group. |
| AllowNamespaceMostlySecure(namespace): { |
| local bindingName = "policy:allow-mostlysecure-in-" + namespace, |
| local serviceAccountsGroup = { |
| apiGroup: "rbac.authorization.k8s.io", |
| kind: "Group", |
| name: "system:serviceaccounts", |
| }, |
| |
| rb: kube.RoleBinding(bindingName) { |
| metadata+: { namespace: namespace }, |
| subjects: [serviceAccountsGroup], |
| roleRef_: policies.Cluster.mostlySecureRole, |
| }, |
| }, |
| } |