local kube = import "../../../kube/kube.libsonnet";
{
// Cluster sets up all cluster-specific monitoring resources in their own namespace.
// Currently this consists of a prometheus server that scrapes k8s nodes for kubelet
// and cAdvisor metrics.
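//
// A minimal usage sketch (hypothetical cluster name and storage class; the
// actual import path depends on the caller):
//
//   local cluster = import "cluster.libsonnet";
//   {
//     monitoring: cluster.Cluster("k0") {
//       cfg+: {
//         storageClasses+: {
//           prometheus: "example-block-storage",
//         },
//       },
//     },
//   }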
Cluster(name):: {
local cluster = self,
local cfg = cluster.cfg,
cfg:: {
name: name,
namespace: "monitoring-cluster",
images: {
prometheus: "prom/prometheus:v2.18.1",
},
storageClasses: {
prometheus: error "storageClasses.prometheus must be set",
},
},
namespace: kube.Namespace(cfg.namespace),
prometheus: {
local prometheus = self,
// Configuration that's going to be emitted as prometheus.yml and passed to the
// prometheus server for this cluster.
configuration:: {
global: {
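// external_labels are attached to metrics and alerts when this Prometheus
// talks to external systems (federation, remote storage, Alertmanager),
// letting downstream consumers tell clusters apart.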
external_labels: {
cluster: cluster.cfg.name,
},
},
// Constructor for a Kubernetes scrape job that uses the pod's service account and
// TLS configuration, selecting the given k8s scrape 'role'.
local kubeScrapeConfig = function(name, role) {
job_name: name,
scheme: "https",
scrape_interval: "30s",
kubernetes_sd_configs: [ { role: role }, ],
tls_config: {
ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
},
bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token",
},
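// For illustration, kubeScrapeConfig("cluster_node_metrics", "node") (before
// the relabel_configs mixin below) renders roughly to the following
// prometheus.yml fragment once passed through std.manifestYamlDoc (field
// order aside):
//
//   job_name: cluster_node_metrics
//   scheme: https
//   scrape_interval: 30s
//   kubernetes_sd_configs:
//   - role: node
//   tls_config:
//     ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
//   bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token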
scrape_configs: [
// When scraping node-based metrics (i.e. node and cadvisor metrics) we contact
// the metrics endpoints on the kubelet via the API server. This is done by
// relabeling __address__ and __metrics_path__ to point at the k8s API server,
// and at the API server proxy path to reach a node's metrics endpoint.
//
// This approach was lifted from the prometheus examples for Kubernetes, and
// while the benefits outlined there do not matter that much to us (our
// kubelets listen on public addresses, anyway), we still prefer it because
// it means we don't have to hardcode the kubelet TLS port.
//
// https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml
//
// When contacting the API server, we hardcode the 'hswaw.net' DNS suffix as
// our API server's TLS certificate only has a CN/SAN for its full FQDN, not
// the .svc.cluster.local shorthand (see //cluster/clustercfg:clustercfg.py).
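//
// Concretely, after relabeling, a node named e.g. 'node-a' (hypothetical) in a
// cluster named 'k0' would be scraped via:
//
//   https://kubernetes.default.svc.k0.hswaw.net:443/api/v1/nodes/node-a/proxy/metrics
//
// (and .../proxy/metrics/cadvisor for the cadvisor job below).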
// Scrape Kubernetes node metrics via apiserver. This emits kubelet_* metrics.
kubeScrapeConfig("cluster_node_metrics", "node") {
relabel_configs: [
{
action: "labelmap",
regex: "__meta_kubernetes_node_label_(.+)",
},
{
action: "replace",
target_label: "__address__",
replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
},
{
target_label: "__metrics_path__",
source_labels: ["__meta_kubernetes_node_name"],
regex: "(.+)",
replacement: "/api/v1/nodes/${1}/proxy/metrics",
},
],
},
// Scrape Kubernetes node cadvisor metrics via apiserver. This emits container_* metrics.
kubeScrapeConfig("cluster_cadvisor_metrics", "node") {
relabel_configs: [
{
action: "labelmap",
regex: "__meta_kubernetes_node_label_(.+)",
},
{
action: "replace",
target_label: "__address__",
replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
},
{
target_label: "__metrics_path__",
source_labels: ["__meta_kubernetes_node_name"],
regex: "(.+)",
replacement: "/api/v1/nodes/${1}/proxy/metrics/cadvisor",
},
],
},
],
},
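// The configuration above is rendered to YAML and shipped to the prometheus
// server via this ConfigMap, which the Deployment below mounts at
// /etc/prometheus (matching --config.file=/etc/prometheus/prometheus.yml).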
configmap: kube.ConfigMap("prometheus-cluster") {
metadata+: {
namespace: cfg.namespace,
},
data: {
"prometheus.yml": std.manifestYamlDoc(prometheus.configuration),
},
},
sa: kube.ServiceAccount("prometheus-cluster") {
metadata+: {
namespace: cfg.namespace,
},
},
cr: kube.ClusterRole("monitoring-cluster-prometheus-server-%s" % [cfg.name]) {
rules: [
// Allow access to the API server's /metrics endpoint.
{ nonResourceURLs: ["/metrics"], verbs: ["get"], },
// Allow listing and reading node objects for discovery.
{ apiGroups: [""], resources: ["nodes"], verbs: ["list", "watch", "get"], },
// Allow proxying to nodes to reach their per-node metrics endpoints.
{ apiGroups: [""], resources: ["nodes/proxy"], verbs: ["get"], },
],
},
crb: kube.ClusterRoleBinding("monitoring-cluster-prometheus-server-%s" % [cfg.name]) {
subjects_: [prometheus.sa],
roleRef_: prometheus.cr,
},
deploy: kube.Deployment("prometheus-cluster") {
metadata+: {
namespace: cfg.namespace,
},
spec+: {
template+: {
spec+: {
containers_: {
default: kube.Container("default") {
image: cfg.images.prometheus,
command: [
"/bin/prometheus",
"--config.file=/etc/prometheus/prometheus.yml",
"--storage.tsdb.path=/prometheus",
// TODO(q3k): reduce this once we have a long-term storage
// solution.
"--storage.tsdb.retention.time=120d",
"--web.console.libraries=/usr/share/prometheus/console_libraries",
"--web.console.templates=/usr/share/prometheus/consoles",
"--web.enable-lifecycle",
],
resources: {
requests: {
memory: "256Mi",
cpu: "100m",
},
limits: {
memory: "1Gi",
cpu: "1",
},
},
volumeMounts_: {
data: { mountPath: "/prometheus", },
configmap: { mountPath: "/etc/prometheus", },
},
},
},
serviceAccountName: prometheus.sa.metadata.name,
tolerations: [
{ key: "CriticalAddonsOnly", operator: "Exists" },
],
volumes_: {
data: kube.PersistentVolumeClaimVolume(prometheus.pvc),
configmap: kube.ConfigMapVolume(prometheus.configmap),
},
},
},
},
},
// Kubernetes metric storage volume.
pvc: kube.PersistentVolumeClaim("prometheus-cluster") {
metadata+: {
namespace: cfg.namespace,
},
spec+: {
storageClassName: cfg.storageClasses.prometheus,
accessModes: ["ReadWriteOnce"],
resources: {
requests: {
storage: "32Gi",
},
},
},
},
// Network Policy governing access to the prometheus server.
np: kube.NetworkPolicy("prometheus-cluster") {
metadata+: {
namespace: cfg.namespace,
},
spec+: kube.podLabelsSelector(prometheus.deploy) {
ingress_: {
// Deny all inbound traffic to pod.
// This will be augmented to allow access from some other pod/namespace
// in the future.
},
egress_: {
// Allow all outbound traffic from pod.
outboundAll: {},
},
policyTypes: ["Ingress", "Egress"],
},
},
},
},
}