ops/monitoring: split up jsonnet, add simple docs
Change-Id: I8120958a6862411de0446896875766834457aba9
diff --git a/ops/monitoring/lib/cluster.libsonnet b/ops/monitoring/lib/cluster.libsonnet
new file mode 100644
index 0000000..9b64f05
--- /dev/null
+++ b/ops/monitoring/lib/cluster.libsonnet
@@ -0,0 +1,227 @@
+local kube = import "../../../kube/kube.libsonnet";
+
+{
+ // Cluster sets up all cluster-specific monitoring resources in their own namespace.
+ // Currently this consists of a prometheus server that scrapes k8s nodes for kubelet
+ // and cAdvisor metrics.
+ Cluster(name):: {
+ local cluster = self,
+ local cfg = cluster.cfg,
+ cfg:: {
+ name: name,
+ namespace: "monitoring-cluster",
+
+ images: {
+ prometheus: "prom/prometheus:v2.18.1",
+ },
+
+ storageClasses: {
+ prometheus: error "storageClasses.prometheus must be set",
+ },
+ },
+
+ namespace: kube.Namespace(cfg.namespace),
+
+ prometheus: {
+ local prometheus = self,
+
+ // Configuration that's going to be emitted as prometheus.yml and passed to the
+ // prometheus server for this cluster.
+ configuration:: {
+ global: {
+ external_labels: {
+ cluster: cluster.cfg.name,
+ },
+ },
+
+ // Constructor for a Kubernetes scrape job that uses the pod's service account and
+ // TLS configuration, selecting the given k8s scrape 'role'.
+ local kubeScrapeConfig = function(name, role) {
+ job_name: name,
+ scheme: "https",
+ scrape_interval: "30s",
+ kubernetes_sd_configs: [ { role: role }, ],
+ tls_config: {
+ ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
+ },
+ bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token",
+ },
+
+ scrape_configs: [
+ // When scraping node-based metrics (i.e. node and cadvisor metrics) we contact
+ // the metrics endpoints on the kubelet via the API server. This is done by
+ // relabeling __address__ and __metrics_path__ to point at the k8s API server,
+ // and at the API server proxy path to reach a node's metrics endpoint.
+ //
+ // This approach was lifted from the prometheus examples for Kubernetes, and
+ // while the benefits outlined there do not matter that much to us (our
+ // kubelets listen on public addresses, anyway), we still enjoy this approach
+ // for the fact that we don't have to hardcode the kubelet TLS port.
+ //
+ // https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml
+ //
+ // When contacting the API server, we hardcode the 'hswaw.net' DNS suffix as
+ // our API server's TLS certificate only has a CN/SAN for its full FQDN, not
+ // the .svc.cluster.local shorthand (see //cluster/clustercfg:clustercfg.py).
+
+ // Scrape Kubernetes node metrics via apiserver. This emits kubelet/node metrics.
+ kubeScrapeConfig("cluster_node_metrics", "node") {
+ relabel_configs: [
+ {
+ action: "labelmap",
+ regex: "__meta_kubernetes_node_label_(.+)",
+ },
+ {
+ action: "replace",
+ target_label: "__address__",
+ replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
+ },
+ {
+ target_label: "__metrics_path__",
+ source_labels: ["__meta_kubernetes_node_name"],
+ regex: "(.+)",
+ replacement: "/api/v1/nodes/${1}/proxy/metrics",
+ },
+ ],
+ },
+ // Scrape Kubernetes node cadvisor metrics via apiserver. This emits container_* metrics.
+ kubeScrapeConfig("cluster_cadvisor_metrics", "node") {
+ relabel_configs: [
+ {
+ action: "labelmap",
+ regex: "__meta_kubernetes_node_label_(.+)",
+ },
+ {
+ action: "replace",
+ target_label: "__address__",
+ replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
+ },
+ {
+ target_label: "__metrics_path__",
+ source_labels: ["__meta_kubernetes_node_name"],
+ regex: "(.+)",
+ replacement: "/api/v1/nodes/${1}/proxy/metrics/cadvisor",
+ },
+ ],
+ },
+ ],
+ },
+
+ configmap: kube.ConfigMap("prometheus-cluster") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ data: {
+ "prometheus.yml": std.manifestYamlDoc(prometheus.configuration),
+ },
+ },
+
+ sa: kube.ServiceAccount("prometheus-cluster") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ },
+
+ cr: kube.ClusterRole("monitoring-cluster-prometheus-server-%s" % [cfg.name]) {
+ rules: [
+ // Allow access to all metrics.
+ { nonResourceURLs: ["/metrics"], verbs: ["get"], },
+ // Allow to access node details for discovery.
+ { apiGroups: [""], resources: ["nodes"], verbs: ["list", "watch", "get"], },
+ // Allow to proxy to bare node HTTP to access per-node metrics endpoints.
+ { apiGroups: [""], resources: ["nodes/proxy"], verbs: ["get"], },
+ ],
+ },
+
+ crb: kube.ClusterRoleBinding("monitoring-cluster-prometheus-server-%s" % [cfg.name]) {
+ subjects_: [prometheus.sa],
+ roleRef_: prometheus.cr,
+ },
+
+ deploy: kube.Deployment("prometheus-cluster") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ spec+: {
+ template+: {
+ spec+: {
+ containers_: {
+ default: kube.Container("default") {
+ image: cfg.images.prometheus,
+ command: [
+ "/bin/prometheus",
+ "--config.file=/etc/prometheus/prometheus.yml",
+ "--storage.tsdb.path=/prometheus",
+ # TODO(q3k): reduce this once we have a long-term storage
+ # solution.
+ "--storage.tsdb.retention.time=120d",
+ "--web.console.libraries=/usr/share/prometheus/console_libraries",
+ "--web.console.templates=/usr/share/prometheus/consoles",
+ "--web.enable-lifecycle",
+ ],
+ resources: {
+ requests: {
+ memory: "256Mi",
+ cpu: "100m",
+ },
+ limits: {
+ memory: "1Gi",
+ cpu: "1",
+ },
+ },
+ volumeMounts_: {
+ data: { mountPath: "/prometheus", },
+ configmap: { mountPath: "/etc/prometheus", },
+ },
+ },
+ },
+ serviceAccountName: prometheus.sa.metadata.name,
+ tolerations: [
+ { key: "CriticalAddonsOnly", operator: "Exists" },
+ ],
+ volumes_: {
+ data: kube.PersistentVolumeClaimVolume(prometheus.pvc),
+ configmap: kube.ConfigMapVolume(prometheus.configmap),
+ },
+ },
+ },
+ },
+ },
+
+ // Kubernetes metric storage volume.
+ pvc: kube.PersistentVolumeClaim("prometheus-cluster") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ spec+: {
+ storageClassName: cfg.storageClasses.prometheus,
+ accessModes: ["ReadWriteOnce"],
+ resources: {
+ requests: {
+ storage: "32Gi",
+ },
+ },
+ },
+ },
+
+ // Network Policy governing access to the prometheus server.
+ np: kube.NetworkPolicy("prometheus-cluster") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ spec+: kube.podLabelsSelector(prometheus.deploy) {
+ ingress_: {
+ // Deny all inbound traffic to pod.
+ // This will be augmented to allow access from some other pod/namespace
+ // in the future.
+ },
+ egress_: {
+ // Allow all outbound traffic from pod.
+ outboundAll: {},
+ },
+ policyTypes: ["Ingress", "Egress"],
+ },
+ },
+ },
+ },
+}