ops/monitoring: split up jsonnet, add simple docs

Change-Id: I8120958a6862411de0446896875766834457aba9
diff --git a/ops/monitoring/lib/cluster.libsonnet b/ops/monitoring/lib/cluster.libsonnet
new file mode 100644
index 0000000..9b64f05
--- /dev/null
+++ b/ops/monitoring/lib/cluster.libsonnet
@@ -0,0 +1,227 @@
+local kube = import "../../../kube/kube.libsonnet";
+
+{
+    // Cluster sets up all cluster-specific monitoring resources in their own namespace.
+    // Currently this consists of a prometheus server that scrapes k8s nodes for kubelet
+    // and cAdvisor metrics.
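+    //
+    // Example usage (a sketch; the import path, cluster name and storage class
+    // below are placeholders and depend on the calling environment):
+    //
+    //   local cluster = import "lib/cluster.libsonnet";
+    //   {
+    //       k0: cluster.Cluster("k0") {
+    //           cfg+: {
+    //               storageClasses+: {
+    //                   prometheus: "example-storage-class",
+    //               },
+    //           },
+    //       },
+    //   }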
+    Cluster(name):: {
+        local cluster = self,
+        local cfg = cluster.cfg,
+        cfg:: {
+            name: name,
+            namespace: "monitoring-cluster",
+
+            images: {
+                prometheus: "prom/prometheus:v2.18.1",
+            },
+
+            storageClasses: {
+                prometheus: error "storageClasses.prometheus must be set",
+            },
+        },
+
+        namespace: kube.Namespace(cfg.namespace),
+
+        prometheus: {
+            local prometheus = self,
+
+            // Configuration that's going to be emitted as prometheus.yml and passed to the
+            // prometheus server for this cluster.
+            configuration:: {
+                global: {
+                    external_labels: {
+                        cluster: cluster.cfg.name,
+                    },
+                },
+
+                // Constructor for a Kubernetes scrape job that uses the pod's service account and
+                // TLS configuration, selecting the given k8s scrape 'role'.
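+                // Individual scrape jobs below extend this base object with job-specific
+                // fields via jsonnet object composition, e.g. (sketch):
+                //   kubeScrapeConfig("some_job", "node") { relabel_configs: [ /* ... */ ] },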
+                local kubeScrapeConfig = function(name, role) {
+                    job_name: name,
+                    scheme: "https",
+                    scrape_interval: "30s",
+                    kubernetes_sd_configs: [ { role: role }, ],
+                    tls_config: {
+                        ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
+                    },
+                    bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token",
+                },
+
+                scrape_configs: [
+                    // When scraping node-based metrics (i.e. node and cadvisor metrics) we contact
+                    // the metrics endpoints on the kubelet via the API server. This is done by
+                    // relabeling __address__ and __metrics_path__ to point at the k8s API server,
+                    // and at the API server proxy path to reach a node's metrics endpoint.
+                    //
+                    // This approach was lifted from the prometheus examples for Kubernetes, and
+                    // while the benefits outlined there do not matter that much to us (our
+                    // kubelets listen on public addresses, anyway), we still prefer it because
+                    // we don't have to hardcode the kubelet TLS port.
+                    //
+                    // https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml
+                    //
+                    // When contacting the API server, we hardcode the 'hswaw.net' DNS suffix as
+                    // our API server's TLS certificate only has a CN/SAN for its full FQDN, not
+                    // the .svc.cluster.local shorthand (see //cluster/clustercfg:clustercfg.py).
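+                    //
+                    // The net effect is that a node named 'example-node' is scraped via
+                    // https://kubernetes.default.svc.<cluster>.hswaw.net:443/api/v1/nodes/example-node/proxy/metrics
+                    // (and .../metrics/cadvisor for the cadvisor job).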
+
+                    // Scrape Kubernetes node metrics via the apiserver. This emits kubelet_* metrics.
+                    kubeScrapeConfig("cluster_node_metrics", "node") {
+                        relabel_configs: [
+                            {
+                                action: "labelmap",
+                                regex: "__meta_kubernetes_node_label_(.+)",
+                            },
+                            {
+                                action: "replace",
+                                target_label: "__address__",
+                                replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
+                            },
+                            {
+                                target_label: "__metrics_path__",
+                                source_labels: ["__meta_kubernetes_node_name"],
+                                regex: "(.+)",
+                                replacement: "/api/v1/nodes/${1}/proxy/metrics",
+                            },
+                        ],
+                    },
+                    // Scrape Kubernetes node cadvisor metrics via the apiserver. This emits container_* metrics.
+                    kubeScrapeConfig("cluster_cadvisor_metrics", "node") {
+                        relabel_configs: [
+                            {
+                                action: "labelmap",
+                                regex: "__meta_kubernetes_node_label_(.+)",
+                            },
+                            {
+                                action: "replace",
+                                target_label: "__address__",
+                                replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
+                            },
+                            {
+                                target_label: "__metrics_path__",
+                                source_labels: ["__meta_kubernetes_node_name"],
+                                regex: "(.+)",
+                                replacement: "/api/v1/nodes/${1}/proxy/metrics/cadvisor",
+                            },
+                        ],
+                    },
+                ],
+            },
+
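+            // The hidden configuration object above is rendered to YAML and shipped to
+            // the prometheus container as /etc/prometheus/prometheus.yml via this ConfigMap.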
+            configmap: kube.ConfigMap("prometheus-cluster") {
+                metadata+: {
+                    namespace: cfg.namespace,
+                },
+                data: {
+                    "prometheus.yml": std.manifestYamlDoc(prometheus.configuration),
+                },
+            },
+
+            sa: kube.ServiceAccount("prometheus-cluster") {
+                metadata+: {
+                    namespace: cfg.namespace,
+                },
+            },
+
+            cr: kube.ClusterRole("monitoring-cluster-prometheus-server-%s" % [cfg.name]) {
+                rules: [
+                    // Allow access to all metrics.
+                    { nonResourceURLs: ["/metrics"], verbs: ["get"], },
+                    // Allow access to node details for discovery.
+                    { apiGroups: [""], resources: ["nodes"], verbs: ["list", "watch", "get"], },
+                    // Allow proxying to nodes in order to access per-node metrics endpoints.
+                    { apiGroups: [""], resources: ["nodes/proxy"], verbs: ["get"], },
+                ],
+            },
+
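+            // Bind the above ClusterRole to the prometheus service account so that the
+            // server can discover nodes and proxy to their metrics endpoints.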
+            crb: kube.ClusterRoleBinding("monitoring-cluster-prometheus-server-%s" % [cfg.name]) {
+                subjects_: [prometheus.sa],
+                roleRef_: prometheus.cr,
+            },
+
+            deploy: kube.Deployment("prometheus-cluster") {
+                metadata+: {
+                    namespace: cfg.namespace,
+                },
+                spec+: {
+                    template+: {
+                        spec+: {
+                            containers_: {
+                                default: kube.Container("default") {
+                                    image: cfg.images.prometheus,
+                                    command: [
+                                        "/bin/prometheus",
+                                        "--config.file=/etc/prometheus/prometheus.yml",
+                                        "--storage.tsdb.path=/prometheus",
+                                        // TODO(q3k): reduce this once we have a long-term storage
+                                        // solution.
+                                        "--storage.tsdb.retention.time=120d",
+                                        "--web.console.libraries=/usr/share/prometheus/console_libraries",
+                                        "--web.console.templates=/usr/share/prometheus/consoles",
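+                                        // --web.enable-lifecycle allows reloading (and shutting
+                                        // down) prometheus via its HTTP API.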
+                                        "--web.enable-lifecycle",
+                                    ],
+                                    resources: {
+                                        requests: {
+                                            memory: "256Mi",
+                                            cpu: "100m",
+                                        },
+                                        limits: {
+                                            memory: "1Gi",
+                                            cpu: "1",
+                                        },
+                                    },
+                                    volumeMounts_: {
+                                        data: { mountPath: "/prometheus", },
+                                        configmap: { mountPath: "/etc/prometheus", },
+                                    },
+                                },
+                            },
+                            serviceAccountName: prometheus.sa.metadata.name,
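+                            // Tolerate the CriticalAddonsOnly taint so that this pod can also be
+                            // scheduled on nodes reserved for critical addons.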
+                            tolerations: [
+                                { key: "CriticalAddonsOnly", operator: "Exists" },
+                            ],
+                            volumes_: {
+                                data: kube.PersistentVolumeClaimVolume(prometheus.pvc),
+                                configmap: kube.ConfigMapVolume(prometheus.configmap),
+                            },
+                        },
+                    },
+                },
+            },
+
+            // Storage volume for Prometheus metrics data.
+            pvc: kube.PersistentVolumeClaim("prometheus-cluster") {
+                metadata+: {
+                    namespace: cfg.namespace,
+                },
+                spec+: {
+                    storageClassName: cfg.storageClasses.prometheus,
+                    accessModes: ["ReadWriteOnce"],
+                    resources: {
+                        requests: {
+                            storage: "32Gi",
+                        },
+                    },
+                },
+            },
+
+            // Network Policy governing access to the prometheus server.
+            np: kube.NetworkPolicy("prometheus-cluster") {
+                metadata+: {
+                    namespace: cfg.namespace,
+                },
+                spec+: kube.podLabelsSelector(prometheus.deploy) {
+                    ingress_: {
+                        // Deny all inbound traffic to the pod.
+                        // This will be augmented to allow access from some other pod/namespace
+                        // in the future.
+                    },
+                    egress_: {
+                        // Allow all outbound traffic from the pod.
+                        outboundAll: {},
+                    },
+                    policyTypes: ["Ingress", "Egress"],
+                },
+            },
+        },
+    },
+}