Merge "cluster/kube: split up cluster.jsonnet"
diff --git a/WORKSPACE b/WORKSPACE
index 138b93a..268259b 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -105,29 +105,6 @@
container_repositories()
-# Nix rules
-http_archive(
- name = "io_tweag_rules_nixpkgs",
- strip_prefix = "rules_nixpkgs-33c50ba64c11dddb95823d12f6b1324083cc5c43",
- urls = ["https://github.com/tweag/rules_nixpkgs/archive/33c50ba64c11dddb95823d12f6b1324083cc5c43.tar.gz"],
- sha256 = "91fedd5151bbd9ef89efc39e2172921bd7036c68cff54712a5df8ddf62bd6922",
-)
-
-# Nix packages
-
-load("@io_tweag_rules_nixpkgs//nixpkgs:nixpkgs.bzl", "nixpkgs_git_repository", "nixpkgs_package")
-
-nixpkgs_git_repository(
- name = "nixpkgs",
- revision = "2f1f9a9fe8a3c22f0677733523eaf6bd33995d50",
-)
-
-nixpkgs_package(
- name = "nixops",
- attribute_path = "nixops",
- repositories = {"nixpkgs": "@nixpkgs"},
-)
-
# Python rules
git_repository(
diff --git a/cluster/kube/lib/metrics.libsonnet b/cluster/kube/lib/metrics.libsonnet
index e11f5ef..fda3a59 100644
--- a/cluster/kube/lib/metrics.libsonnet
+++ b/cluster/kube/lib/metrics.libsonnet
@@ -1,4 +1,5 @@
# Deploy a per-cluster Metrics Server setup.
+# These are Kubernetes metrics, not Prometheus/whatever.
local kube = import "../../../kube/kube.libsonnet";
diff --git a/cluster/tools/install.sh b/cluster/tools/install.sh
index 08e3476..6f32fbb 100755
--- a/cluster/tools/install.sh
+++ b/cluster/tools/install.sh
@@ -15,9 +15,3 @@
//cluster/tools:calicoctl \
//cluster/tools:cfssl
-if [ ! -e /nix ] ; then
- echo "WARNING: No Nix installation detected. nix-dependent tools (nixops) will not be built or available."
-else
- bazel build \
- //cluster/tools:nixops
-fi
diff --git a/ops/monitoring/OWNERS b/ops/monitoring/OWNERS
new file mode 100644
index 0000000..318c819
--- /dev/null
+++ b/ops/monitoring/OWNERS
@@ -0,0 +1,3 @@
+owners:
+- q3k
+- implr
diff --git a/ops/monitoring/doc/index.md b/ops/monitoring/doc/index.md
new file mode 100644
index 0000000..b17287c
--- /dev/null
+++ b/ops/monitoring/doc/index.md
@@ -0,0 +1,38 @@
+Monitoring
+==========
+
+Setting up monitoring in hscloud is a work in progress.
+
+Components
+----------
+
+Currently we have a per-cluster setup with prometheus scraping Kubernetes nodes
+(kubelets) for kubelet metrics and cAdvisor metrics.
+
+ .-----------------------------------------------------------.
+ | k0.hswaw.net |
+ |-----------------------------------------------------------|
+ | .---------------------. |
+ | | ns: metrics-cluster | .--------------------------. |
+ | |---------------------| | kubernetes.svc.cluster | |
+ | | prometheus |--> | apiserver proxy to nodes | |
+ | '---------------------' '--------------------------' |
+ | | |
+ '---------------------------------------- v ----------------'
+ .---------------------.
+ | bc0n01.hswaw.net |-.
+ Kubernetes |---------------------| |-.
+ Nodes | /metrics |-| |
+ | /metrics/cadvisor | |-|
+ '---------------------' | |
+ '---------------------' |
+ '---------------------'
+
+Everything else (dashboard, aggregation, user metrics) is a work in progress.
+
+Legacy
+------
+
+There is a legacy prometheus/grafana VM on https://metrics.hackerspace.pl/. The
+certificate is expired, but it Generally Works, and will be kept going until
+its functionality is migrated to hscloud.
diff --git a/ops/monitoring/k0.jsonnet b/ops/monitoring/k0.jsonnet
new file mode 100644
index 0000000..028a463
--- /dev/null
+++ b/ops/monitoring/k0.jsonnet
@@ -0,0 +1,11 @@
+local lib = import "lib.libsonnet";
+
+{
+ cluster: lib.Cluster("k0") {
+ cfg+: {
+ storageClasses+: {
+ prometheus: "waw-hdd-redundant-3",
+ },
+ },
+ },
+}
diff --git a/ops/monitoring/lib.libsonnet b/ops/monitoring/lib.libsonnet
new file mode 100644
index 0000000..61f49b4
--- /dev/null
+++ b/ops/monitoring/lib.libsonnet
@@ -0,0 +1,5 @@
+local cluster = import "lib/cluster.libsonnet";
+
+{
+ Cluster: cluster.Cluster,
+}
diff --git a/ops/monitoring/lib/cluster.libsonnet b/ops/monitoring/lib/cluster.libsonnet
new file mode 100644
index 0000000..9b64f05
--- /dev/null
+++ b/ops/monitoring/lib/cluster.libsonnet
@@ -0,0 +1,227 @@
+local kube = import "../../../kube/kube.libsonnet";
+
+{
+ // Cluster sets up all cluster-specific monitoring resources in their own namespace.
+ // Currently this consists of a prometheus server that scrapes k8s nodes for kubelet
+ // and cAdvisor metrics.
+ Cluster(name):: {
+ local cluster = self,
+ local cfg = cluster.cfg,
+ cfg:: {
+ name: name,
+ namespace: "monitoring-cluster",
+
+ images: {
+ prometheus: "prom/prometheus:v2.18.1",
+ },
+
+ storageClasses: {
+ prometheus: error "storageClasses.prometheus must be set",
+ },
+ },
+
+ namespace: kube.Namespace(cfg.namespace),
+
+ prometheus: {
+ local prometheus = self,
+
+ // Configuration that's going to be emitted as prometheus.yml and passed to the
+ // prometheus server for this cluster.
+ configuration:: {
+ global: {
+ external_labels: {
+ cluster: cluster.cfg.name,
+ },
+ },
+
+ // Constructor for a Kubernetes scrape job that uses the pod's service account and
+ // TLS configuration, selecting the given k8s scrape 'role'.
+ local kubeScrapeConfig = function(name, role) {
+ job_name: name,
+ scheme: "https",
+ scrape_interval: "30s",
+ kubernetes_sd_configs: [ { role: role }, ],
+ tls_config: {
+ ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
+ },
+ bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token",
+ },
+
+ scrape_configs: [
+ // When scraping node-based metrics (ie. node and cadvisor metrics) we contact
+ // the metrics endpoints on the kubelet via the API server. This is done by
+            // relabeling __address__ and __metrics_path__ to point at the k8s API server,
+ // and at the API server proxy path to reach a node's metrics endpoint.
+ //
+ // This approach was lifted from the prometheus examples for Kubernetes, and
+ // while the benefits outlined there do not matter that much to us (our
+ // kubelets listen on public addresses, anyway), we still enjoy this approach
+ // for the fact that we don't have to hardcode the kubelet TLS port.
+ //
+ // https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml
+ //
+ // When contacting the API server, we hardcode the 'hswaw.net' DNS suffix as
+ // our API server's TLS certificate only has a CN/SAN for its full FQDN, not
+ // the .svc.cluster.local shorthand (see //cluster/clustercfg:clustercfg.py).
+
+            // Scrape Kubernetes node metrics via apiserver. This emits kube_node_* metrics.
+ kubeScrapeConfig("cluster_node_metrics", "node") {
+ relabel_configs: [
+ {
+ action: "labelmap",
+ regex: "__meta_kubernetes_node_label_(.+)",
+ },
+ {
+ action: "replace",
+ target_label: "__address__",
+ replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
+ },
+ {
+ target_label: "__metrics_path__",
+ source_labels: ["__meta_kubernetes_node_name"],
+ regex: "(.+)",
+ replacement: "/api/v1/nodes/${1}/proxy/metrics",
+ },
+ ],
+ },
+ // Scrape Kubernetes node cadvisor metrics via apiserver. This emits container_* metrics.
+ kubeScrapeConfig("cluster_cadvisor_metrics", "node") {
+ relabel_configs: [
+ {
+ action: "labelmap",
+ regex: "__meta_kubernetes_node_label_(.+)",
+ },
+ {
+ action: "replace",
+ target_label: "__address__",
+ replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
+ },
+ {
+ target_label: "__metrics_path__",
+ source_labels: ["__meta_kubernetes_node_name"],
+ regex: "(.+)",
+ replacement: "/api/v1/nodes/${1}/proxy/metrics/cadvisor",
+ },
+ ],
+ },
+ ],
+ },
+
+ configmap: kube.ConfigMap("prometheus-cluster") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ data: {
+ "prometheus.yml": std.manifestYamlDoc(prometheus.configuration),
+ },
+ },
+
+ sa: kube.ServiceAccount("prometheus-cluster") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ },
+
+ cr: kube.ClusterRole("monitoring-cluster-prometheus-server-%s" % [cfg.name]) {
+ rules: [
+ // Allow access to all metrics.
+ { nonResourceURLs: ["/metrics"], verbs: ["get"], },
+ // Allow to access node details for discovery.
+ { apiGroups: [""], resources: ["nodes"], verbs: ["list", "watch", "get"], },
+ // Allow to proxy to bare node HTTP to access per-node metrics endpoints.
+ { apiGroups: [""], resources: ["nodes/proxy"], verbs: ["get"], },
+ ],
+ },
+
+ crb: kube.ClusterRoleBinding("monitoring-cluster-prometheus-server-%s" % [cfg.name]) {
+ subjects_: [prometheus.sa],
+ roleRef_: prometheus.cr,
+ },
+
+ deploy: kube.Deployment("prometheus-cluster") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ spec+: {
+ template+: {
+ spec+: {
+ containers_: {
+ default: kube.Container("default") {
+ image: cfg.images.prometheus,
+ command: [
+ "/bin/prometheus",
+ "--config.file=/etc/prometheus/prometheus.yml",
+ "--storage.tsdb.path=/prometheus",
+ # TODO(q3k): reduce this once we have a long-term storage
+ # solution.
+ "--storage.tsdb.retention.time=120d",
+ "--web.console.libraries=/usr/share/prometheus/console_libraries",
+ "--web.console.templates=/usr/share/prometheus/consoles",
+ "--web.enable-lifecycle",
+ ],
+ resources: {
+ requests: {
+ memory: "256Mi",
+ cpu: "100m",
+ },
+ limits: {
+ memory: "1Gi",
+ cpu: "1",
+ },
+ },
+ volumeMounts_: {
+ data: { mountPath: "/prometheus", },
+ configmap: { mountPath: "/etc/prometheus", },
+ },
+ },
+ },
+ serviceAccountName: prometheus.sa.metadata.name,
+ tolerations: [
+ { key: "CriticalAddonsOnly", operator: "Exists" },
+ ],
+ volumes_: {
+ data: kube.PersistentVolumeClaimVolume(prometheus.pvc),
+ configmap: kube.ConfigMapVolume(prometheus.configmap),
+ },
+ },
+ },
+ },
+ },
+
+ // Kubernetes metric storage volume.
+ pvc: kube.PersistentVolumeClaim("prometheus-cluster") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ spec+: {
+ storageClassName: cfg.storageClasses.prometheus,
+ accessModes: ["ReadWriteOnce"],
+ resources: {
+ requests: {
+ storage: "32Gi",
+ },
+ },
+ },
+ },
+
+ // Network Policy governing access to the prometheus server.
+ np: kube.NetworkPolicy("prometheus-cluster") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ spec+: kube.podLabelsSelector(prometheus.deploy) {
+ ingress_: {
+ // Deny all inbound traffic to pod.
+ // This will be augmented to allow access from some other pod/namespace
+ // in the future.
+ },
+ egress_: {
+ // Allow all outbound traffic from pod.
+ outboundAll: {},
+ },
+ policyTypes: ["Ingress", "Egress"],
+ },
+ },
+ },
+ },
+}