monitoring: global: implement
This creates a basic Global instance, running Victoria Metrics on k0.
Change-Id: Ib03003213d79b41cc54efe40cd2c4837f652c0f4
diff --git a/ops/monitoring/lib/cluster.libsonnet b/ops/monitoring/lib/cluster.libsonnet
index 9b64f05..511d426 100644
--- a/ops/monitoring/lib/cluster.libsonnet
+++ b/ops/monitoring/lib/cluster.libsonnet
@@ -2,8 +2,10 @@
{
// Cluster sets up all cluster-specific monitoring resources in their own namespace.
+ //
// Currently this consists of a prometheus server that scrapes k8s nodes for kubelet
- // and cAdvisor metrics.
+ // and cAdvisor metrics, and optionally ships metrics to the global tier via the
+ // configured upstreams.
Cluster(name):: {
local cluster = self,
local cfg = cluster.cfg,
@@ -18,6 +20,17 @@
storageClasses: {
prometheus: error "storageClasses.prometheus must be set",
},
+
+ // Username used to authenticate to upstreams.
+ username: error "username must be set",
+
+ // Global tier upstreams that this cluster should ship metrics off to.
+ // List of:
+ // {
+ // remote: URL of upstream
+ // password: password used to authenticate, in conjunction with cfg.username.
+ // }
+ upstreams: [],
},
namespace: kube.Namespace(cfg.namespace),
@@ -105,6 +118,17 @@
],
},
],
+
+ remote_write: [
+ {
+ url: u.remote,
+ basic_auth: {
+ username: cluster.cfg.username,
+ password: u.password,
+ },
+ }
+ for u in cluster.cfg.upstreams
+ ],
},
configmap: kube.ConfigMap("prometheus-cluster") {
@@ -152,9 +176,7 @@
"/bin/prometheus",
"--config.file=/etc/prometheus/prometheus.yml",
"--storage.tsdb.path=/prometheus",
- # TODO(q3k): reduce this once we have a long-term storage
- # solution.
- "--storage.tsdb.retention.time=120d",
+ "--storage.tsdb.retention.size=10GB",
"--web.console.libraries=/usr/share/prometheus/console_libraries",
"--web.console.templates=/usr/share/prometheus/consoles",
"--web.enable-lifecycle",
@@ -198,7 +220,7 @@
accessModes: ["ReadWriteOnce"],
resources: {
requests: {
- storage: "32Gi",
+ storage: "16Gi",
},
},
},
diff --git a/ops/monitoring/lib/global.libsonnet b/ops/monitoring/lib/global.libsonnet
new file mode 100644
index 0000000..dbdbebb
--- /dev/null
+++ b/ops/monitoring/lib/global.libsonnet
@@ -0,0 +1,149 @@
+local kube = import "../../../kube/kube.libsonnet";
+
+{
+ // Global sets up a global tier instance of the hscloud monitoring infrastructure.
+ //
+ // This currently consists of Victoria Metrics, to which the agent tier sends metrics data via
+ // the prometheus remote_write protocol.
+ // Victoria Metrics is used here as a long-term storage solution. However, right now, it
+ // just keeps data locally on disk. In the future, S3 snapshots/backups should be introduced.
+ Global(name):: {
+ local global = self,
+ local cfg = global.cfg,
+
+ cfg:: {
+ name: name,
+ namespace: "monitoring-global-%s" % [cfg.name],
+
+ images: {
+ victoria: "victoriametrics/victoria-metrics:v1.40.0",
+ vmauth: "victoriametrics/vmauth:v1.40.0",
+ },
+
+ hosts: {
+ // DNS hostname that this global tier will use. Ingress will run under it.
+ globalAPI: error "hosts.globalAPI must be set",
+ },
+
+ storageClasses: {
+ // Storage class used for main data retention.
+ victoria: error "storageClasses.victoria must be set",
+ },
+
+ // A list of agents that will push metrics to this instance.
+ // List of:
+ // {
+ // username: the username that the agent will authenticate with
+ // password: the password that the agent will authenticate with
+ // }
+ agents: [],
+ },
+
+ // Generated URLs that agents should use to ship metrics over. Both require HTTP basic
+ // auth, configured via cfg.agents.
+ // The internal URL should be used for agents colocated in the same Kubernetes cluster.
+ internalIngestURL:: "http://%s/api/v1/write" % [global.victoria.serviceAPI.host_colon_port],
+ // The global URL should be used for agents sending data over the internet.
+ globalIngestURL:: "https://%s/api/v1/write" % [cfg.hosts.globalAPI],
+
+ namespace: kube.Namespace(cfg.namespace),
+ local ns = global.namespace,
+
+ victoria: {
+ local victoria = self,
+
+ pvc: ns.Contain(kube.PersistentVolumeClaim("victoria-data")) {
+ spec+: {
+ storageClassName: cfg.storageClasses.victoria,
+ accessModes: ["ReadWriteOnce"],
+ resources: {
+ requests: {
+ storage: "64Gi",
+ },
+ },
+ },
+ },
+
+ authSecret: ns.Contain(kube.Secret("vmauth")) {
+ data+: {
+ "config.yaml": std.base64(std.manifestJson({
+ users: [
+ {
+ username: a.username,
+ password: a.password,
+ url_prefix: "http://localhost:8428",
+ }
+ for a in cfg.agents
+ ],
+ }) + "\n")
+ },
+ },
+
+ deploy: ns.Contain(kube.Deployment("victoria")) {
+ spec+: {
+ template+: {
+ spec+: {
+ containers_: {
+ default: kube.Container("default") {
+ image: cfg.images.victoria,
+ volumeMounts_: {
+ data: { mountPath: "/victoria-metrics-data", },
+ },
+ },
+ vmauth: kube.Container("vmauth") {
+ image: cfg.images.vmauth,
+ command: [
+ "/vmauth-prod",
+ "-auth.config", "/mnt/secret/config.yaml",
+ ],
+ volumeMounts_: {
+ secret: { mountPath: "/mnt/secret", },
+ },
+ ports_: {
+ api: { containerPort: 8427 }
+ },
+ }
+ },
+ volumes_: {
+ data: kube.PersistentVolumeClaimVolume(victoria.pvc),
+ secret: kube.SecretVolume(victoria.authSecret),
+ },
+ },
+ },
+ },
+ },
+
+ serviceAPI: ns.Contain(kube.Service("victoria-api")) {
+ target_pod: victoria.deploy.spec.template,
+ spec+: {
+ ports: [
+ { name: "api", port: 8427, targetPort: 8427, protocol: "TCP" },
+ ],
+ type: "ClusterIP",
+ },
+ },
+
+ ingressAPI: ns.Contain(kube.Ingress("victoria-api")) {
+ metadata+: {
+ annotations+: {
+ "kubernetes.io/tls-acme": "true",
+ "certmanager.k8s.io/cluster-issuer": "letsencrypt-prod",
+ },
+ },
+ spec+: {
+ tls: [
+ { hosts: [cfg.hosts.globalAPI], secretName: "ingress-tls" },
+ ],
+ rules: [
+ {
+ host: cfg.hosts.globalAPI,
+ http: {
+ paths: [ { path: "/", backend: { serviceName: victoria.serviceAPI.metadata.name, servicePort: 8427 } }, ],
+ },
+ }
+ ],
+ },
+ },
+ },
+ }
+}