local kube = import "../../../kube/kube.libsonnet";

{
    // Cluster sets up all cluster-specific monitoring resources in their own namespace.
    //
    // Currently this consists of a Prometheus server that scrapes k8s nodes for kubelet
    // and cAdvisor metrics, and possibly ships metrics off to the global tier via
    // configured upstreams (see the example below).
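    //
    // Example instantiation (all names, storage classes and URLs hypothetical):
    //
    //   cluster: Cluster("k0") {
    //       cfg+: {
    //           storageClasses+: { prometheus: "fast-local-ssd" },
    //           username: "k0",
    //           upstreams: [
    //               { remote: "https://global.metrics.example.com/api/v1/write", password: "secret" },
    //           ],
    //       },
    //   },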
    Cluster(name):: {
        local cluster = self,
        local cfg = cluster.cfg,
        cfg:: {
            name: name,
            namespace: "monitoring-cluster",

            images: {
                prometheus: "prom/prometheus:v2.18.1",
            },

            storageClasses: {
                prometheus: error "storageClasses.prometheus must be set",
            },

            // Username used to authenticate to upstreams.
            username: error "username must be set",

            // Global tier upstreams that this cluster should ship metrics off to.
            // List of:
            //   {
            //     remote: URL of upstream,
            //     password: password used to authenticate, in conjunction with cfg.username,
            //   }
            upstreams: [],
        },

        namespace: kube.Namespace(cfg.namespace),

        prometheus: {
            local prometheus = self,

            // Configuration that's going to be emitted as prometheus.yml and passed to the
            // prometheus server for this cluster.
            configuration:: {
                global: {
                    external_labels: {
                        cluster: cluster.cfg.name,
                    },
                },

                // Constructor for a Kubernetes scrape job that uses the pod's service account and
                // TLS configuration, selecting the given k8s scrape 'role'.
                local kubeScrapeConfig = function(name, role) {
                    job_name: name,
                    scheme: "https",
                    scrape_interval: "30s",
                    kubernetes_sd_configs: [ { role: role }, ],
                    tls_config: {
                        ca_file: "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt",
                    },
                    bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token",
                },
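
                // For illustration, kubeScrapeConfig("example", "node") renders in
                // prometheus.yml as roughly the following (job name hypothetical):
                //
                //   job_name: example
                //   scheme: https
                //   scrape_interval: 30s
                //   kubernetes_sd_configs:
                //   - role: node
                //   tls_config:
                //     ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
                //   bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token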
                // When scraping node-based metrics (i.e. node and cadvisor metrics) we contact
                // the metrics endpoints on the kubelet via the API server. This is done by
                // relabeling __address__ and __metrics_path__ to point at the k8s API server,
                // and at the API server proxy path to reach a node's metrics endpoint.
                //
                // This approach was lifted from the prometheus examples for Kubernetes, and
                // while the benefits outlined there do not matter that much to us (our
                // kubelets listen on public addresses, anyway), we still like this approach
                // because we don't have to hardcode the kubelet TLS port.
                //
                // https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml
                //
                // When contacting the API server, we hardcode the 'hswaw.net' DNS suffix as
                // our API server's TLS certificate only has a CN/SAN for its full FQDN, not
                // the .svc.cluster.local shorthand (see //cluster/clustercfg:clustercfg.py).
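                //
                // For example (node name hypothetical): node 'node1' in cluster 'k0' would be
                // scraped at:
                //
                //   https://kubernetes.default.svc.k0.hswaw.net:443/api/v1/nodes/node1/proxy/metrics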
                local kubeScrapeNodeMetrics = function(name, path) kubeScrapeConfig(name, "node") {
                    relabel_configs: [
                        {
                            action: "labelmap",
                            regex: "__meta_kubernetes_node_label_(.+)",
                        },
                        {
                            action: "replace",
                            target_label: "__address__",
                            replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
                        },
                        {
                            target_label: "__metrics_path__",
                            source_labels: ["__meta_kubernetes_node_name"],
                            regex: "(.+)",
                            replacement: "/api/v1/nodes/${1}/proxy" + path,
                        },
                    ],
                },

                // When scraping API server-colocated metrics (i.e. metrics from nixos services
                // running alongside apiserver instances), we contact the metrics endpoints
                // directly over the nodes' IP addresses and an external port. The node IP
                // addresses are discovered via Prometheus' kubernetes endpoint discovery, which
                // selects all endpoints of the default/kubernetes service. This service is
                // backed by apiserver instances on public IP addresses. We can then replace the
                // discovered port with the port of the service we're interested in.
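                //
                // For example (address hypothetical): a discovered endpoint 203.0.113.1:4001
                // would be rewritten to 203.0.113.1:4003 when scraping the controller-manager.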
                local kubeScrapeAPIServerColocated = function(name, port) kubeScrapeConfig(name, "endpoints") {
                    relabel_configs: [
                        // Select only endpoints that back the default/kubernetes service. These
                        // are all public IP addresses of nodes that run the API server.
                        {
                            action: "keep",
                            regex: "default;kubernetes;https",
                            source_labels: [
                                "__meta_kubernetes_namespace",
                                "__meta_kubernetes_service_name",
                                "__meta_kubernetes_endpoint_port_name",
                            ],
                        },
                    ] + (if port == 4001 then [] else [
                        // Replace the endpoint port with the requested port, unless the requested
                        // port is the apiserver's port 4001, which is the one returned by default
                        // for these endpoints.
                        {
                            action: "replace",
                            regex: "([^:]+):.+",
                            replacement: "$1:%d" % [port],
                            source_labels: [
                                "__address__",
                            ],
                            target_label: "__address__",
                        },
                    ]),
                    // We disable server-side TLS certificate verification.
                    // Unfortunately, all apiserver-colocated services run with TLS certificates
                    // that do not have the right IP address SAN, and we can't override the TLS
                    // ServerName for a scrape target [1], so the only two choices we are left
                    // with are:
                    //   1) re-emit relevant certificates with IP address SANs that allow for
                    //      access by IP.
                    //   2) disable TLS verification.
                    // We choose 2), knowing that if someone manages to hijack a target IP
                    // address they can end up stealing our bearer token and impersonating the
                    // service account with which Prometheus is running. In the long term, we
                    // hope for [1] to be resolved.
                    //
                    // TODO(q3k): revisit this once [1] gets fixed.
                    // [1] - https://github.com/prometheus/prometheus/issues/4827
                    tls_config: {
                        insecure_skip_verify: true,
                    },
                },

                scrape_configs: [
                    /// Scrape per-node metrics, proxied via the apiserver.
                    // Scrape Kubernetes node metrics via apiserver. This emits kube_node_* metrics.
                    kubeScrapeNodeMetrics("cluster_node_metrics", "/metrics"),
                    // Scrape Kubernetes node cadvisor metrics via apiserver. This emits container_* metrics.
                    kubeScrapeNodeMetrics("cluster_cadvisor_metrics", "/metrics/cadvisor"),

                    /// Scrape apiserver-colocated ('master node') metrics over nodes' public
                    /// IP addresses (currently all nodes are 'master' nodes).
                    // Scrape Kubernetes apiserver metrics.
                    kubeScrapeAPIServerColocated("cluster_apiserver_metrics", 4001),
                    // Scrape Kubernetes controller-manager metrics.
                    kubeScrapeAPIServerColocated("cluster_controllermanager_metrics", 4003),
                    // Scrape Kubernetes scheduler metrics.
                    kubeScrapeAPIServerColocated("cluster_scheduler_metrics", 4005),
                ],
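                // Ship collected metrics off to any configured global tier upstreams.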
                remote_write: [
                    {
                        url: u.remote,
                        basic_auth: {
                            username: cluster.cfg.username,
                            password: u.password,
                        },
                    }
                    for u in cluster.cfg.upstreams
                ],
            },

            configmap: kube.ConfigMap("prometheus-cluster") {
                metadata+: {
                    namespace: cfg.namespace,
                },
                data: {
                    "prometheus.yml": std.manifestYamlDoc(prometheus.configuration),
                },
            },

            sa: kube.ServiceAccount("prometheus-cluster") {
                metadata+: {
                    namespace: cfg.namespace,
                },
            },

            cr: kube.ClusterRole("monitoring-cluster-prometheus-server-%s" % [cfg.name]) {
                rules: [
                    // Allow access to all metrics.
                    { nonResourceURLs: ["/metrics"], verbs: ["get"], },
                    // Allow access to node details for discovery.
                    { apiGroups: [""], resources: ["nodes"], verbs: ["list", "watch", "get"], },
                    { apiGroups: [""], resources: ["endpoints", "services", "pods"], verbs: ["list", "watch", "get"], },
                    // Allow proxying to bare node HTTP to access per-node metrics endpoints.
                    { apiGroups: [""], resources: ["nodes/proxy"], verbs: ["get"], },
                ],
            },

            crb: kube.ClusterRoleBinding("monitoring-cluster-prometheus-server-%s" % [cfg.name]) {
                subjects_: [prometheus.sa],
                roleRef_: prometheus.cr,
            },

            deploy: kube.Deployment("prometheus-cluster") {
                metadata+: {
                    namespace: cfg.namespace,
                },
                spec+: {
                    template+: {
                        spec+: {
                            containers_: {
                                default: kube.Container("default") {
                                    image: cfg.images.prometheus,
                                    command: [
                                        "/bin/prometheus",
                                        "--config.file=/etc/prometheus/prometheus.yml",
                                        "--storage.tsdb.path=/prometheus",
                                        "--storage.tsdb.retention.size=10GB",
                                        "--web.console.libraries=/usr/share/prometheus/console_libraries",
                                        "--web.console.templates=/usr/share/prometheus/consoles",
                                        "--web.enable-lifecycle",
                                    ],
                                    resources: {
                                        requests: {
                                            memory: "3Gi",
                                            cpu: "100m",
                                        },
                                        limits: {
                                            memory: "3Gi",
                                            cpu: "1",
                                        },
                                    },
                                    volumeMounts_: {
                                        data: { mountPath: "/prometheus", },
                                        configmap: { mountPath: "/etc/prometheus", },
                                    },
                                },
                            },
                            serviceAccountName: prometheus.sa.metadata.name,
                            tolerations: [
                                { key: "CriticalAddonsOnly", operator: "Exists" },
                            ],
                            volumes_: {
                                data: kube.PersistentVolumeClaimVolume(prometheus.pvc),
                                configmap: kube.ConfigMapVolume(prometheus.configmap),
                            },
                        },
                    },
                },
            },

            // Kubernetes metric storage volume.
            pvc: kube.PersistentVolumeClaim("prometheus-cluster") {
                metadata+: {
                    namespace: cfg.namespace,
                },
                spec+: {
                    storageClassName: cfg.storageClasses.prometheus,
                    accessModes: ["ReadWriteOnce"],
                    resources: {
                        requests: {
                            storage: "16Gi",
                        },
                    },
                },
            },

            // Network Policy governing access to the prometheus server.
            np: kube.NetworkPolicy("prometheus-cluster") {
                metadata+: {
                    namespace: cfg.namespace,
                },
                spec+: kube.podLabelsSelector(prometheus.deploy) {
                    ingress_: {
                        // Deny all inbound traffic to the pod.
                        // This will be augmented to allow access from some other pod/namespace
                        // in the future.
                    },
                    egress_: {
                        // Allow all outbound traffic from the pod.
                        outboundAll: {},
                    },
                    policyTypes: ["Ingress", "Egress"],
                },
            },
        },
    },
}