Merge "k0.hswaw.net: pass metallb through Calico"
diff --git a/cluster/kube/k0.calico.yaml b/cluster/kube/k0.calico.yaml
new file mode 100644
index 0000000..eef2661
--- /dev/null
+++ b/cluster/kube/k0.calico.yaml
@@ -0,0 +1,78 @@
+# This is the current Calico configuration in k0.hswaw.net.
+# Unfortunately, we do not have Calico configured to use CRDs, and instead to
+# keep its resources separately from Kubernetes. Thus, this configuration
+# cannot be managed by Kubernetes/jsonnet. Instead, it must be applied manually:
+#
+# calicoctl apply -f k0.calico.yaml
+
+apiVersion: projectcalico.org/v3
+kind: BGPConfiguration
+metadata:
+ name: default
+spec:
+ logSeverityScreen: Info
+ nodeToNodeMeshEnabled: true
+ asNumber: 65003
+
+---
+
+# metallb peer, must be compatible with the metallbc definition in k0.libsonnet.
+apiVersion: projectcalico.org/v3
+kind: BGPPeer
+metadata:
+ name: metallb
+spec:
+ peerIP: 127.0.0.1
+ asNumber: 65002
+
+---
+
+# ToR switch peering, must be compatible with the configuration on dcsw01.hswaw.net.
+apiVersion: projectcalico.org/v3
+kind: BGPPeer
+metadata:
+ name: dcsw01
+spec:
+ peerIP: 185.236.240.33
+ asNumber: 65001
+
+---
+
+# IP pool that's used by metallb. We mark it as disabled so that Calico doesn't
+# allocate Service IPs from it, just allow metallb routes from that pool to
+# pass through eBGP (otherwise Calico BIRD filter will filter them out).
+# Keep in sync with k0.libsonnet.
+apiVersion: projectcalico.org/v3
+kind: IPPool
+metadata:
+ name: public-v4-1
+spec:
+ cidr: 185.236.240.48/28
+ disabled: true
+---
+
+# IP pool that's used by metallb. We mark it as disabled so that Calico doesn't
+# allocate Service IPs from it, just allow metallb routes from that pool to
+# pass through eBGP (otherwise Calico BIRD filter will filter them out).
+# Keep in sync with k0.libsonnet.
+apiVersion: projectcalico.org/v3
+kind: IPPool
+metadata:
+ name: public-v4-2
+spec:
+ cidr: 185.236.240.112/28
+ disabled: true
+
+---
+
+# IP pool for the service network.
+apiVersion: projectcalico.org/v3
+kind: IPPool
+metadata:
+ name: default-ipv4-ippool
+spec:
+ blockSize: 26
+ cidr: 10.10.24.0/21
+ ipipMode: CrossSubnet
+ natOutgoing: true
+
diff --git a/cluster/kube/k0.libsonnet b/cluster/kube/k0.libsonnet
index 6146085..45ae4c1 100644
--- a/cluster/kube/k0.libsonnet
+++ b/cluster/kube/k0.libsonnet
@@ -20,13 +20,15 @@
},
metallb+: {
cfg+: {
+ // Peer with calico running on same node.
peers: [
{
- "peer-address": "185.236.240.33",
- "peer-asn": 65001,
+ "peer-address": "127.0.0.1",
+ "peer-asn": 65003,
"my-asn": 65002,
},
],
+ // Public IP address pools. Keep in sync with k0.calico.yaml.
addressPools: [
{
name: "public-v4-1",
diff --git a/cluster/kube/lib/calico-bird-ipam.cfg.template b/cluster/kube/lib/calico-bird-ipam.cfg.template
new file mode 100644
index 0000000..869a480
--- /dev/null
+++ b/cluster/kube/lib/calico-bird-ipam.cfg.template
@@ -0,0 +1,66 @@
+# This is forked from bird.cfg.template from calico running on k0.hswaw.net on 2020/09/21.
+# Changed vs. upstream (C-f HSCLOUD):
+# - do not program RTD_UNREACHABLE routes into the kernel (these come from metallb, and
+# programming them seems to break things)
+# Generated by confd
+filter calico_export_to_bgp_peers {
+ calico_aggr();
+{{- $static_key := "/staticroutes"}}
+{{- if ls $static_key}}
+
+ # Export static routes.
+ {{- range ls $static_key}}
+ {{- $parts := split . "-"}}
+ {{- $cidr := join $parts "/"}}
+ if ( net ~ {{$cidr}} ) then { accept; }
+ {{- end}}
+{{- end}}
+{{range ls "/v1/ipam/v4/pool"}}{{$data := json (getv (printf "/v1/ipam/v4/pool/%s" .))}}
+ if ( net ~ {{$data.cidr}} ) then {
+ accept;
+ }
+{{- end}}
+ reject;
+}
+
+{{$network_key := printf "/bgp/v1/host/%s/network_v4" (getenv "NODENAME")}}
+filter calico_kernel_programming {
+{{- $reject_key := "/rejectcidrs"}}
+{{- if ls $reject_key}}
+
+ if ( dest = RTD_UNREACHABLE ) then { # HSCLOUD
+ reject;
+ }
+
+ # Don't program static routes into kernel.
+ {{- range ls $reject_key}}
+ {{- $parts := split . "-"}}
+ {{- $cidr := join $parts "/"}}
+ if ( net ~ {{$cidr}} ) then { reject; }
+ {{- end}}
+
+{{- end}}
+{{- if exists $network_key}}{{$network := getv $network_key}}
+{{range ls "/v1/ipam/v4/pool"}}{{$data := json (getv (printf "/v1/ipam/v4/pool/%s" .))}}
+ if ( net ~ {{$data.cidr}} ) then {
+{{- if $data.vxlan_mode}}
+ # Don't program VXLAN routes into the kernel - these are handled by Felix.
+ reject;
+ }
+{{- else if $data.ipip_mode}}{{if eq $data.ipip_mode "cross-subnet"}}
+ if defined(bgp_next_hop) && ( bgp_next_hop ~ {{$network}} ) then
+ krt_tunnel = ""; {{- /* Destination in ipPool, mode is cross sub-net, route from-host on subnet, do not use IPIP */}}
+ else
+ krt_tunnel = "{{$data.ipip}}"; {{- /* Destination in ipPool, mode is cross sub-net, route from-host off subnet, set the tunnel (if IPIP not enabled, value will be "") */}}
+ accept;
+ } {{- else}}
+ krt_tunnel = "{{$data.ipip}}"; {{- /* Destination in ipPool, mode not cross sub-net, set the tunnel (if IPIP not enabled, value will be "") */}}
+ accept;
+ } {{- end}} {{- else}}
+ krt_tunnel = "{{$data.ipip}}"; {{- /* Destination in ipPool, mode field is not present, set the tunnel (if IPIP not enabled, value will be "") */}}
+ accept;
+ } {{- end}}
+{{end}}
+{{- end}}{{/* End of 'exists $network_key' */}}
+ accept; {{- /* Destination is not in any ipPool, accept */}}
+}
diff --git a/cluster/kube/lib/calico-bird.cfg.template b/cluster/kube/lib/calico-bird.cfg.template
new file mode 100644
index 0000000..8a79deb
--- /dev/null
+++ b/cluster/kube/lib/calico-bird.cfg.template
@@ -0,0 +1,164 @@
+# This is forked from bird.cfg.template from calico running on k0.hswaw.net on 2020/09/21.
+# Changed vs. upstream (C-f HSCLOUD):
+# - set 'passive on' on 127.0.0.1 neighbors, used for establishing connectivity
+# with metallb.
+# Generated by confd
+include "bird_aggr.cfg";
+include "bird_ipam.cfg";
+
+{{- $node_ip_key := printf "/host/%s/ip_addr_v4" (getenv "NODENAME")}}{{$node_ip := getv $node_ip_key}}
+{{- $router_id := getenv "CALICO_ROUTER_ID" ""}}
+
+{{- $node_name := getenv "NODENAME"}}
+
+router id {{if eq "hash" ($router_id) -}}
+ {{hashToIPv4 $node_name}};
+{{- else -}}
+ {{if ne "" ($router_id)}}{{$router_id}}{{else}}{{$node_ip}}{{end}};
+{{- end}}
+
+{{- define "LOGGING"}}
+{{- $node_logging_key := printf "/host/%s/loglevel" (getenv "NODENAME")}}
+{{- if exists $node_logging_key}}
+{{- $logging := getv $node_logging_key}}
+{{- if eq $logging "debug"}}
+ debug all;
+{{- else if ne $logging "none"}}
+ debug { states };
+{{- end}}
+{{- else if exists "/global/loglevel"}}
+{{- $logging := getv "/global/loglevel"}}
+{{- if eq $logging "debug"}}
+ debug all;
+{{- else if ne $logging "none"}}
+ debug { states };
+{{- end}}
+{{- else}}
+ debug { states };
+{{- end}}
+{{- end}}
+
+# Configure synchronization between routing tables and kernel.
+protocol kernel {
+ learn; # Learn all alien routes from the kernel
+ persist; # Don't remove routes on bird shutdown
+ scan time 2; # Scan kernel routing table every 2 seconds
+ import all;
+ export filter calico_kernel_programming; # Default is export none
+ graceful restart; # Turn on graceful restart to reduce potential flaps in
+ # routes when reloading BIRD configuration. With a full
+ # automatic mesh, there is no way to prevent BGP from
+ # flapping since multiple nodes update their BGP
+ # configuration at the same time, GR is not guaranteed to
+ # work correctly in this scenario.
+}
+
+# Watch interface up/down events.
+protocol device {
+{{- template "LOGGING"}}
+ scan time 2; # Scan interfaces every 2 seconds
+}
+
+protocol direct {
+{{- template "LOGGING"}}
+ interface -"cali*", -"kube-ipvs*", "*"; # Exclude cali* and kube-ipvs* but
+ # include everything else. In
+ # IPVS-mode, kube-proxy creates a
+ # kube-ipvs0 interface. We exclude
+ # kube-ipvs0 because this interface
+ # gets an address for every in use
+ # cluster IP. We use static routes
+ # for when we legitimately want to
+ # export cluster IPs.
+}
+
+{{if eq "" ($node_ip)}}# IPv4 disabled on this node.
+{{else}}{{$node_as_key := printf "/host/%s/as_num" (getenv "NODENAME")}}
+# Template for all BGP clients
+template bgp bgp_template {
+{{- $as_key := or (and (exists $node_as_key) $node_as_key) "/global/as_num"}}
+{{- $node_as_num := getv $as_key}}
+{{- template "LOGGING"}}
+ description "Connection to BGP peer";
+ local as {{$node_as_num}};
+ multihop;
+ gateway recursive; # This should be the default, but just in case.
+ import all; # Import all routes, since we don't know what the upstream
+ # topology is and therefore have to trust the ToR/RR.
+ export filter calico_export_to_bgp_peers; # Only want to export routes for workloads.
+ source address {{$node_ip}}; # The local address we use for the TCP connection
+ add paths on;
+ graceful restart; # See comment in kernel section about graceful restart.
+ connect delay time 2;
+ connect retry time 5;
+ error wait time 5,30;
+}
+
+# ------------- Node-to-node mesh -------------
+{{- $node_cid_key := printf "/host/%s/rr_cluster_id" (getenv "NODENAME")}}
+{{- $node_cluster_id := getv $node_cid_key}}
+{{if (json (getv "/global/node_mesh")).enabled}}
+{{range $host := lsdir "/host"}}
+{{$onode_as_key := printf "/host/%s/as_num" .}}
+{{$onode_ip_key := printf "/host/%s/ip_addr_v4" .}}{{if exists $onode_ip_key}}{{$onode_ip := getv $onode_ip_key}}
+{{$nums := split $onode_ip "."}}{{$id := join $nums "_"}}
+# For peer {{$onode_ip_key}}
+{{if eq $onode_ip ($node_ip) }}# Skipping ourselves ({{$node_ip}})
+{{else if ne "" $onode_ip}}protocol bgp Mesh_{{$id}} from bgp_template {
+ neighbor {{$onode_ip}} as {{if exists $onode_as_key}}{{getv $onode_as_key}}{{else}}{{getv "/global/as_num"}}{{end}};
+ {{- /*
+ Make the peering unidirectional. This avoids a race where
+ - peer A opens a connection and begins a graceful restart
+ - before the restart completes, peer B opens its connection
+ - peer A sees the new connection and aborts the graceful restart, causing a route flap.
+ */ -}}
+ {{if gt $onode_ip $node_ip}}
+ passive on; # Mesh is unidirectional, peer will connect to us.
+ {{- end}}
+}{{end}}{{end}}{{end}}
+{{else}}
+# Node-to-node mesh disabled
+{{end}}
+
+
+# ------------- Global peers -------------
+{{if ls "/global/peer_v4"}}
+{{range gets "/global/peer_v4/*"}}{{$data := json .Value}}
+{{$nums := split $data.ip "."}}{{$id := join $nums "_"}}
+# For peer {{.Key}}
+{{- if eq $data.ip ($node_ip) }}
+# Skipping ourselves ({{$node_ip}})
+{{- else}}
+protocol bgp Global_{{$id}} from bgp_template {
+ {{if eq $data.ip ("127.0.0.1")}}passive on; # HSCLOUD {{end}}
+ neighbor {{$data.ip}} as {{$data.as_num}};
+{{- if and (eq $data.as_num $node_as_num) (ne "" ($node_cluster_id)) (ne $data.rr_cluster_id ($node_cluster_id))}}
+ rr client;
+ rr cluster id {{$node_cluster_id}};
+{{- end}}
+}
+{{- end}}
+{{end}}
+{{else}}# No global peers configured.{{end}}
+
+
+# ------------- Node-specific peers -------------
+{{$node_peers_key := printf "/host/%s/peer_v4" (getenv "NODENAME")}}
+{{if ls $node_peers_key}}
+{{range gets (printf "%s/*" $node_peers_key)}}{{$data := json .Value}}
+{{$nums := split $data.ip "."}}{{$id := join $nums "_"}}
+# For peer {{.Key}}
+{{- if eq $data.ip ($node_ip) }}
+# Skipping ourselves ({{$node_ip}})
+{{- else}}
+protocol bgp Node_{{$id}} from bgp_template {
+ neighbor {{$data.ip}} as {{$data.as_num}};
+{{- if and (eq $data.as_num $node_as_num) (ne "" ($node_cluster_id)) (ne $data.rr_cluster_id ($node_cluster_id))}}
+ rr client;
+ rr cluster id {{$node_cluster_id}};
+{{- end}}
+}
+{{- end}}
+{{end}}
+{{else}}# No node-specific peers configured.{{end}}
+{{end}}{{/* End of IPv4 enable check */}}
diff --git a/cluster/kube/lib/calico.libsonnet b/cluster/kube/lib/calico.libsonnet
index b5c83a7..1e2d503 100644
--- a/cluster/kube/lib/calico.libsonnet
+++ b/cluster/kube/lib/calico.libsonnet
@@ -230,6 +230,17 @@
},
},
+	# ConfigMap that holds overridden bird.cfg.template and bird_ipam.cfg.template.
+ calicoMetallbBird: kube.ConfigMap("calico-metallb-bird") {
+ metadata+: {
+ namespace: cfg.namespace,
+ },
+ data: {
+ "bird.cfg.template": (importstr "calico-bird.cfg.template"),
+ "bird_ipam.cfg.template": (importstr "calico-bird-ipam.cfg.template"),
+ },
+ },
+
nodeDaemon: kube.DaemonSet("calico-node") {
metadata+: {
namespace: cfg.namespace,
@@ -258,6 +269,7 @@
xtables_lock: kube.HostPathVolume("/run/xtables.lock"),
var_run_calico: kube.HostPathVolume("/var/run/calico"),
var_lib_calico: kube.HostPathVolume("/var/lib/calico"),
+ bird_cfg_template: kube.ConfigMapVolume(env.calicoMetallbBird),
},
initContainers_: {
installCNI: kube.Container("install-cni") {
@@ -335,6 +347,16 @@
var_lib_calico: { mountPath: "/var/lib/calico" },
secrets: { mountPath: env.cm.secretPrefix },
},
+ volumeMounts+: [
+ { name: "bird-cfg-template",
+ mountPath: "/etc/calico/confd/templates/bird.cfg.template",
+ subPath: "bird.cfg.template"
+ },
+ { name: "bird-cfg-template",
+ mountPath: "/etc/calico/confd/templates/bird_ipam.cfg.template",
+ subPath: "bird_ipam.cfg.template"
+ },
+ ],
},
},
},