app/matrix: media repo proxy init

This implements media-repo-proxy, a lil' bit of Go to make our
infrastructure work with matrix-media-repo's concept of Host headers.

For some reason, MMR really wants Host: hackerspace.pl instead of Host:
matrix.hackerspace.pl. We'd fix that in their code, but with no tests
and with complex config reload logic it looks very daunting. We'd just
fix that in our Ingress, but that's not easy (no per-rule host
overrides).

So, we commit a tiny little itty bitty war crime and implement a piece
of Go code that serves as a rewriter for this.

This works, tested on boston:

    $ curl -H "Host: matrix.hackerspace.pl" 10.10.12.46:8080/_matrix/media/r0/download/hackerspace.pl/EwVBulPgCWDWNGMKjcOKGGbk | file -
    /dev/stdin: JPEG image data, JFIF standard 1.01, aspect ratio, density 1x1, segment length 16, baseline, precision 8, 650x300, components 3

(this address is media-repo.matrix.svc.k0.hswaw.net)

But hey, at least it has tests.

Change-Id: Ib6af1988fe8e112c9f3a5577506b18b48d80af62
Reviewed-on: https://gerrit.hackerspace.pl/c/hscloud/+/1143
Reviewed-by: q3k <q3k@hackerspace.pl>
diff --git a/app/matrix/lib/media-repo.libsonnet b/app/matrix/lib/media-repo.libsonnet
index 338dc78..90af77b 100644
--- a/app/matrix/lib/media-repo.libsonnet
+++ b/app/matrix/lib/media-repo.libsonnet
@@ -91,7 +91,56 @@
         },
     },
 
-    svc: app.ns.Contain(kube.Service("media-repo")) {
+    // Run //app/matrix/media-repo-proxy, if needed. This rewrites Host headers
+    // from the homeserver's serving Host to the MXID hostname (which
+    // matrix-media-repo expects).
+    //
+    // Currently we are only able to run one proxy for one homeserver config,
+    // but we don't expect multiple homeservers per matrix-media-repo soon.
+    local needProxying = [
+        h
+        for h in cfg.homeservers
+        if "https://%s" % [h.name] != h.csApi
+    ],
+    proxies: if std.length(needProxying) > 1 then error "can only proxy one homeserver" else
+             if std.length(needProxying) == 1 then {
+        local homeserver = needProxying[0],
+        local upstreamHost = homeserver.name,
+        local prefix = "https://",
+        // The serving Host is csApi minus its scheme, so csApi must be an https URL.
+        assert std.startsWith(homeserver.csApi, prefix) : "csApi of %s must start with %s" % [homeserver.name, prefix],
+        local downstreamHost = std.substr(homeserver.csApi, std.length(prefix), std.length(homeserver.csApi)-std.length(prefix)),
+
+        deployment: app.ns.Contain(kube.Deployment("media-repo-proxy")) {
+            spec+: {
+                template+: {
+                    spec+: {
+                        containers_: {
+                            default: kube.Container("default") {
+                                image: "registry.k0.hswaw.net/q3k/media-repo-proxy:1631791816-18609443fffde38a055f504e80f95e44f49d2481",
+                                command: [
+                                    "/app/matrix/media-repo-proxy",
+                                    "-downstream_host", downstreamHost,
+                                    "-upstream_host", upstreamHost,
+                                    "-upstream", app.internalSvc.host_colon_port,
+                                    "-listen", ":8080",
+                                ],
+                                ports_: {
+                                    http: { containerPort: 8080 },
+                                },
+                            },
+                        },
+                    },
+                },
+            },
+        },
+    } else {},
+
+    internalSvc: app.ns.Contain(kube.Service("media-repo-internal")) {
         target_pod:: app.deployment.spec.template,
     },
+
+    svc: if std.length(needProxying) > 0 then app.ns.Contain(kube.Service("media-repo")) {
+        target_pod:: app.proxies.deployment.spec.template,
+    } else app.internalSvc,
 }
diff --git a/app/matrix/media-repo-proxy/BUILD.bazel b/app/matrix/media-repo-proxy/BUILD.bazel
new file mode 100644
index 0000000..a56d881
--- /dev/null
+++ b/app/matrix/media-repo-proxy/BUILD.bazel
@@ -0,0 +1,47 @@
+load("@io_bazel_rules_docker//container:container.bzl", "container_image", "container_layer", "container_push")
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test")
+
+go_library(
+    name = "media-repo-proxy_lib",
+    srcs = ["main.go"],
+    importpath = "code.hackerspace.pl/hscloud/app/matrix/media-repo-proxy",
+    visibility = ["//visibility:private"],
+)
+
+go_binary(
+    name = "media-repo-proxy",
+    embed = [":media-repo-proxy_lib"],
+    visibility = ["//visibility:public"],
+)
+
+go_test(
+    name = "media-repo-proxy_test",
+    srcs = ["main_test.go"],
+    embed = [":media-repo-proxy_lib"],
+)
+
+# Layer that installs the proxy binary under /app/matrix/.
+container_layer(
+    name = "layer_bin",
+    directory = "/app/matrix/",
+    files = [":media-repo-proxy"],
+)
+
+# Runtime image: the prodimage-bionic base with the binary layer on top.
+container_image(
+    name = "runtime",
+    base = "@prodimage-bionic//image",
+    layers = [":layer_bin"],
+)
+
+# Pushes :runtime to registry.k0.hswaw.net as q3k/media-repo-proxy. The tag is
+# a fixed prefix followed by the {STABLE_GIT_COMMIT} placeholder.
+container_push(
+    name = "push",
+    format = "Docker",
+    image = ":runtime",
+    registry = "registry.k0.hswaw.net",
+    repository = "q3k/media-repo-proxy",
+    tag = "1631791816-{STABLE_GIT_COMMIT}",
+)
+
diff --git a/app/matrix/media-repo-proxy/README.md b/app/matrix/media-repo-proxy/README.md
new file mode 100644
index 0000000..c9df4ad
--- /dev/null
+++ b/app/matrix/media-repo-proxy/README.md
@@ -0,0 +1,18 @@
+# Matrix-Media-Repository Proxy
+
+This is A Saucerful Of Go that sits between [Matrix Media Repo](https://github.com/turt2live/matrix-media-repo) instances and Ingresses.
+
+It has one job: rewrite Host headers. The reason for this is that matrix-media-repo wants Host: hackerspace.pl (MXID domain) while our traffic comes in with Host: matrix.hackerspace.pl (actual Host at which we serve Matrix).
+
+## Alternatives considered
+
+1. Rewriting this in Nginx Ingress Controller: not easy to do on a per-rule basis, would require some extra ingresses and dumb loopbacks.
+2. Fixing matrix-media-repo: not easy with the assumptions their code makes and with no tests that we can run.
+
+## Running
+
+Locally:
+
+    $ bazel run //app/matrix/media-repo-proxy -- -downstream_host=matrix.hackerspace.pl -upstream_host=hackerspace.pl -upstream=foo.bar.svc.cluster.local:8080
+
+In prod, should be part of jsonnet infra and be brought up as needed.
diff --git a/app/matrix/media-repo-proxy/main.go b/app/matrix/media-repo-proxy/main.go
new file mode 100644
index 0000000..920e89e
--- /dev/null
+++ b/app/matrix/media-repo-proxy/main.go
@@ -0,0 +1,79 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"net"
+	"net/http"
+	"net/http/httputil"
+)
+
+var (
+	flagUpstream       string
+	flagUpstreamHost   string
+	flagDownstreamHost string
+	flagListen         string
+)
+
+// newProxy returns the handler served to downstream clients: a Host check in
+// front of a reverse proxy that relays to flagUpstream as flagUpstreamHost.
+func newProxy() http.Handler {
+	// rewrite retargets an admitted request at the upstream.
+	rewrite := func(r *http.Request) {
+		r.URL.Scheme = "http"
+		r.URL.Host = flagUpstream
+		r.Host = flagUpstreamHost
+		// MMR reads X-Forwarded-Host and prioritizes it over the Host header.
+		r.Header.Set("X-Forwarded-Host", flagUpstreamHost)
+	}
+	upstream := &httputil.ReverseProxy{Director: rewrite}
+
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// When both Hscloud-Nic-Source-* headers are set, they replace the
+		// socket peer as the address that is logged and put in X-Forwarded-For.
+		client := r.RemoteAddr
+		srcIP := r.Header.Get("Hscloud-Nic-Source-IP")
+		srcPort := r.Header.Get("Hscloud-Nic-Source-Port")
+		if srcIP != "" && srcPort != "" {
+			client = net.JoinHostPort(srcIP, srcPort)
+			r.Header.Set("X-Forwarded-For", client)
+		}
+		log.Printf("%s %s %s", client, r.Method, r.URL.Path)
+
+		// During federation requests Host is foo.example.com:443, so compare
+		// on the hostname and ignore the port. A split error generally means
+		// 'no port' or a very malformed Host; keep the raw value then, since
+		// it is checked against the required host right below.
+		requested := r.Host
+		if name, _, err := net.SplitHostPort(r.Host); err == nil {
+			requested = name
+		}
+
+		if requested == flagDownstreamHost {
+			upstream.ServeHTTP(w, r)
+			return
+		}
+
+		log.Printf("Invalid host requested %q, wanted %q", r.Host, flagDownstreamHost)
+		w.WriteHeader(http.StatusBadRequest)
+		fmt.Fprintf(w, "invalid host\n")
+	})
+}
+
+// main parses the command-line flags and serves the proxy until the listener fails.
+func main() {
+	flag.StringVar(&flagUpstreamHost, "upstream_host", "hackerspace.pl", "Upstream Host header, as sent to upstream")
+	flag.StringVar(&flagUpstream, "upstream", "foo.bar.svc.cluster.local:8080", "Address and port to reach upstream")
+	flag.StringVar(&flagDownstreamHost, "downstream_host", "matrix.hackerspace.pl", "Downstream Host header, as requested by client traffic")
+	flag.StringVar(&flagListen, "listen", ":8080", "Address to listen at for downstream traffic")
+	flag.Parse()
+
+	log.Printf("Starting media-repo-proxy")
+	proxy := newProxy()
+	log.Printf("Listening on %s...", flagListen)
+	// ListenAndServe only returns on error; exit non-zero so the failure is visible.
+	if err := http.ListenAndServe(flagListen, proxy); err != nil {
+		log.Fatalf("Listen failed: %v", err)
+	}
+}
diff --git a/app/matrix/media-repo-proxy/main_test.go b/app/matrix/media-repo-proxy/main_test.go
new file mode 100644
index 0000000..ba4d4c3
--- /dev/null
+++ b/app/matrix/media-repo-proxy/main_test.go
@@ -0,0 +1,83 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"net/url"
+	"testing"
+)
+
+// TestForward drives the proxy end to end against a stub upstream.
+func TestForward(t *testing.T) {
+	// Test backend which echoes the Host and X-Forwarded-For header it received.
+	backendServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		fmt.Fprintf(w, "hello %s %s\n", r.Host, r.Header.Get("X-Forwarded-For"))
+	}))
+	defer backendServer.Close()
+	rpURL, err := url.Parse(backendServer.URL)
+	if err != nil {
+		t.Fatalf("parsing test backend URL failed: %v", err)
+	}
+
+	// Configure and run proxy.
+	flagUpstream = rpURL.Host
+	flagUpstreamHost = "example.com"
+	flagDownstreamHost = "matrix.example.com"
+	proxy := httptest.NewServer(newProxy())
+	defer proxy.Close()
+
+	// Run through a few tests.
+	for i, te := range []struct {
+		headers map[string]string
+		host    string
+		want    string
+	}{
+		{
+			// 0: expected to succeed
+			headers: map[string]string{
+				"Hscloud-Nic-Source-IP":   "1.2.3.4",
+				"Hscloud-Nic-Source-Port": "1337",
+			},
+			host: "matrix.example.com",
+			want: "hello example.com 1.2.3.4:1337, 127.0.0.1\n",
+		},
+		{
+			// 1: expected to succeed
+			host: "matrix.example.com",
+			want: "hello example.com 127.0.0.1\n",
+		},
+		{
+			// 2: expected to succeed
+			host: "matrix.example.com:443",
+			want: "hello example.com 127.0.0.1\n",
+		},
+		{
+			// 3: expected to fail
+			host: "example.com",
+			want: "invalid host\n",
+		},
+	} {
+		req, _ := http.NewRequest("GET", proxy.URL, nil)
+		req.Host = te.host
+		for k, v := range te.headers {
+			req.Header.Set(k, v)
+		}
+
+		resp, err := http.DefaultClient.Do(req)
+		if err != nil {
+			t.Fatalf("Get failed: %v", err)
+		}
+		// Close before checking the read error so the body is released either way.
+		b, err := io.ReadAll(resp.Body)
+		resp.Body.Close()
+		if err != nil {
+			t.Fatalf("Read failed: %v", err)
+		}
+
+		if want, got := te.want, string(b); want != got {
+			t.Errorf("%d: wrong response from upstream, wanted %q, got %q", i, want, got)
+		}
+	}
+}