Merge "devtools/ci/remote-cache: init"
diff --git a/devtools/ci/remote-cache/BUILD.bazel b/devtools/ci/remote-cache/BUILD.bazel
new file mode 100644
index 0000000..4d46955
--- /dev/null
+++ b/devtools/ci/remote-cache/BUILD.bazel
@@ -0,0 +1,23 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "main.go",
+        "service.go",
+    ],
+    importpath = "code.hackerspace.pl/hscloud/devtools/ci/remote-cache",
+    visibility = ["//visibility:private"],  # not visible to other packages; embedded by :remote-cache below
+    deps = [
+        "//go/mirko:go_default_library",
+        "@com_github_golang_glog//:go_default_library",
+        "@com_github_minio_minio_go_v7//:go_default_library",
+        "@com_github_minio_minio_go_v7//pkg/credentials:go_default_library",
+    ],
+)
+
+go_binary(
+    name = "remote-cache",
+    embed = [":go_default_library"],  # the binary is built from the library target above
+    visibility = ["//visibility:public"],
+)
diff --git a/devtools/ci/remote-cache/README.md b/devtools/ci/remote-cache/README.md
new file mode 100644
index 0000000..667f39a
--- /dev/null
+++ b/devtools/ci/remote-cache/README.md
@@ -0,0 +1,34 @@
+remote-cache
+============
+
+A small Go service that acts as a [Bazel remote cache HTTP server](https://docs.bazel.build/versions/master/remote-caching.html#http-caching-protocol) and is backed by Ceph.
+
+Status
+------
+
+Work in progress. It does not run on prod yet, as it needs write authentication support first.
+
+Building
+--------
+
+    bazel build //devtools/ci/remote-cache
+
+Running locally
+---------------
+
+For now, you'll have to manually acquire some Ceph RadosGW/S3 keys. When you have them:
+
+    bazel run //devtools/ci/remote-cache -- \
+        -object_access_key YOURACCESSKEY -object_secret_key yourSecretAccessKey -object_bucket your-bucket
+
+Then, tell Bazel to connect when building something:
+
+    bazel build --remote_cache=http://127.0.0.1:8080 //cluster/prodvider
+
+You should see something like this, if you ended up mostly doing GETs:
+
+    INFO: Elapsed time: 40.149s, Critical Path: 30.40s
+    INFO: 705 processes: 705 remote cache hit.
+    INFO: Build completed successfully, 718 total actions
+
+This will be slower than building without a cache if you mostly PUT cache elements, and will likely even be slower on GETs unless you have excellent connectivity to k0. The remote cache is only a building block used to make builds faster, and we will need more things (e.g. Remote Build Execution and CI) to actually get speedups for developer builds.
diff --git a/devtools/ci/remote-cache/main.go b/devtools/ci/remote-cache/main.go
new file mode 100644
index 0000000..dfb23a6
--- /dev/null
+++ b/devtools/ci/remote-cache/main.go
@@ -0,0 +1,77 @@
+package main
+
+import (
+	"flag"
+	"net"
+	"net/http"
+
+	"code.hackerspace.pl/hscloud/go/mirko"
+
+	"github.com/golang/glog"
+	"github.com/minio/minio-go/v7"
+	"github.com/minio/minio-go/v7/pkg/credentials"
+)
+
+var ( // Backing variables for the command-line flags registered in main().
+	flagListenPublic    = ":8080"                      // address the public HTTP server listens on
+	flagObjectEndpoint  = "object.ceph-waw3.hswaw.net" // object storage endpoint name
+	flagObjectAccessKey = ""                           // object storage access key
+	flagObjectSecretKey = ""                           // object storage secret key
+	flagObjectBucket    = ""                           // bucket name; main() exits when it is empty
+	flagObjectPrefix    = "cache/"                     // prefix put in front of every object key
+)
+
+// main registers the flags, each defaulting to its own variable's initial
+// value, then serves the cache until m.Done() fires or the HTTP server stops.
+func main() {
+	flag.StringVar(&flagListenPublic, "listen_public", flagListenPublic, "Address to listen on for Bazel HTTP caching protocol clients")
+	flag.StringVar(&flagObjectEndpoint, "object_endpoint", flagObjectEndpoint, "Object Storage endpoint name")
+	flag.StringVar(&flagObjectAccessKey, "object_access_key", flagObjectAccessKey, "Object Storage AccessKey")
+	flag.StringVar(&flagObjectSecretKey, "object_secret_key", flagObjectSecretKey, "Object Storage SecretKey")
+	flag.StringVar(&flagObjectBucket, "object_bucket", flagObjectBucket, "Object Storage bucket name")
+	flag.StringVar(&flagObjectPrefix, "object_prefix", flagObjectPrefix, "Object Storage prefix for paths")
+	flag.Parse()
+
+	if flagObjectBucket == "" {
+		glog.Exitf("object_bucket must be set")
+	}
+
+	m := mirko.New()
+	if err := m.Listen(); err != nil {
+		glog.Exitf("Listen(): %v", err)
+	}
+
+	minioClient, err := minio.New(flagObjectEndpoint, &minio.Options{
+		Creds:  credentials.NewStaticV4(flagObjectAccessKey, flagObjectSecretKey, ""),
+		Secure: true,
+	})
+	if err != nil {
+		glog.Exitf("Failed to initialize Object Storage client: %v", err)
+	}
+	s := newService(minioClient, flagObjectBucket, flagObjectPrefix)
+
+	httpListen, err := net.Listen("tcp", flagListenPublic)
+	if err != nil {
+		glog.Exitf("net.Listen: %v", err)
+	}
+	httpServer := &http.Server{
+		Addr:    flagListenPublic,
+		Handler: s.publicHandler,
+	}
+	// Serve in the background; errs hands the server's exit error to the select below.
+	errs := make(chan error)
+	go func() {
+		glog.Infof("Public listening on %s", flagListenPublic)
+		errs <- httpServer.Serve(httpListen)
+	}()
+
+	if err := m.Serve(); err != nil {
+		glog.Exitf("Serve(): %v", err)
+	}
+
+	select {
+	case <-m.Done():
+	case err := <-errs:
+		glog.Exitf("Serve(): %v", err)
+	}
+}
diff --git a/devtools/ci/remote-cache/service.go b/devtools/ci/remote-cache/service.go
new file mode 100644
index 0000000..70c9d18
--- /dev/null
+++ b/devtools/ci/remote-cache/service.go
@@ -0,0 +1,135 @@
+package main
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"strings"
+
+	"github.com/golang/glog"
+	"github.com/minio/minio-go/v7"
+)
+
+type service struct { // service answers cache requests using an object storage bucket.
+	objectClient  *minio.Client // client used for all object storage calls
+	objectBucket  string        // bucket that objects are read from and written to
+	objectPrefix  string        // prefix put in front of every object key
+	publicHandler http.Handler  // handler set up by newService; routes "/" to handlePublic
+}
+
+// newService returns a service bound to the given client, bucket and key
+// prefix, whose publicHandler is a mux routing "/" to handlePublic.
+func newService(objectClient *minio.Client, objectBucket, objectPrefix string) *service {
+	router := http.NewServeMux()
+	svc := &service{
+		objectClient: objectClient, objectBucket: objectBucket,
+		objectPrefix: objectPrefix, publicHandler: router,
+	}
+	router.HandleFunc("/", svc.handlePublic)
+	return svc
+}
+
+// handlePublic serves the Bazel HTTP caching protocol for /ac/<key> and
+// /cas/<key> paths whose key is 64 characters long; other paths get a 404.
+// GET streams the object from the bucket, or answers 404 when the object
+// store reports it missing. PUT buffers the body, rejects CAS uploads whose
+// sha256 differs from the key, and stores objects that are not yet present.
+// PUT failures are only logged: the client is sent no error status for them.
+func (s *service) handlePublic(w http.ResponseWriter, r *http.Request) {
+	ctx := r.Context()
+	switch r.Method {
+	case "GET":
+		// Always allow GET access to cache.
+	case "PUT":
+		// Require authentication for cache writes.
+		// TODO(q3k): implement
+	default:
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	// Accept only <kind>/<key>, with kind "ac" or "cas" and a 64-character key.
+	parts := strings.Split(strings.TrimPrefix(r.URL.Path, "/"), "/")
+	if len(parts) != 2 || (parts[0] != "ac" && parts[0] != "cas") || len(parts[1]) != 64 {
+		http.NotFound(w, r)
+		return
+	}
+
+	cacheKey := fmt.Sprintf("%s%s/%s", s.objectPrefix, parts[0], parts[1])
+	glog.Infof("%s %s %s", r.RemoteAddr, r.Method, cacheKey)
+
+	if r.Method == "GET" {
+		obj, err := s.objectClient.GetObject(ctx, s.objectBucket, cacheKey, minio.GetObjectOptions{})
+		if err != nil {
+			glog.Errorf("GetObject(%s, %s): %v", s.objectBucket, cacheKey, err)
+			http.Error(w, "could not contact object store", http.StatusInternalServerError)
+			return
+		}
+		// Release the object handle on every exit path of this branch.
+		defer obj.Close()
+
+		// Detect a missing key through the S3 error code instead of comparing
+		// the human-readable error message.
+		if _, err := obj.Stat(); err != nil {
+			if minio.ToErrorResponse(err).Code == "NoSuchKey" {
+				http.NotFound(w, r)
+				return
+			}
+			glog.Errorf("Stat(%s, %s): %v", s.objectBucket, cacheKey, err)
+			http.Error(w, "could not contact object store", http.StatusInternalServerError)
+			return
+		}
+
+		// Stream object to client. The response may already be partially
+		// written when the copy fails, so the error can only be logged.
+		if _, err := io.Copy(w, obj); err != nil {
+			glog.Errorf("%s: GET %s failed: %v", r.RemoteAddr, cacheKey, err)
+		}
+		return
+	}
+
+	if r.Method == "PUT" {
+		// Buffer the file, as we need to check its sha256.
+		// TODO(q3k): check and limit body size.
+		data, err := ioutil.ReadAll(r.Body)
+		if err != nil {
+			glog.Errorf("ReadAll: %v", err)
+			return
+		}
+		hashBytes := sha256.Sum256(data)
+		hash := hex.EncodeToString(hashBytes[:])
+		// Bazel cache uploads always seem to use lowercase sha256 representations.
+		if parts[0] == "cas" && hash != parts[1] {
+			glog.Warningf("%s: sent PUT for %s with invalid hash %s", r.RemoteAddr, cacheKey, hash)
+			// Don't tell the user anything - Bazel won't care, anyway, and us
+			// logging this is probably good enough for debugging purposes.
+			return
+		}
+		// If the file already exists in the cache, ignore it. S3 doesn't seem
+		// to give us an upload-if-missing functionality?
+		if _, err := s.objectClient.StatObject(ctx, s.objectBucket, cacheKey, minio.StatObjectOptions{}); err == nil {
+			// File already exists, return early.
+			// This might not fire in case we fail to retrieve the object for
+			// some reason other than its nonexistence, but an error will be
+			// logged for this at PutObject later on.
+			return
+		}
+
+		buffer := bytes.NewBuffer(data)
+		_, err = s.objectClient.PutObject(ctx, s.objectBucket, cacheKey, buffer, int64(len(data)), minio.PutObjectOptions{
+			UserMetadata: map[string]string{
+				"remote-cache-origin": r.RemoteAddr,
+			},
+		})
+		if err != nil {
+			// Swallow the error. Can't do much for the bazel writer, anyway.
+			// Retrying here isn't easy, as we don't want to become a
+			// queue/buffer unless really needed.
+			glog.Errorf("%s: PUT %s failed: %v", r.RemoteAddr, cacheKey, err)
+		}
+	}
+}