devtools/ci/remote-cache: init

This is a first pass at a Bazel remote cache. It notably does not yet
implement authentication, upload limits or garbage collection.

We won't be deploying it to prod until these are done.

Change-Id: I70a89dbe8b3ec933b2ce82e234a969e8337ba1d9
diff --git a/devtools/ci/remote-cache/service.go b/devtools/ci/remote-cache/service.go
new file mode 100644
index 0000000..70c9d18
--- /dev/null
+++ b/devtools/ci/remote-cache/service.go
@@ -0,0 +1,135 @@
+package main
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"strings"
+
+	"github.com/golang/glog"
+	"github.com/minio/minio-go/v7"
+)
+
+// service serves a Bazel remote cache over HTTP, backed by an object store.
+type service struct {
+	objectClient               *minio.Client // client used for all object store calls
+	objectBucket, objectPrefix string        // bucket, and prefix prepended to every object key
+	publicHandler              http.Handler  // set by newService; dispatches to handlePublic
+}
+
+// newService builds a service that keeps cache entries in objectBucket under
+// keys starting with objectPrefix, and wires up its public HTTP handler.
+func newService(objectClient *minio.Client, objectBucket, objectPrefix string) *service {
+	svc := &service{objectClient: objectClient, objectBucket: objectBucket, objectPrefix: objectPrefix}
+
+	// A single catch-all route: handlePublic parses and validates the path itself.
+	router := http.NewServeMux()
+	router.HandleFunc("/", svc.handlePublic)
+	svc.publicHandler = router
+	return svc
+}
+
+// handlePublic serves the cache endpoints /ac/<key> and /cas/<key>, where
+// <key> is a 64-character hex SHA-256 digest. GET streams the stored object
+// (404 if absent). PUT stores the request body; for cas, the body must hash to
+// <key>. PUT failures are only logged, never reported to the client.
+func (s *service) handlePublic(w http.ResponseWriter, r *http.Request) {
+	ctx := r.Context()
+	switch r.Method {
+	case http.MethodGet:
+		// Always allow GET access to cache.
+	case http.MethodPut:
+		// Require authentication for cache writes.
+		// TODO(q3k): implement
+	default:
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	parts := strings.Split(strings.TrimPrefix(r.URL.Path, "/"), "/")
+	if len(parts) != 2 || (parts[0] != "ac" && parts[0] != "cas") {
+		http.NotFound(w, r)
+		return
+	}
+	// Keys are hex SHA-256 digests. Rejecting anything else keeps arbitrary
+	// client-supplied bytes out of object names and log lines.
+	if _, err := hex.DecodeString(parts[1]); err != nil || len(parts[1]) != 64 {
+		http.NotFound(w, r)
+		return
+	}
+
+	cacheKey := fmt.Sprintf("%s%s/%s", s.objectPrefix, parts[0], parts[1])
+	glog.Infof("%s %s %s", r.RemoteAddr, r.Method, cacheKey)
+
+	if r.Method == http.MethodGet {
+		obj, err := s.objectClient.GetObject(ctx, s.objectBucket, cacheKey, minio.GetObjectOptions{})
+		if err != nil {
+			glog.Errorf("GetObject(%s, %s): %v", s.objectBucket, cacheKey, err)
+			http.Error(w, "could not contact object store", http.StatusInternalServerError)
+			return
+		}
+		// Release the object's resources once the handler returns.
+		defer obj.Close()
+
+		// A missing key is reported by Stat. Match on the S3 error code
+		// rather than on the human-readable message text.
+		if _, err := obj.Stat(); err != nil {
+			if minio.ToErrorResponse(err).Code == "NoSuchKey" {
+				http.NotFound(w, r)
+				return
+			}
+			glog.Errorf("Stat(%s, %s): %v", s.objectBucket, cacheKey, err)
+			http.Error(w, "could not contact object store", http.StatusInternalServerError)
+			return
+		}
+
+		// Stream object to client. By the time a copy fails, part of the
+		// response may already be written, so the failure is only logged.
+		if _, err := io.Copy(w, obj); err != nil {
+			glog.Errorf("%s: GET %s: streaming failed: %v", r.RemoteAddr, cacheKey, err)
+		}
+		return
+	}
+
+	// PUT: buffer the file, as we need to check its sha256.
+	// TODO(q3k): check and limit body size.
+	data, err := ioutil.ReadAll(r.Body)
+	if err != nil {
+		glog.Errorf("ReadAll: %v", err)
+		return
+	}
+	hashBytes := sha256.Sum256(data)
+	hash := hex.EncodeToString(hashBytes[:])
+	// Bazel cache uploads always seem to use lowercase sha256 representations.
+	if parts[0] == "cas" && hash != parts[1] {
+		glog.Warningf("%s: sent PUT for %s with invalid hash %s", r.RemoteAddr, cacheKey, hash)
+		// Don't tell the user anything - Bazel won't care, anyway, and us
+		// logging this is probably good enough for debugging purposes.
+		return
+	}
+	// If the file already exists in the cache, ignore it. S3 doesn't seem
+	// to give us an upload-if-missing functionality?
+	if _, err := s.objectClient.StatObject(ctx, s.objectBucket, cacheKey, minio.StatObjectOptions{}); err == nil {
+		// File already exists, return early. This does not fire when the
+		// lookup fails for a reason other than nonexistence; in that case
+		// PutObject below is still attempted and any failure is logged.
+		return
+	}
+
+	buffer := bytes.NewBuffer(data)
+	_, err = s.objectClient.PutObject(ctx, s.objectBucket, cacheKey, buffer, int64(len(data)), minio.PutObjectOptions{
+		UserMetadata: map[string]string{
+			"remote-cache-origin": r.RemoteAddr,
+		},
+	})
+	if err != nil {
+		// Swallow the error. Can't do much for the bazel writer, anyway.
+		// Retrying here isn't easy, as we don't want to become a
+		// queue/buffer unless really needed.
+		glog.Errorf("%s: PUT %s failed: %v", r.RemoteAddr, cacheKey, err)
+	}
+}