cluster/identd/ident: add basic ident protocol client

This is the first pass at an ident protocol client. In the end, we want
to implement an ident protocol server for our in-cluster identd, but
starting out with a client helps me get familiar with the protocol,
and will allow the server implementation to be tested against the
client.

Change-Id: Ic37b84577321533bab2f2fbf7fb53409a5defb95
diff --git a/cluster/identd/ident/BUILD.bazel b/cluster/identd/ident/BUILD.bazel
new file mode 100644
index 0000000..b672c92
--- /dev/null
+++ b/cluster/identd/ident/BUILD.bazel
@@ -0,0 +1,23 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "go_default_library",
+    srcs = [
+        "client.go",
+        "request.go",
+        "response.go",
+    ],
+    importpath = "code.hackerspace.pl/hscloud/cluster/identd/ident",
+    visibility = ["//visibility:public"],
+    deps = ["@com_github_golang_glog//:go_default_library"],
+)
+
+go_test(
+    name = "go_default_test",
+    srcs = [
+        "request_test.go",
+        "response_test.go",
+    ],
+    embed = [":go_default_library"],
+    deps = ["@com_github_go_test_deep//:go_default_library"],
+)
diff --git a/cluster/identd/ident/README.md b/cluster/identd/ident/README.md
new file mode 100644
index 0000000..00a117c
--- /dev/null
+++ b/cluster/identd/ident/README.md
@@ -0,0 +1,3 @@
+Implementation of the IDENT protocol (RFC 1413) in Go.
+
+Currently implements a basic client that has not been tested in production.
diff --git a/cluster/identd/ident/client.go b/cluster/identd/ident/client.go
new file mode 100644
index 0000000..c76e867
--- /dev/null
+++ b/cluster/identd/ident/client.go
@@ -0,0 +1,187 @@
+package ident
+
+import (
+	"bufio"
+	"context"
+	"fmt"
+	"io"
+	"net"
+	"strconv"
+
+	"github.com/golang/glog"
+)
+
+// DialOption customizes how a Client dials its ident server.
+type DialOption func(d *dialOptions)
+
+// dialOptions is the configuration that DialOptions mutate.
+type dialOptions struct {
+	// dialer opens the connection to the ident server.
+	dialer func(context.Context, string, string) (net.Conn, error)
+}
+
+// WithDialer makes a Client dial with the given function instead of package net.
+func WithDialer(dialer func(context.Context, string, string) (net.Conn, error)) DialOption {
+	return func(o *dialOptions) { o.dialer = dialer }
+}
+
+// parseTarget splits a Dial target into host and port, failing only if a
+// port is present but unparseable. Without a port, the default ident port,
+// 113, is used and a bracketed IPv6 literal ("[::1]") loses its brackets.
+func parseTarget(s string) (string, uint16, error) {
+	if host, portStr, err := net.SplitHostPort(s); err == nil {
+		port, err := strconv.ParseUint(portStr, 10, 16)
+		if err != nil {
+			return "", 0, fmt.Errorf("can't parse port %q: %w", portStr, err)
+		}
+		return host, uint16(port), nil
+	}
+	if n := len(s); n > 2 && s[0] == '[' && s[n-1] == ']' {
+		s = s[1 : n-1]
+	}
+	return s, 113, nil
+}
+
+// Dial sets up an ident protocol Client that will connect to the given target.
+// Target can be either a host:port pair, or just a host (in which case the
+// default ident port, 113, is used).
+// This does not actually connect to identd over TCP - that will be done, as
+// necessary, as requests are processed (including reconnections if multiple
+// requests are processed on a Client which connects to a server that does not
+// support long-standing ident donnections).
+func Dial(target string, options ...DialOption) (*Client, error) {
+	host, port, err := parseTarget(target)
+	if err != nil {
+		return nil, fmt.Errorf("invalid target: %v", err)
+	}
+
+	dialer := net.Dialer{}
+	opts := dialOptions{
+		dialer: dialer.DialContext,
+	}
+	for _, opt := range options {
+		opt(&opts)
+	}
+
+	return &Client{
+		opts:    opts,
+		target:  net.JoinHostPort(host, fmt.Sprintf("%d", port)),
+		conn:    nil,
+		scanner: nil,
+	}, nil
+}
+
+// Client is an ident protocol client. It maintains a connection to the ident
+// server that it's been configured for, reconnecting as necessary. It is not
+// safe to be used by multiple goroutines.
+type Client struct {
+	// opts are the dialOptions with which the client has been constructed.
+	opts dialOptions
+	// target is the full host:port pair that the client should connect to.
+	target string
+	// conn is either nil or an active TCP connection to the ident server.
+	conn net.Conn
+	// scanner is either nil or a line-scanner attached to the receive side of
+	// conn.
+	scanner *bufio.Scanner
+}
+
+// connect dials c.target and attaches a fresh line scanner to the connection.
+func (c *Client) connect(ctx context.Context) error {
+	glog.V(1).Infof("Dialing IDENT at %q", c.target)
+	nc, err := c.opts.dialer(ctx, "tcp", c.target)
+	if err != nil {
+		return fmt.Errorf("connecting: %w", err)
+	}
+	c.conn, c.scanner = nc, bufio.NewScanner(nc)
+	return nil
+}
+
+// disconnect closes and forgets the current connection, if there is one.
+func (c *Client) disconnect() {
+	if c.conn != nil {
+		c.conn.Close()
+		c.conn = nil
+	}
+}
+
+// Do executes the given Request against the server to which the Client is
+// connected, dialing first if needed. If it fails after dialing (including
+// ctx being done), the connection is dropped so that the next call redials.
+func (c *Client) Do(ctx context.Context, r *Request) (*Response, error) {
+	glog.V(1).Infof("Do(%+v)", r)
+
+	// Connect if needed.
+	if c.conn == nil {
+		if err := c.connect(ctx); err != nil {
+			return nil, err
+		}
+	}
+	// Use local references: disconnect() below resets c.conn concurrently.
+	conn, scanner := c.conn, c.scanner
+
+	// Start a goroutine that will perform the actual request/response
+	// processing to the server. A successful response will land in resC, while
+	// any protocol-level error will land in errC.
+	// We make both channels buffered, because if the context expires without a
+	// response, we want the goroutine to be able to write to them even though
+	// we're not receiving anymore. The channel will then be garbage collected.
+	resC := make(chan *Response, 1)
+	errC := make(chan error, 1)
+	go func() {
+		data := r.encode()
+		glog.V(3).Infof(" -> %q", data)
+		if _, err := conn.Write(data); err != nil {
+			errC <- fmt.Errorf("Write: %w", err)
+			return
+		}
+		if !scanner.Scan() {
+			// scanner.Err() returns nil on EOF. We want that EOF, as the ident
+			// protocol has special meaning for EOF sent by the server
+			// (indicating either a lack of support for multiple requests per
+			// connection, or a refusal to serve at an early stage of the
+			// connection).
+			err := scanner.Err()
+			if err == nil {
+				err = io.EOF
+			}
+			errC <- fmt.Errorf("Read: %w", err)
+			return
+		}
+		data = scanner.Bytes()
+		glog.V(3).Infof(" <- %q", data)
+		resp, err := decodeResponse(data)
+		if err != nil {
+			errC <- err
+			return
+		}
+		resC <- resp
+	}()
+
+	select {
+	case <-ctx.Done():
+		// If the context is closed, fail with the context error and kill the
+		// connection. The running goroutine will error out on any pending
+		// network I/O and fail at some later point.
+		// TODO(q3k): make the communication goroutine long-lived and don't
+		// kill it here, just let it finish whatever it's doing and ignore the
+		// result.
+		c.disconnect()
+		return nil, ctx.Err()
+	case res := <-resC:
+		return res, nil
+	case err := <-errC:
+		// TODO(q3k): interpret EOF, which can mean different things at
+		// different times according to the RFC.
+		c.disconnect()
+		return nil, err
+	}
+}
+
+// Close closes any underlying TCP connection; a later Do will redial.
+func (c *Client) Close() (err error) {
+	if conn := c.conn; conn != nil {
+		c.conn, err = nil, conn.Close()
+	}
+	return err
+}
diff --git a/cluster/identd/ident/request.go b/cluster/identd/ident/request.go
new file mode 100644
index 0000000..9727893
--- /dev/null
+++ b/cluster/identd/ident/request.go
@@ -0,0 +1,20 @@
+package ident
+
+import (
+	"fmt"
+)
+
+// Request is an ident protocol request, as seen by the client or server.
+type Request struct {
+	// ClientPort is the port number on the client side of the ident protocol,
+	// ie. the port local to the ident client.
+	ClientPort uint16
+	// ServerPort is the port number on the server side of the ident protocol,
+	// ie. the port local to the ident server.
+	ServerPort uint16
+}
+
+// encode encodes this Request as per RFC1413, including the terminating \r\n.
+func (r *Request) encode() []byte {
+	return []byte(fmt.Sprintf("%d,%d\r\n", r.ServerPort, r.ClientPort))
+}
diff --git a/cluster/identd/ident/request_test.go b/cluster/identd/ident/request_test.go
new file mode 100644
index 0000000..169ff00
--- /dev/null
+++ b/cluster/identd/ident/request_test.go
@@ -0,0 +1,14 @@
+package ident
+
+import "testing"
+
+// TestRequestEncode exercises the (simple) functionality of Reequest.encode.
+func TestRequestEncode(t *testing.T) {
+	r := Request{
+		ClientPort: 123,
+		ServerPort: 234,
+	}
+	if want, got := "234,123\r\n", string(r.encode()); want != got {
+		t.Errorf("Wanted %q, got %q", want, got)
+	}
+}
diff --git a/cluster/identd/ident/response.go b/cluster/identd/ident/response.go
new file mode 100644
index 0000000..5eab431
--- /dev/null
+++ b/cluster/identd/ident/response.go
@@ -0,0 +1,153 @@
+package ident
+
+import (
+	"fmt"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+var (
+	// reErrorReply matches error-reply from RFC1413, tolerating extra whitespace
+	// between tokens, without checking that error-type is a standardized value.
+	// Submatches: 1 is the server port, 2 the client port, 3 the error-type.
+	reErrorReply = regexp.MustCompile(`^\s*(\d{1,5})\s*,\s*(\d{1,5})\s*:\s*ERROR\s*:\s*(.+)$`)
+	// reIdentReply matches ident-reply from RFC1413, tolerating extra whitespace
+	// between tokens, without checking opsys-field and user-id for compliance.
+	// Submatches: 1, 2 are the ports, 3 the opsys, 5 the charset, 6 the user-id.
+	reIdentReply = regexp.MustCompile(`^\s*(\d{1,5})\s*,\s*(\d{1,5})\s*:\s*USERID\s*:\s*([^:,]+)(,([^:]+))?\s*:(.+)$`)
+)
+
+// Response is an ident protocol response, as seen by the client or server.
+type Response struct {
+	// ClientPort is the port number on the client side of the ident protocol,
+	// ie. the port local to the ident client.
+	ClientPort uint16
+	// ServerPort is the port number on the server side of the ident protocol,
+	// ie. the port local to the ident server.
+	ServerPort uint16
+
+	// Exactly one of {Error, Ident} must be non-zero.
+
+	// Error is either NoError (the zero value) or one of the ErrorResponse
+	// types if this response represents an ident protocol error reply.
+	Error ErrorResponse
+	// Ident is either nil or an IdentResponse if this response represents an
+	// ident protocol ident reply.
+	Ident *IdentResponse
+}
+
+// ErrorResponse is the error-type token from RFC1413, indicating one of the
+// possible errors returned by the ident protocol server.
+type ErrorResponse string
+
+const (
+	// NoError is the zero ErrorResponse, indicating a lack of error.
+	NoError ErrorResponse = ""
+	// InvalidPort indicates that either the local or foreign port was
+	// improperly specified.
+	InvalidPort ErrorResponse = "INVALID-PORT"
+	// NoUser indicates that the port pair is not currently in use or currently
+	// not owned by an identifiable entity.
+	NoUser ErrorResponse = "NO-USER"
+	// HiddenUser indicates that the server was able to identify the user of
+	// this port, but the information was not returned at the request of the
+	// user.
+	HiddenUser ErrorResponse = "HIDDEN-USER"
+	// UnknownError indicates that the server could not determine the
+	// connection owner for an unknown reason.
+	UnknownError ErrorResponse = "UNKNOWN-ERROR"
+)
+
+// IsStandardError returns whether ErrorResponse is one of InvalidPort, NoUser,
+// HiddenUser or UnknownError. NoError and non-standard errors yield false.
+func (e ErrorResponse) IsStandardError() bool {
+	standard := e == InvalidPort ||
+		e == NoUser ||
+		e == HiddenUser ||
+		e == UnknownError
+	return standard
+}
+
+// IsNonStandardError returns whether the ErrorResponse represents a
+// non-standard error, ie. one whose token starts with 'X'.
+func (e ErrorResponse) IsNonStandardError() bool {
+	return strings.HasPrefix(string(e), "X")
+}
+
+// IsError returns whether the ErrorResponse represents an error, standard or
+// non-standard. It returns false for NoError and any other unrecognized value.
+func (e ErrorResponse) IsError() bool {
+	switch {
+	case e.IsStandardError(), e.IsNonStandardError():
+		return true
+	}
+	return false
+}
+
+// IdentResponse is the combined opsys, charset and user-id fields from
+// RFC1413. It represents a non-error response from the ident protocol server.
+type IdentResponse struct {
+	// OperatingSystem is an operating system identifier as per RFC1340. This
+	// is usually UNIX. OTHER has a special meaning, see RFC1413 for more
+	// information.
+	OperatingSystem string
+	// CharacterSet is a character set as per RFC1340, defaulting to US-ASCII.
+	CharacterSet string
+	// UserID is the 'normal' user identification of the owner of the
+	// connection, unless the operating system is set to OTHER. See RFC1413 for
+	// more information.
+	UserID string
+}
+
+// decodeResponse parses the given bytes as an ident response. The data must be
+// stripped of the trailing \r\n. Error replies are tried first, then ident
+// replies; data matching neither yields an "unparseable response" error.
+func decodeResponse(data []byte) (*Response, error) {
+	line := string(data)
+
+	// parsePorts converts the two port submatches shared by both reply kinds.
+	parsePorts := func(m []string) (uint16, uint16, error) {
+		server, err := strconv.ParseUint(m[1], 10, 16)
+		if err != nil {
+			return 0, 0, fmt.Errorf("invalid server port: %w", err)
+		}
+		client, err := strconv.ParseUint(m[2], 10, 16)
+		if err != nil {
+			return 0, 0, fmt.Errorf("invalid client port: %w", err)
+		}
+		return uint16(server), uint16(client), nil
+	}
+
+	if m := reErrorReply.FindStringSubmatch(line); m != nil {
+		serverPort, clientPort, err := parsePorts(m)
+		if err != nil {
+			return nil, err
+		}
+		res := &Response{ClientPort: clientPort, ServerPort: serverPort}
+		res.Error = ErrorResponse(strings.TrimSpace(m[3]))
+		if !res.Error.IsError() {
+			// The RFC doesn't tell us what we should do in this case. For
+			// reliability, we downcast any unknown error to UNKNOWN-ERROR.
+			res.Error = UnknownError
+		}
+		return res, nil
+	}
+
+	if m := reIdentReply.FindStringSubmatch(line); m != nil {
+		serverPort, clientPort, err := parsePorts(m)
+		if err != nil {
+			return nil, err
+		}
+		ident := &IdentResponse{
+			OperatingSystem: strings.TrimSpace(m[3]),
+			CharacterSet:    strings.TrimSpace(m[5]),
+			UserID:          strings.TrimSpace(m[6]),
+		}
+		if ident.CharacterSet == "" {
+			ident.CharacterSet = "US-ASCII"
+		}
+		return &Response{ClientPort: clientPort, ServerPort: serverPort, Ident: ident}, nil
+	}
+	return nil, fmt.Errorf("unparseable response")
+}
diff --git a/cluster/identd/ident/response_test.go b/cluster/identd/ident/response_test.go
new file mode 100644
index 0000000..e768d46
--- /dev/null
+++ b/cluster/identd/ident/response_test.go
@@ -0,0 +1,61 @@
+package ident
+
+import (
+	"testing"
+
+	"github.com/go-test/deep"
+)
+
+// TestResponseDecode exercises the response decode implementation.
+func TestResponseDecode(t *testing.T) {
+	for i, tc := range []struct {
+		data string
+		want *Response
+	}{
+		// 0: Everything okay, server returned error.
+		{"123, 234 : ERROR : INVALID-PORT", &Response{
+			ServerPort: 123,
+			ClientPort: 234,
+			Error:      InvalidPort,
+		}},
+		// 1: Everything okay, server returned error but also added some weird
+		// whitespace.
+		{" 123\t ,234  :ERROR:  NO-USER   ", &Response{
+			ServerPort: 123,
+			ClientPort: 234,
+			Error:      NoUser,
+		}},
+		// 2: Everything okay, server returned a simple ident response.
+		{"123,234 : USERID : UNIX :q3k", &Response{
+			ServerPort: 123,
+			ClientPort: 234,
+			Ident: &IdentResponse{
+				OperatingSystem: "UNIX",
+				CharacterSet:    "US-ASCII",
+				UserID:          "q3k",
+			},
+		}},
+		// 3: Everything okay, server returned an ident response with a
+		// charset.
+		{"123,234 : USERID : UNIX, PETSCII :q3k", &Response{
+			ServerPort: 123,
+			ClientPort: 234,
+			Ident: &IdentResponse{
+				OperatingSystem: "UNIX",
+				CharacterSet:    "PETSCII",
+				UserID:          "q3k",
+			},
+		}},
+	} {
+		res, err := decodeResponse([]byte(tc.data))
+		if err != nil && tc.want != nil {
+			t.Errorf("%d: wanted result, got err %v", i, err)
+		}
+		if err != nil {
+			continue
+		}
+		if diff := deep.Equal(tc.want, res); diff != nil {
+			t.Errorf("%d: %s", i, diff)
+		}
+	}
+}