blob: f292289533da808c6d6c994fa601d19436c00efe [file] [log] [blame]
Serge Bazanski49154252020-10-24 20:22:08 +02001package main
2
3// tarify implements a minimal, self-contained, hermetic tarball builder.
4// It is currently used with gostatic to take a non-hermetic directory and
5// turn it into a hermetic tarball via a glob.
6//
7// For more information about tree artifacts and hermeticity, see:
8// https://jmmv.dev/2019/12/bazel-dynamic-execution-tree-artifacts.html
9
10import (
11 "archive/tar"
12 "flag"
13 "fmt"
14 "io"
15 "os"
16 "path/filepath"
17 "sort"
18 "strings"
19
20 "github.com/golang/glog"
21)
22
// Command-line flag values. Both are registered and parsed in main, which
// exits if either one is left empty.
var (
	// flagSite receives the value of the -site flag ("Site sources"): the
	// directory tree that gets archived.
	flagSite string
	// flagTarball receives the value of the -tarball flag ("Output tarball"):
	// the path of the tar file to create.
	flagTarball string
)
27
// init switches the "logtostderr" flag to "true" before main runs, so log
// output goes to stderr unless the command line overrides it when main later
// calls flag.Parse.
func init() {
	// Best effort: the error from flag.Set (returned when the flag is not
	// registered) is intentionally discarded.
	_ = flag.Set("logtostderr", "true")
}
31
32func main() {
33 flag.StringVar(&flagSite, "site", "", "Site sources")
34 flag.StringVar(&flagTarball, "tarball", "", "Output tarball")
35 flag.Parse()
36
37 if flagSite == "" {
38 glog.Exitf("-site must be set")
39 }
40 if flagTarball == "" {
41 glog.Exitf("-tarball must be set")
42 }
43
44 f, err := os.Create(flagTarball)
45 if err != nil {
46 glog.Exitf("Create(%q): %v", flagTarball, err)
47 }
48 defer f.Close()
49 w := tar.NewWriter(f)
50 defer w.Close()
51
52 flagSite = strings.TrimSuffix(flagSite, "/")
53
54 // First retrieve all files and sort. This is required for idempotency.
55 elems := []struct {
56 path string
57 info os.FileInfo
58 }{}
59 err = filepath.Walk(flagSite, func(inPath string, _ os.FileInfo, err error) error {
60 // We don't use the given fileinfo, as we want to deref symlinks.
61 info, err := os.Stat(inPath)
62 if err != nil {
63 return fmt.Errorf("Stat: %w", err)
64 }
65 elems = append(elems, struct {
66 path string
67 info os.FileInfo
68 }{inPath, info})
69 return nil
70 })
71 if err != nil {
72 glog.Exitf("Walk(%q, _): %v", flagSite, err)
73 }
74 sort.Slice(elems, func(i, j int) bool { return elems[i].path < elems[j].path })
75
76 // Now that we have a sorted list, tar 'em up.
77 for _, elem := range elems {
78 inPath := elem.path
79 info := elem.info
80
81 outPath := strings.TrimPrefix(strings.TrimPrefix(inPath, flagSite), "/")
82 if outPath == "" {
83 continue
84 }
85 if info.IsDir() {
86 glog.Infof("D %s", outPath)
87 if err := w.WriteHeader(&tar.Header{
88 Typeflag: tar.TypeDir,
89 Name: outPath,
90 Mode: 0755,
91 }); err != nil {
92 glog.Exitf("Writing directory header for %q failed: %v", inPath, err)
93 }
94 } else {
95 glog.Infof("F %s", outPath)
96 if err := w.WriteHeader(&tar.Header{
97 Typeflag: tar.TypeReg,
98 Name: outPath,
99 Mode: 0644,
100 // TODO(q3k): this can race (TOCTOU Stat/Open, resulting in "archive/tar: write Too long")
101 // No idea, how to handle this better though without reading the entire file into memory,
102 // or trying to do filesystem locks? Besides, in practical use with Bazel this will never
103 // happen.
104 Size: info.Size(),
105 }); err != nil {
106 glog.Exitf("Writing file header for %q failed: %v", inPath, err)
107 }
108 r, err := os.Open(inPath)
109 if err != nil {
110 glog.Exitf("Open(%q): %v", inPath, err)
111 }
112 defer r.Close()
113 if _, err := io.Copy(w, r); err != nil {
114 glog.Exitf("Copy(%q): %v", inPath, err)
115 }
116
117 }
118 }
119}