cache.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. // Copyright 2017 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package cache implements a build artifact cache.
  5. //
  6. // This package is a slightly modified fork of Go's
  7. // cmd/go/internal/cache package.
  8. package cache
  9. import (
  10. "bytes"
  11. "crypto/sha256"
  12. "encoding/hex"
  13. "errors"
  14. "fmt"
  15. "io"
  16. "io/ioutil"
  17. "os"
  18. "path/filepath"
  19. "strconv"
  20. "strings"
  21. "time"
  22. "honnef.co/go/tools/internal/renameio"
  23. )
  24. // An ActionID is a cache action key, the hash of a complete description of a
  25. // repeatable computation (command line, environment variables,
  26. // input file contents, executable contents).
  27. type ActionID [HashSize]byte
  28. // An OutputID is a cache output key, the hash of an output of a computation.
  29. type OutputID [HashSize]byte
  30. // A Cache is a package cache, backed by a file system directory tree.
  31. type Cache struct {
  32. dir string
  33. now func() time.Time
  34. salt []byte
  35. }
  36. // Open opens and returns the cache in the given directory.
  37. //
  38. // It is safe for multiple processes on a single machine to use the
  39. // same cache directory in a local file system simultaneously.
  40. // They will coordinate using operating system file locks and may
  41. // duplicate effort but will not corrupt the cache.
  42. //
  43. // However, it is NOT safe for multiple processes on different machines
  44. // to share a cache directory (for example, if the directory were stored
  45. // in a network file system). File locking is notoriously unreliable in
  46. // network file systems and may not suffice to protect the cache.
  47. //
  48. func Open(dir string) (*Cache, error) {
  49. info, err := os.Stat(dir)
  50. if err != nil {
  51. return nil, err
  52. }
  53. if !info.IsDir() {
  54. return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
  55. }
  56. for i := 0; i < 256; i++ {
  57. name := filepath.Join(dir, fmt.Sprintf("%02x", i))
  58. if err := os.MkdirAll(name, 0777); err != nil {
  59. return nil, err
  60. }
  61. }
  62. c := &Cache{
  63. dir: dir,
  64. now: time.Now,
  65. }
  66. return c, nil
  67. }
  68. func (c *Cache) SetSalt(b []byte) {
  69. c.salt = b
  70. }
  71. // fileName returns the name of the file corresponding to the given id.
  72. func (c *Cache) fileName(id [HashSize]byte, key string) string {
  73. return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
  74. }
  75. // An entryNotFoundError indicates that a cache entry was not found, with an
  76. // optional underlying reason.
  77. type entryNotFoundError struct {
  78. Err error
  79. }
  80. func (e *entryNotFoundError) Error() string {
  81. if e.Err == nil {
  82. return "cache entry not found"
  83. }
  84. return fmt.Sprintf("cache entry not found: %v", e.Err)
  85. }
  86. func (e *entryNotFoundError) Unwrap() error {
  87. return e.Err
  88. }
  89. const (
  90. // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
  91. hexSize = HashSize * 2
  92. entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
  93. )
  94. // verify controls whether to run the cache in verify mode.
  95. // In verify mode, the cache always returns errMissing from Get
  96. // but then double-checks in Put that the data being written
  97. // exactly matches any existing entry. This provides an easy
  98. // way to detect program behavior that would have been different
  99. // had the cache entry been returned from Get.
  100. //
  101. // verify is enabled by setting the environment variable
  102. // GODEBUG=gocacheverify=1.
  103. var verify = false
  104. var errVerifyMode = errors.New("gocacheverify=1")
  105. // DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
  106. var DebugTest = false
  107. func init() { initEnv() }
  108. func initEnv() {
  109. verify = false
  110. debugHash = false
  111. debug := strings.Split(os.Getenv("GODEBUG"), ",")
  112. for _, f := range debug {
  113. if f == "gocacheverify=1" {
  114. verify = true
  115. }
  116. if f == "gocachehash=1" {
  117. debugHash = true
  118. }
  119. if f == "gocachetest=1" {
  120. DebugTest = true
  121. }
  122. }
  123. }
  124. // Get looks up the action ID in the cache,
  125. // returning the corresponding output ID and file size, if any.
  126. // Note that finding an output ID does not guarantee that the
  127. // saved file for that output ID is still available.
  128. func (c *Cache) Get(id ActionID) (Entry, error) {
  129. if verify {
  130. return Entry{}, &entryNotFoundError{Err: errVerifyMode}
  131. }
  132. return c.get(id)
  133. }
  134. type Entry struct {
  135. OutputID OutputID
  136. Size int64
  137. Time time.Time
  138. }
  139. // get is Get but does not respect verify mode, so that Put can use it.
  140. func (c *Cache) get(id ActionID) (Entry, error) {
  141. missing := func(reason error) (Entry, error) {
  142. return Entry{}, &entryNotFoundError{Err: reason}
  143. }
  144. f, err := os.Open(c.fileName(id, "a"))
  145. if err != nil {
  146. return missing(err)
  147. }
  148. defer f.Close()
  149. entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
  150. if n, err := io.ReadFull(f, entry); n > entrySize {
  151. return missing(errors.New("too long"))
  152. } else if err != io.ErrUnexpectedEOF {
  153. if err == io.EOF {
  154. return missing(errors.New("file is empty"))
  155. }
  156. return missing(err)
  157. } else if n < entrySize {
  158. return missing(errors.New("entry file incomplete"))
  159. }
  160. if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
  161. return missing(errors.New("invalid header"))
  162. }
  163. eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
  164. eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
  165. esize, entry := entry[1:1+20], entry[1+20:]
  166. //lint:ignore SA4006 See https://github.com/dominikh/go-tools/issues/465
  167. etime, entry := entry[1:1+20], entry[1+20:]
  168. var buf [HashSize]byte
  169. if _, err := hex.Decode(buf[:], eid); err != nil {
  170. return missing(fmt.Errorf("decoding ID: %v", err))
  171. } else if buf != id {
  172. return missing(errors.New("mismatched ID"))
  173. }
  174. if _, err := hex.Decode(buf[:], eout); err != nil {
  175. return missing(fmt.Errorf("decoding output ID: %v", err))
  176. }
  177. i := 0
  178. for i < len(esize) && esize[i] == ' ' {
  179. i++
  180. }
  181. size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
  182. if err != nil {
  183. return missing(fmt.Errorf("parsing size: %v", err))
  184. } else if size < 0 {
  185. return missing(errors.New("negative size"))
  186. }
  187. i = 0
  188. for i < len(etime) && etime[i] == ' ' {
  189. i++
  190. }
  191. tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
  192. if err != nil {
  193. return missing(fmt.Errorf("parsing timestamp: %v", err))
  194. } else if tm < 0 {
  195. return missing(errors.New("negative timestamp"))
  196. }
  197. c.used(c.fileName(id, "a"))
  198. return Entry{buf, size, time.Unix(0, tm)}, nil
  199. }
  200. // GetFile looks up the action ID in the cache and returns
  201. // the name of the corresponding data file.
  202. func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
  203. entry, err = c.Get(id)
  204. if err != nil {
  205. return "", Entry{}, err
  206. }
  207. file = c.OutputFile(entry.OutputID)
  208. info, err := os.Stat(file)
  209. if err != nil {
  210. return "", Entry{}, &entryNotFoundError{Err: err}
  211. }
  212. if info.Size() != entry.Size {
  213. return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
  214. }
  215. return file, entry, nil
  216. }
  217. // GetBytes looks up the action ID in the cache and returns
  218. // the corresponding output bytes.
  219. // GetBytes should only be used for data that can be expected to fit in memory.
  220. func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
  221. entry, err := c.Get(id)
  222. if err != nil {
  223. return nil, entry, err
  224. }
  225. data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID))
  226. if sha256.Sum256(data) != entry.OutputID {
  227. return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
  228. }
  229. return data, entry, nil
  230. }
  231. // OutputFile returns the name of the cache file storing output with the given OutputID.
  232. func (c *Cache) OutputFile(out OutputID) string {
  233. file := c.fileName(out, "d")
  234. c.used(file)
  235. return file
  236. }
  237. // Time constants for cache expiration.
  238. //
  239. // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour),
  240. // to avoid causing many unnecessary inode updates. The mtimes therefore
  241. // roughly reflect "time of last use" but may in fact be older by at most an hour.
  242. //
  243. // We scan the cache for entries to delete at most once per trimInterval (1 day).
  244. //
  245. // When we do scan the cache, we delete entries that have not been used for
  246. // at least trimLimit (5 days). Statistics gathered from a month of usage by
  247. // Go developers found that essentially all reuse of cached entries happened
  248. // within 5 days of the previous reuse. See golang.org/issue/22990.
  249. const (
  250. mtimeInterval = 1 * time.Hour
  251. trimInterval = 24 * time.Hour
  252. trimLimit = 5 * 24 * time.Hour
  253. )
  254. // used makes a best-effort attempt to update mtime on file,
  255. // so that mtime reflects cache access time.
  256. //
  257. // Because the reflection only needs to be approximate,
  258. // and to reduce the amount of disk activity caused by using
  259. // cache entries, used only updates the mtime if the current
  260. // mtime is more than an hour old. This heuristic eliminates
  261. // nearly all of the mtime updates that would otherwise happen,
  262. // while still keeping the mtimes useful for cache trimming.
  263. func (c *Cache) used(file string) {
  264. info, err := os.Stat(file)
  265. if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
  266. return
  267. }
  268. os.Chtimes(file, c.now(), c.now())
  269. }
  270. // Trim removes old cache entries that are likely not to be reused.
  271. func (c *Cache) Trim() {
  272. now := c.now()
  273. // We maintain in dir/trim.txt the time of the last completed cache trim.
  274. // If the cache has been trimmed recently enough, do nothing.
  275. // This is the common case.
  276. data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt"))
  277. t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
  278. if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
  279. return
  280. }
  281. // Trim each of the 256 subdirectories.
  282. // We subtract an additional mtimeInterval
  283. // to account for the imprecision of our "last used" mtimes.
  284. cutoff := now.Add(-trimLimit - mtimeInterval)
  285. for i := 0; i < 256; i++ {
  286. subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
  287. c.trimSubdir(subdir, cutoff)
  288. }
  289. // Ignore errors from here: if we don't write the complete timestamp, the
  290. // cache will appear older than it is, and we'll trim it again next time.
  291. renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666)
  292. }
  293. // trimSubdir trims a single cache subdirectory.
  294. func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
  295. // Read all directory entries from subdir before removing
  296. // any files, in case removing files invalidates the file offset
  297. // in the directory scan. Also, ignore error from f.Readdirnames,
  298. // because we don't care about reporting the error and we still
  299. // want to process any entries found before the error.
  300. f, err := os.Open(subdir)
  301. if err != nil {
  302. return
  303. }
  304. names, _ := f.Readdirnames(-1)
  305. f.Close()
  306. for _, name := range names {
  307. // Remove only cache entries (xxxx-a and xxxx-d).
  308. if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
  309. continue
  310. }
  311. entry := filepath.Join(subdir, name)
  312. info, err := os.Stat(entry)
  313. if err == nil && info.ModTime().Before(cutoff) {
  314. os.Remove(entry)
  315. }
  316. }
  317. }
  318. // putIndexEntry adds an entry to the cache recording that executing the action
  319. // with the given id produces an output with the given output id (hash) and size.
  320. func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
  321. // Note: We expect that for one reason or another it may happen
  322. // that repeating an action produces a different output hash
  323. // (for example, if the output contains a time stamp or temp dir name).
  324. // While not ideal, this is also not a correctness problem, so we
  325. // don't make a big deal about it. In particular, we leave the action
  326. // cache entries writable specifically so that they can be overwritten.
  327. //
  328. // Setting GODEBUG=gocacheverify=1 does make a big deal:
  329. // in verify mode we are double-checking that the cache entries
  330. // are entirely reproducible. As just noted, this may be unrealistic
  331. // in some cases but the check is also useful for shaking out real bugs.
  332. entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
  333. if verify && allowVerify {
  334. old, err := c.get(id)
  335. if err == nil && (old.OutputID != out || old.Size != size) {
  336. // panic to show stack trace, so we can see what code is generating this cache entry.
  337. msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
  338. panic(msg)
  339. }
  340. }
  341. file := c.fileName(id, "a")
  342. // Copy file to cache directory.
  343. mode := os.O_WRONLY | os.O_CREATE
  344. f, err := os.OpenFile(file, mode, 0666)
  345. if err != nil {
  346. return err
  347. }
  348. _, err = f.WriteString(entry)
  349. if err == nil {
  350. // Truncate the file only *after* writing it.
  351. // (This should be a no-op, but truncate just in case of previous corruption.)
  352. //
  353. // This differs from ioutil.WriteFile, which truncates to 0 *before* writing
  354. // via os.O_TRUNC. Truncating only after writing ensures that a second write
  355. // of the same content to the same file is idempotent, and does not — even
  356. // temporarily! — undo the effect of the first write.
  357. err = f.Truncate(int64(len(entry)))
  358. }
  359. if closeErr := f.Close(); err == nil {
  360. err = closeErr
  361. }
  362. if err != nil {
  363. // TODO(bcmills): This Remove potentially races with another go command writing to file.
  364. // Can we eliminate it?
  365. os.Remove(file)
  366. return err
  367. }
  368. os.Chtimes(file, c.now(), c.now()) // mainly for tests
  369. return nil
  370. }
  371. // Put stores the given output in the cache as the output for the action ID.
  372. // It may read file twice. The content of file must not change between the two passes.
  373. func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
  374. return c.put(id, file, true)
  375. }
  376. // PutNoVerify is like Put but disables the verify check
  377. // when GODEBUG=goverifycache=1 is set.
  378. // It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
  379. // like test output containing times and the like.
  380. func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
  381. return c.put(id, file, false)
  382. }
  383. func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
  384. // Compute output ID.
  385. h := sha256.New()
  386. if _, err := file.Seek(0, 0); err != nil {
  387. return OutputID{}, 0, err
  388. }
  389. size, err := io.Copy(h, file)
  390. if err != nil {
  391. return OutputID{}, 0, err
  392. }
  393. var out OutputID
  394. h.Sum(out[:0])
  395. // Copy to cached output file (if not already present).
  396. if err := c.copyFile(file, out, size); err != nil {
  397. return out, size, err
  398. }
  399. // Add to cache index.
  400. return out, size, c.putIndexEntry(id, out, size, allowVerify)
  401. }
  402. // PutBytes stores the given bytes in the cache as the output for the action ID.
  403. func (c *Cache) PutBytes(id ActionID, data []byte) error {
  404. _, _, err := c.Put(id, bytes.NewReader(data))
  405. return err
  406. }
  407. // copyFile copies file into the cache, expecting it to have the given
  408. // output ID and size, if that file is not present already.
  409. func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
  410. name := c.fileName(out, "d")
  411. info, err := os.Stat(name)
  412. if err == nil && info.Size() == size {
  413. // Check hash.
  414. if f, err := os.Open(name); err == nil {
  415. h := sha256.New()
  416. io.Copy(h, f)
  417. f.Close()
  418. var out2 OutputID
  419. h.Sum(out2[:0])
  420. if out == out2 {
  421. return nil
  422. }
  423. }
  424. // Hash did not match. Fall through and rewrite file.
  425. }
  426. // Copy file to cache directory.
  427. mode := os.O_RDWR | os.O_CREATE
  428. if err == nil && info.Size() > size { // shouldn't happen but fix in case
  429. mode |= os.O_TRUNC
  430. }
  431. f, err := os.OpenFile(name, mode, 0666)
  432. if err != nil {
  433. return err
  434. }
  435. defer f.Close()
  436. if size == 0 {
  437. // File now exists with correct size.
  438. // Only one possible zero-length file, so contents are OK too.
  439. // Early return here makes sure there's a "last byte" for code below.
  440. return nil
  441. }
  442. // From here on, if any of the I/O writing the file fails,
  443. // we make a best-effort attempt to truncate the file f
  444. // before returning, to avoid leaving bad bytes in the file.
  445. // Copy file to f, but also into h to double-check hash.
  446. if _, err := file.Seek(0, 0); err != nil {
  447. f.Truncate(0)
  448. return err
  449. }
  450. h := sha256.New()
  451. w := io.MultiWriter(f, h)
  452. if _, err := io.CopyN(w, file, size-1); err != nil {
  453. f.Truncate(0)
  454. return err
  455. }
  456. // Check last byte before writing it; writing it will make the size match
  457. // what other processes expect to find and might cause them to start
  458. // using the file.
  459. buf := make([]byte, 1)
  460. if _, err := file.Read(buf); err != nil {
  461. f.Truncate(0)
  462. return err
  463. }
  464. h.Write(buf)
  465. sum := h.Sum(nil)
  466. if !bytes.Equal(sum, out[:]) {
  467. f.Truncate(0)
  468. return fmt.Errorf("file content changed underfoot")
  469. }
  470. // Commit cache file entry.
  471. if _, err := f.Write(buf); err != nil {
  472. f.Truncate(0)
  473. return err
  474. }
  475. if err := f.Close(); err != nil {
  476. // Data might not have been written,
  477. // but file may look like it is the right size.
  478. // To be extra careful, remove cached file.
  479. os.Remove(name)
  480. return err
  481. }
  482. os.Chtimes(name, c.now(), c.now()) // mainly for tests
  483. return nil
  484. }