filebank.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. package stringutil
  2. import (
  3. "encoding/binary"
  4. "fmt"
  5. "io"
  6. "os"
  7. "sort"
  8. "sync"
  9. "time"
  10. )
  11. // fileLruEntry tracks the map key (lookup key) separately from the canonical value
  12. // so eviction can persist and rehydrate correctly when key != value.
  13. type fileLruEntry struct {
  14. key string
  15. value string
  16. used int64
  17. }
  18. type fileStringBank struct {
  19. lock sync.Mutex
  20. stop chan struct{}
  21. maxBytes int
  22. currentSize int
  23. f *os.File
  24. m map[string]*fileLruEntry
  25. // spill maps a lookup key to a byte offset in f for strings evicted from m.
  26. spill map[string]int64
  27. }
  28. // NewFileStringBank returns a StringBank that keeps an LRU in memory and persists
  29. // evicted strings in a length-prefixed record file at path. Lookups consult memory,
  30. // then the spill index, then allocate and cache.
  31. func NewFileStringBank(path string, maxBytes int, evictionInterval time.Duration) (StringBank, error) {
  32. ff, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o644)
  33. if err != nil {
  34. return nil, fmt.Errorf("stringutil: open file string bank: %w", err)
  35. }
  36. stop := make(chan struct{})
  37. bank := &fileStringBank{
  38. f: ff,
  39. m: make(map[string]*fileLruEntry),
  40. spill: make(map[string]int64),
  41. maxBytes: maxBytes,
  42. stop: stop,
  43. }
  44. go func() {
  45. for {
  46. select {
  47. case <-stop:
  48. return
  49. case <-time.After(evictionInterval):
  50. }
  51. bank.lock.Lock()
  52. evictFileBank(bank, maxBytes)
  53. bank.lock.Unlock()
  54. }
  55. }()
  56. return bank, nil
  57. }
  58. func evictFileBank(bank *fileStringBank, maxBytes int) {
  59. if bank.currentSize <= maxBytes {
  60. return
  61. }
  62. arr := make([]*fileLruEntry, 0, len(bank.m))
  63. for _, v := range bank.m {
  64. arr = append(arr, v)
  65. }
  66. sort.Slice(arr, func(i, j int) bool {
  67. return arr[i].used < arr[j].used
  68. })
  69. for _, old := range arr {
  70. if bank.currentSize <= maxBytes {
  71. return
  72. }
  73. if err := bank.persistSpill(old); err != nil {
  74. // Best effort: leave entry in memory on I/O failure to avoid losing data.
  75. continue
  76. }
  77. delete(bank.m, old.key)
  78. bank.currentSize -= fileEntrySize(old)
  79. }
  80. }
  81. func fileEntrySize(e *fileLruEntry) int {
  82. return len(e.key) + len(e.value)
  83. }
  84. func (bank *fileStringBank) persistSpill(e *fileLruEntry) error {
  85. off, err := bank.f.Seek(0, io.SeekEnd)
  86. if err != nil {
  87. return err
  88. }
  89. payload := []byte(e.value)
  90. var hdr [4]byte
  91. binary.BigEndian.PutUint32(hdr[:], uint32(len(payload)))
  92. if _, err := bank.f.Write(hdr[:]); err != nil {
  93. return err
  94. }
  95. if _, err := bank.f.Write(payload); err != nil {
  96. return err
  97. }
  98. // if err := bank.f.Sync(); err != nil {
  99. // return err
  100. // }
  101. bank.spill[e.key] = off
  102. return nil
  103. }
  104. func (bank *fileStringBank) readSpill(offset int64) (string, error) {
  105. if _, err := bank.f.Seek(offset, io.SeekStart); err != nil {
  106. return "", err
  107. }
  108. var hdr [4]byte
  109. if _, err := io.ReadFull(bank.f, hdr[:]); err != nil {
  110. return "", err
  111. }
  112. n := binary.BigEndian.Uint32(hdr[:])
  113. buf := make([]byte, n)
  114. if _, err := io.ReadFull(bank.f, buf); err != nil {
  115. return "", err
  116. }
  117. return string(buf), nil
  118. }
  119. func (bank *fileStringBank) loadFromSpill(key string) (string, bool) {
  120. off, ok := bank.spill[key]
  121. if !ok {
  122. return "", false
  123. }
  124. s, err := bank.readSpill(off)
  125. if err != nil {
  126. return "", false
  127. }
  128. return s, true
  129. }
  130. func nOldestFileEntries(arr []*fileLruEntry, n int) []*fileLruEntry {
  131. if n <= 0 {
  132. return []*fileLruEntry{}
  133. }
  134. if n >= len(arr) {
  135. return arr
  136. }
  137. adapted := make([]*lruEntry, len(arr))
  138. for i := range arr {
  139. adapted[i] = &lruEntry{used: arr[i].used}
  140. }
  141. oldestIdx := make(map[*lruEntry]int, len(arr))
  142. for i := range arr {
  143. oldestIdx[adapted[i]] = i
  144. }
  145. oldest := nOldest(adapted, n)
  146. out := make([]*fileLruEntry, 0, len(oldest))
  147. for _, o := range oldest {
  148. out = append(out, arr[oldestIdx[o]])
  149. }
  150. return out
  151. }
  152. // Stop ends the background eviction goroutine. Close closes the backing file.
  153. func (bank *fileStringBank) Stop() {
  154. bank.lock.Lock()
  155. defer bank.lock.Unlock()
  156. if bank.stop != nil {
  157. close(bank.stop)
  158. bank.stop = nil
  159. }
  160. }
  161. // Close releases the file handle; Stop is implied for the eviction loop.
  162. func (bank *fileStringBank) Close() error {
  163. bank.Stop()
  164. bank.lock.Lock()
  165. defer bank.lock.Unlock()
  166. var err error
  167. if bank.f != nil {
  168. err = bank.f.Close()
  169. bank.f = nil
  170. }
  171. return err
  172. }
  173. func (bank *fileStringBank) LoadOrStore(key, value string) (string, bool) {
  174. bank.lock.Lock()
  175. defer bank.lock.Unlock()
  176. if v, ok := bank.m[key]; ok {
  177. v.used = time.Now().UnixMilli()
  178. return v.value, true
  179. }
  180. if s, ok := bank.loadFromSpill(key); ok {
  181. bank.m[key] = &fileLruEntry{
  182. key: key,
  183. value: s,
  184. used: time.Now().UnixMilli(),
  185. }
  186. bank.currentSize += len(key) + len(s)
  187. delete(bank.spill, key)
  188. if bank.currentSize > (bank.maxBytes + (bank.maxBytes / 2)) {
  189. evictFileBank(bank, bank.maxBytes)
  190. }
  191. return s, true
  192. }
  193. bank.m[key] = &fileLruEntry{
  194. key: key,
  195. value: value,
  196. used: time.Now().UnixMilli(),
  197. }
  198. bank.currentSize += len(key) + len(value)
  199. if bank.currentSize > (bank.maxBytes + (bank.maxBytes / 2)) {
  200. evictFileBank(bank, bank.maxBytes)
  201. }
  202. return value, false
  203. }
  204. func (bank *fileStringBank) LoadOrStoreFunc(key string, f func() string) (string, bool) {
  205. bank.lock.Lock()
  206. defer bank.lock.Unlock()
  207. if v, ok := bank.m[key]; ok {
  208. v.used = time.Now().UnixMilli()
  209. return v.value, true
  210. }
  211. if s, ok := bank.loadFromSpill(key); ok {
  212. bank.m[key] = &fileLruEntry{
  213. key: key,
  214. value: s,
  215. used: time.Now().UnixMilli(),
  216. }
  217. bank.currentSize += len(key) + len(s)
  218. delete(bank.spill, key)
  219. if bank.currentSize > (bank.maxBytes + (bank.maxBytes / 2)) {
  220. evictFileBank(bank, bank.maxBytes)
  221. }
  222. return s, true
  223. }
  224. value := f()
  225. bank.m[value] = &fileLruEntry{
  226. key: value,
  227. value: value,
  228. used: time.Now().UnixMilli(),
  229. }
  230. bank.currentSize += len(value) + len(value)
  231. if bank.currentSize > (bank.maxBytes + (bank.maxBytes / 2)) {
  232. evictFileBank(bank, bank.maxBytes)
  233. }
  234. return value, false
  235. }
  236. func (bank *fileStringBank) Clear() {
  237. bank.lock.Lock()
  238. defer bank.lock.Unlock()
  239. bank.m = make(map[string]*fileLruEntry)
  240. bank.spill = make(map[string]int64)
  241. bank.currentSize = 0
  242. if bank.f != nil {
  243. if err := bank.f.Truncate(0); err == nil {
  244. _, _ = bank.f.Seek(0, io.SeekStart)
  245. }
  246. }
  247. }