| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272 |
- package stringutil
- import (
- "encoding/binary"
- "fmt"
- "io"
- "os"
- "sort"
- "sync"
- "time"
- )
- // fileLruEntry tracks the map key (lookup key) separately from the canonical value
- // so eviction can persist and rehydrate correctly when key != value.
- type fileLruEntry struct {
- key string
- value string
- used int64
- }
- type fileStringBank struct {
- lock sync.Mutex
- stop chan struct{}
- maxBytes int
- currentSize int
- f *os.File
- m map[string]*fileLruEntry
- // spill maps a lookup key to a byte offset in f for strings evicted from m.
- spill map[string]int64
- }
- // NewFileStringBank returns a StringBank that keeps an LRU in memory and persists
- // evicted strings in a length-prefixed record file at path. Lookups consult memory,
- // then the spill index, then allocate and cache.
- func NewFileStringBank(path string, maxBytes int, evictionInterval time.Duration) (StringBank, error) {
- ff, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o644)
- if err != nil {
- return nil, fmt.Errorf("stringutil: open file string bank: %w", err)
- }
- stop := make(chan struct{})
- bank := &fileStringBank{
- f: ff,
- m: make(map[string]*fileLruEntry),
- spill: make(map[string]int64),
- maxBytes: maxBytes,
- stop: stop,
- }
- go func() {
- for {
- select {
- case <-stop:
- return
- case <-time.After(evictionInterval):
- }
- bank.lock.Lock()
- evictFileBank(bank, maxBytes)
- bank.lock.Unlock()
- }
- }()
- return bank, nil
- }
- func evictFileBank(bank *fileStringBank, maxBytes int) {
- if bank.currentSize <= maxBytes {
- return
- }
- arr := make([]*fileLruEntry, 0, len(bank.m))
- for _, v := range bank.m {
- arr = append(arr, v)
- }
- sort.Slice(arr, func(i, j int) bool {
- return arr[i].used < arr[j].used
- })
- for _, old := range arr {
- if bank.currentSize <= maxBytes {
- return
- }
- if err := bank.persistSpill(old); err != nil {
- // Best effort: leave entry in memory on I/O failure to avoid losing data.
- continue
- }
- delete(bank.m, old.key)
- bank.currentSize -= fileEntrySize(old)
- }
- }
- func fileEntrySize(e *fileLruEntry) int {
- return len(e.key) + len(e.value)
- }
- func (bank *fileStringBank) persistSpill(e *fileLruEntry) error {
- off, err := bank.f.Seek(0, io.SeekEnd)
- if err != nil {
- return err
- }
- payload := []byte(e.value)
- var hdr [4]byte
- binary.BigEndian.PutUint32(hdr[:], uint32(len(payload)))
- if _, err := bank.f.Write(hdr[:]); err != nil {
- return err
- }
- if _, err := bank.f.Write(payload); err != nil {
- return err
- }
- // if err := bank.f.Sync(); err != nil {
- // return err
- // }
- bank.spill[e.key] = off
- return nil
- }
- func (bank *fileStringBank) readSpill(offset int64) (string, error) {
- if _, err := bank.f.Seek(offset, io.SeekStart); err != nil {
- return "", err
- }
- var hdr [4]byte
- if _, err := io.ReadFull(bank.f, hdr[:]); err != nil {
- return "", err
- }
- n := binary.BigEndian.Uint32(hdr[:])
- buf := make([]byte, n)
- if _, err := io.ReadFull(bank.f, buf); err != nil {
- return "", err
- }
- return string(buf), nil
- }
- func (bank *fileStringBank) loadFromSpill(key string) (string, bool) {
- off, ok := bank.spill[key]
- if !ok {
- return "", false
- }
- s, err := bank.readSpill(off)
- if err != nil {
- return "", false
- }
- return s, true
- }
- func nOldestFileEntries(arr []*fileLruEntry, n int) []*fileLruEntry {
- if n <= 0 {
- return []*fileLruEntry{}
- }
- if n >= len(arr) {
- return arr
- }
- adapted := make([]*lruEntry, len(arr))
- for i := range arr {
- adapted[i] = &lruEntry{used: arr[i].used}
- }
- oldestIdx := make(map[*lruEntry]int, len(arr))
- for i := range arr {
- oldestIdx[adapted[i]] = i
- }
- oldest := nOldest(adapted, n)
- out := make([]*fileLruEntry, 0, len(oldest))
- for _, o := range oldest {
- out = append(out, arr[oldestIdx[o]])
- }
- return out
- }
- // Stop ends the background eviction goroutine. Close closes the backing file.
- func (bank *fileStringBank) Stop() {
- bank.lock.Lock()
- defer bank.lock.Unlock()
- if bank.stop != nil {
- close(bank.stop)
- bank.stop = nil
- }
- }
- // Close releases the file handle; Stop is implied for the eviction loop.
- func (bank *fileStringBank) Close() error {
- bank.Stop()
- bank.lock.Lock()
- defer bank.lock.Unlock()
- var err error
- if bank.f != nil {
- err = bank.f.Close()
- bank.f = nil
- }
- return err
- }
- func (bank *fileStringBank) LoadOrStore(key, value string) (string, bool) {
- bank.lock.Lock()
- defer bank.lock.Unlock()
- if v, ok := bank.m[key]; ok {
- v.used = time.Now().UnixMilli()
- return v.value, true
- }
- if s, ok := bank.loadFromSpill(key); ok {
- bank.m[key] = &fileLruEntry{
- key: key,
- value: s,
- used: time.Now().UnixMilli(),
- }
- bank.currentSize += len(key) + len(s)
- delete(bank.spill, key)
- if bank.currentSize > (bank.maxBytes + (bank.maxBytes / 2)) {
- evictFileBank(bank, bank.maxBytes)
- }
- return s, true
- }
- bank.m[key] = &fileLruEntry{
- key: key,
- value: value,
- used: time.Now().UnixMilli(),
- }
- bank.currentSize += len(key) + len(value)
- if bank.currentSize > (bank.maxBytes + (bank.maxBytes / 2)) {
- evictFileBank(bank, bank.maxBytes)
- }
- return value, false
- }
- func (bank *fileStringBank) LoadOrStoreFunc(key string, f func() string) (string, bool) {
- bank.lock.Lock()
- defer bank.lock.Unlock()
- if v, ok := bank.m[key]; ok {
- v.used = time.Now().UnixMilli()
- return v.value, true
- }
- if s, ok := bank.loadFromSpill(key); ok {
- bank.m[key] = &fileLruEntry{
- key: key,
- value: s,
- used: time.Now().UnixMilli(),
- }
- bank.currentSize += len(key) + len(s)
- delete(bank.spill, key)
- if bank.currentSize > (bank.maxBytes + (bank.maxBytes / 2)) {
- evictFileBank(bank, bank.maxBytes)
- }
- return s, true
- }
- value := f()
- bank.m[value] = &fileLruEntry{
- key: value,
- value: value,
- used: time.Now().UnixMilli(),
- }
- bank.currentSize += len(value) + len(value)
- if bank.currentSize > (bank.maxBytes + (bank.maxBytes / 2)) {
- evictFileBank(bank, bank.maxBytes)
- }
- return value, false
- }
- func (bank *fileStringBank) Clear() {
- bank.lock.Lock()
- defer bank.lock.Unlock()
- bank.m = make(map[string]*fileLruEntry)
- bank.spill = make(map[string]int64)
- bank.currentSize = 0
- if bank.f != nil {
- if err := bank.f.Truncate(0); err == nil {
- _, _ = bank.f.Seek(0, io.SeekStart)
- }
- }
- }
|