Alex Meijer 2 هفته پیش
والد
کامیت
b28c30174e
2فایلهای تغییر یافته به همراه213 افزوده شده و 45 حذف شده
  1. 166 10
      core/pkg/opencost/opencost_codecs.go
  2. 47 35
      core/pkg/opencost/opencost_codecs_test.go

+ 166 - 10
core/pkg/opencost/opencost_codecs.go

@@ -17,13 +17,14 @@ import (
 	"iter"
 	"os"
 	"reflect"
+	"sort"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"
 	"unsafe"
 
 	util "github.com/opencost/opencost/core/pkg/util"
-	"github.com/opencost/opencost/core/pkg/util/stringutil"
 )
 
 const (
@@ -73,6 +74,10 @@ type BingenConfiguration struct {
 
 	// FileBackedStringTableDir is the directory to write the string table files for reading.
 	FileBackedStringTableDir string
+
+	// FileBackedStringTableMemoMaxBytes limits in-memory memoization for file-backed table lookups.
+	// 0 disables memoization.
+	FileBackedStringTableMemoMaxBytes int64
 }
 
 // DefaultBingenConfiguration creates the default implementation of the bingen configuration
@@ -81,6 +86,7 @@ func DefaultBingenConfiguration() *BingenConfiguration {
 	return &BingenConfiguration{
 		FileBackedStringTableEnabled: false,
 		FileBackedStringTableDir:     os.TempDir(),
+		FileBackedStringTableMemoMaxBytes: 0,
 	}
 }
 
@@ -112,6 +118,14 @@ func BingenFileBackedStringTableDir() string {
 	return bingenConfig.FileBackedStringTableDir
 }
 
+// BingenFileBackedStringTableMemoMaxBytes returns the maximum bytes used for file-backed memo cache.
+func BingenFileBackedStringTableMemoMaxBytes() int64 {
+	bingenConfigLock.RLock()
+	defer bingenConfigLock.RUnlock()
+
+	return bingenConfig.FileBackedStringTableMemoMaxBytes
+}
+
 //--------------------------------------------------------------------------
 //  Type Map
 //--------------------------------------------------------------------------
@@ -456,6 +470,12 @@ type fileStringRef struct {
 type FileStringTableReader struct {
 	f    *os.File
 	refs []fileStringRef
+	memo           []atomic.Pointer[string]
+	memoHits       []atomic.Uint64
+	memoBytes      atomic.Int64
+	memoMaxBytes   int64
+	evictStop      chan struct{}
+	evictDone      chan struct{}
 }
 
 // NewFileStringTableFromBuffer reads exactly tl length-prefixed (uint16) string payloads from buffer
@@ -514,10 +534,17 @@ func NewFileStringTableReaderFrom(buffer *util.Buffer, dir string) StringTableRe
 		}
 	}
 
-	return &FileStringTableReader{
-		f:    f,
-		refs: refs,
+	reader := &FileStringTableReader{
+		f:            f,
+		refs:         refs,
+		memo:         make([]atomic.Pointer[string], len(refs)),
+		memoHits:     make([]atomic.Uint64, len(refs)),
+		memoMaxBytes: BingenFileBackedStringTableMemoMaxBytes(),
+	}
+	if reader.memoMaxBytes > 0 {
+		reader.startMemoEvictionLoop()
 	}
+	return reader
 }
 
 // At returns the string from the internal file using the reference's offset and length.
@@ -534,18 +561,61 @@ func (fstr *FileStringTableReader) At(index int) string {
 		return ""
 	}
 
+	if fstr.memoMaxBytes > 0 {
+		if cached := fstr.memo[index].Load(); cached != nil {
+			if index < len(fstr.memoHits) {
+				fstr.memoHits[index].Add(1)
+			}
+			return *cached
+		}
+	}
+
 	b := make([]byte, ref.length)
 	_, err := fstr.f.ReadAt(b, ref.off)
 	if err != nil {
 		return ""
 	}
 
-	// cast the allocated bytes to a string in-place, as we
-	// were the ones that allocated the bytes
-	pinned := unsafe.String(unsafe.SliceData(b), len(b))
-	return stringutil.BankFunc(pinned, func() string {
-		return string(b)
-	})
+	// cast the allocated bytes to a string in-place, as we were the ones that allocated the bytes
+	s := unsafe.String(unsafe.SliceData(b), len(b))
+
+	if fstr.memoMaxBytes > 0 {
+		if existing := fstr.memo[index].Load(); existing != nil {
+			if index < len(fstr.memoHits) {
+				fstr.memoHits[index].Add(1)
+			}
+			return *existing
+		}
+		need := int64(len(s))
+		for {
+			current := fstr.memoBytes.Load()
+			if current+need > fstr.memoMaxBytes {
+				break
+			}
+			if fstr.memoBytes.CompareAndSwap(current, current+need) {
+				toStore := new(string)
+				*toStore = s
+				if fstr.memo[index].CompareAndSwap(nil, toStore) {
+					if index < len(fstr.memoHits) {
+						fstr.memoHits[index].Store(1)
+					}
+					return s
+				}
+
+				// Another goroutine won the race for this slot; refund bytes.
+				fstr.memoBytes.Add(-need)
+				if stored := fstr.memo[index].Load(); stored != nil {
+					if index < len(fstr.memoHits) {
+						fstr.memoHits[index].Add(1)
+					}
+					return *stored
+				}
+				break
+			}
+		}
+	}
+
+	return s
 }
 
 // Len returns the total number of strings loaded in the string table.
@@ -563,10 +633,26 @@ func (fstr *FileStringTableReader) Close() error {
 		return nil
 	}
 
+	if fstr.evictStop != nil {
+		close(fstr.evictStop)
+		<-fstr.evictDone
+		fstr.evictStop = nil
+		fstr.evictDone = nil
+	}
+
 	path := fstr.f.Name()
 	err := fstr.f.Close()
 	fstr.f = nil
 	fstr.refs = nil
+	for i := range fstr.memo {
+		fstr.memo[i].Store(nil)
+		if i < len(fstr.memoHits) {
+			fstr.memoHits[i].Store(0)
+		}
+	}
+	fstr.memo = nil
+	fstr.memoHits = nil
+	fstr.memoBytes.Store(0)
 
 	if path != "" {
 		_ = os.Remove(path)
@@ -575,6 +661,76 @@ func (fstr *FileStringTableReader) Close() error {
 	return err
 }
 
+type memoEvictionCandidate struct {
+	idx  int
+	hits uint64
+}
+
+func (fstr *FileStringTableReader) startMemoEvictionLoop() {
+	fstr.evictStop = make(chan struct{})
+	fstr.evictDone = make(chan struct{})
+	go func() {
+		defer close(fstr.evictDone)
+		ticker := time.NewTicker(10 * time.Second)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-fstr.evictStop:
+				return
+			case <-ticker.C:
+				fstr.evictLeastUsedMemoEntries(0.20)
+			}
+		}
+	}()
+}
+
+func (fstr *FileStringTableReader) evictLeastUsedMemoEntries(percent float64) {
+	if fstr == nil || len(fstr.memo) == 0 || percent <= 0 {
+		return
+	}
+
+	candidates := make([]memoEvictionCandidate, 0, len(fstr.memo))
+	for i := range fstr.memo {
+		if fstr.memo[i].Load() == nil {
+			continue
+		}
+		candidates = append(candidates, memoEvictionCandidate{
+			idx:  i,
+			hits: fstr.memoHits[i].Load(),
+		})
+	}
+	if len(candidates) == 0 {
+		return
+	}
+
+	evictCount := int(float64(len(candidates)) * percent)
+	if evictCount <= 0 {
+		evictCount = 1
+	}
+	if evictCount > len(candidates) {
+		evictCount = len(candidates)
+	}
+
+	sort.Slice(candidates, func(i, j int) bool {
+		if candidates[i].hits == candidates[j].hits {
+			return candidates[i].idx < candidates[j].idx
+		}
+		return candidates[i].hits < candidates[j].hits
+	})
+
+	for i := 0; i < evictCount; i++ {
+		idx := candidates[i].idx
+		existing := fstr.memo[idx].Load()
+		if existing == nil {
+			continue
+		}
+		if fstr.memo[idx].CompareAndSwap(existing, nil) {
+			fstr.memoHits[idx].Store(0)
+			fstr.memoBytes.Add(-int64(fstr.refs[idx].length))
+		}
+	}
+}
+
 //--------------------------------------------------------------------------
 //  Codec Context
 //--------------------------------------------------------------------------

+ 47 - 35
core/pkg/opencost/opencost_codecs_test.go

@@ -4,10 +4,9 @@ import (
 	"bytes"
 	"io"
 	"os"
+	"sync/atomic"
 	"testing"
 	"time"
-
-	"github.com/opencost/opencost/core/pkg/util/stringutil"
 )
 
 type UnmarshalFunc func(BingenUnmarshalable, []byte) error
@@ -832,35 +831,7 @@ func TestOpencostBingenFileStringTableEnabledWithReader(t *testing.T) {
 	RunAllOpencostBingenCodecTests(t, UnmarshalBingenReader)
 }
 
-type passthroughStringBank struct{}
-
-func (b *passthroughStringBank) LoadOrStore(key, value string) (string, bool) {
-	return value, false
-}
-
-func (b *passthroughStringBank) LoadOrStoreFunc(key string, f func() string) (string, bool) {
-	return f(), false
-}
-
-func (b *passthroughStringBank) Clear() {}
-
-type markerStringBank struct{}
-
-func (b *markerStringBank) LoadOrStore(key, value string) (string, bool) {
-	return "MARKER:" + value, false
-}
-
-func (b *markerStringBank) LoadOrStoreFunc(key string, f func() string) (string, bool) {
-	return "MARKER:" + f(), false
-}
-
-func (b *markerStringBank) Clear() {}
-
-func TestFileStringTableReaderAt_UsesStringBank(t *testing.T) {
-	orig := stringutil.GetStringBank()
-	stringutil.UpdateStringBank(&passthroughStringBank{})
-	defer stringutil.UpdateStringBank(orig)
-
+func TestFileStringTableReaderAt_UsesMemoCache(t *testing.T) {
 	tmp, err := os.CreateTemp("", "opencost-bgst-test-*")
 	if err != nil {
 		t.Fatalf("create temp file: %v", err)
@@ -873,10 +844,12 @@ func TestFileStringTableReaderAt_UsesStringBank(t *testing.T) {
 	}
 
 	reader := &FileStringTableReader{
-		f: tmp,
+		f:            tmp,
 		refs: []fileStringRef{
 			{off: 0, length: 5},
 		},
+		memo:         make([]atomic.Pointer[string], 1),
+		memoMaxBytes: 16,
 	}
 	defer reader.Close()
 
@@ -884,8 +857,47 @@ func TestFileStringTableReaderAt_UsesStringBank(t *testing.T) {
 		t.Fatalf("baseline string mismatch, got %q", got)
 	}
 
-	stringutil.UpdateStringBank(&markerStringBank{})
-	if got := reader.At(0); got != "MARKER:hello" {
-		t.Fatalf("expected banked marker value, got %q", got)
+	if err := tmp.Truncate(0); err != nil {
+		t.Fatalf("truncate temp file: %v", err)
+	}
+
+	if got := reader.At(0); got != "hello" {
+		t.Fatalf("expected memoized value after truncate, got %q", got)
+	}
+}
+
+func TestFileStringTableReader_EvictLeastUsedMemoEntries(t *testing.T) {
+	s1 := "aaaa"
+	s2 := "bbbb"
+	s3 := "cccc"
+	s4 := "dddd"
+	reader := &FileStringTableReader{
+		refs: []fileStringRef{
+			{length: len(s1)},
+			{length: len(s2)},
+			{length: len(s3)},
+			{length: len(s4)},
+		},
+		memo:     make([]atomic.Pointer[string], 4),
+		memoHits: make([]atomic.Uint64, 4),
+	}
+
+	reader.memo[0].Store(&s1)
+	reader.memo[1].Store(&s2)
+	reader.memo[2].Store(&s3)
+	reader.memo[3].Store(&s4)
+	reader.memoHits[0].Store(10)
+	reader.memoHits[1].Store(1)
+	reader.memoHits[2].Store(3)
+	reader.memoHits[3].Store(2)
+	reader.memoBytes.Store(int64(len(s1) + len(s2) + len(s3) + len(s4)))
+
+	reader.evictLeastUsedMemoEntries(0.20)
+
+	if got := reader.memo[1].Load(); got != nil {
+		t.Fatalf("expected index 1 to be evicted first, got %q", *got)
+	}
+	if got := reader.memo[3].Load(); got == nil {
+		t.Fatalf("expected index 3 to remain cached")
 	}
 }