stringutil_test.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. package stringutil_test
  2. import (
  3. "fmt"
  4. "math/rand"
  5. "strings"
  6. "sync"
  7. "testing"
  8. "time"
  9. "unsafe"
  10. "github.com/opencost/opencost/core/pkg/util/stringutil"
  11. )
  // oldBank is the backing store of the legacy string bank. BankLegacy stores
  // each string under itself as the key, and ClearBankLegacy replaces the whole
  // map with a fresh one.
  var oldBank sync.Map
  // bankTest bundles the entry points of one string bank implementation so the
  // shared benchmark harness can drive different implementations the same way.
  type bankTest struct {
  	// Bank takes a string and returns the string to use in its place.
  	Bank func(s string) string

  	// BankFunc takes a lookup key plus a function that produces the string to
  	// keep. NOTE(review): presumably the function is only called when the key
  	// is not banked yet; confirm against the implementation being wrapped.
  	BankFunc func(key string, create func() string) string

  	// Clear is called by the harness after every benchmark iteration.
  	Clear func()
  }
  18. var (
  19. legacyTest = bankTest{
  20. Bank: BankLegacy,
  21. BankFunc: func(s string, f func() string) string { return s },
  22. Clear: ClearBankLegacy,
  23. }
  24. standardBankTest = bankTest{
  25. Bank: stringutil.Bank,
  26. BankFunc: stringutil.BankFunc,
  27. Clear: stringutil.ClearBank,
  28. }
  29. )
  30. // This is the old implementation of the string bank to use for comparison benchmarks
  31. func BankLegacy(s string) string {
  32. ss, _ := oldBank.LoadOrStore(s, s)
  33. return ss.(string)
  34. }
  35. func ClearBankLegacy() {
  36. oldBank = sync.Map{}
  37. }
  // copyString returns a copy of s that does not share backing memory with s.
  //
  // strings.Clone is the standard-library form of the string -> []byte ->
  // string round trip and matches how generateBenchData clones its duplicates.
  func copyString(s string) string {
  	return strings.Clone(s)
  }
  41. func generateBenchData(totalStrings, totalUnique int) [][]byte {
  42. randStrings := make([]string, 0, totalStrings)
  43. r := rand.New(rand.NewSource(27644437))
  44. // create totalUnique unique strings
  45. for range totalUnique {
  46. randStrings = append(
  47. randStrings,
  48. fmt.Sprintf("%s/%s/%s", stringutil.RandSeqWith(r, 10), stringutil.RandSeqWith(r, 10), stringutil.RandSeqWith(r, 10)),
  49. )
  50. }
  51. // set the seed such that the resulting "remainder" strings are deterministic for each bench
  52. r = rand.New(rand.NewSource(1523942))
  53. // append a random selection from 0-totalUnique to the list.
  54. for range totalStrings - totalUnique {
  55. randStrings = append(randStrings, strings.Clone(randStrings[r.Intn(totalUnique)]))
  56. }
  57. // shuffle the list of strings
  58. r.Shuffle(totalStrings, func(i, j int) { randStrings[i], randStrings[j] = randStrings[j], randStrings[i] })
  59. stringBytes := make([][]byte, 0, totalStrings)
  60. for _, str := range randStrings {
  61. stringBytes = append(stringBytes, []byte(str))
  62. }
  63. return stringBytes
  64. }
  65. func benchmarkStringBank(b *testing.B, bt bankTest, totalStrings, totalUnique int, useBankFunc bool) {
  66. b.StopTimer()
  67. randStrings := generateBenchData(totalStrings, totalUnique)
  68. b.Run(b.Name(), func(b *testing.B) {
  69. for i := 0; i < b.N; i++ {
  70. b.StartTimer()
  71. for bb := 0; bb < totalStrings; bb++ {
  72. bytes := randStrings[bb]
  73. if useBankFunc {
  74. str := unsafe.String(unsafe.SliceData(bytes), len(bytes))
  75. bt.BankFunc(str, func() string {
  76. return string(bytes)
  77. })
  78. } else {
  79. bt.Bank(string(bytes))
  80. }
  81. }
  82. b.StopTimer()
  83. bt.Clear()
  84. //runtime.GC()
  85. //debug.FreeOSMemory()
  86. }
  87. })
  88. }
  89. func BenchmarkLegacyStringBank90PercentDuplicate(b *testing.B) {
  90. benchmarkStringBank(b, legacyTest, 1_000_000, 100_000, false)
  91. }
  92. func BenchmarkLegacyStringBank75PercentDuplicate(b *testing.B) {
  93. benchmarkStringBank(b, legacyTest, 1_000_000, 250_000, false)
  94. }
  95. func BenchmarkLegacyStringBank50PercentDuplicate(b *testing.B) {
  96. benchmarkStringBank(b, legacyTest, 1_000_000, 100_000, false)
  97. }
  98. func BenchmarkLegacyStringBank25PercentDuplicate(b *testing.B) {
  99. benchmarkStringBank(b, legacyTest, 1_000_000, 750_000, false)
  100. }
  101. func BenchmarkLegacyStringBankNoDuplicate(b *testing.B) {
  102. benchmarkStringBank(b, legacyTest, 1_000_000, 1_000_000, false)
  103. }
  104. func BenchmarkStringBank90PercentDuplicate(b *testing.B) {
  105. benchmarkStringBank(b, standardBankTest, 1_000_000, 100_000, false)
  106. }
  107. func BenchmarkStringBank75PercentDuplicate(b *testing.B) {
  108. benchmarkStringBank(b, standardBankTest, 1_000_000, 250_000, false)
  109. }
  110. func BenchmarkStringBank50PercentDuplicate(b *testing.B) {
  111. benchmarkStringBank(b, standardBankTest, 1_000_000, 100_000, false)
  112. }
  113. func BenchmarkStringBank25PercentDuplicate(b *testing.B) {
  114. benchmarkStringBank(b, standardBankTest, 1_000_000, 750_000, false)
  115. }
  116. func BenchmarkStringBankNoDuplicate(b *testing.B) {
  117. benchmarkStringBank(b, standardBankTest, 1_000_000, 1_000_000, false)
  118. }
  119. func BenchmarkStringBankFunc90PercentDuplicate(b *testing.B) {
  120. benchmarkStringBank(b, standardBankTest, 1_000_000, 100_000, true)
  121. }
  122. func BenchmarkStringBankFunc75PercentDuplicate(b *testing.B) {
  123. benchmarkStringBank(b, standardBankTest, 1_000_000, 250_000, true)
  124. }
  125. func BenchmarkStringBankFunc50PercentDuplicate(b *testing.B) {
  126. benchmarkStringBank(b, standardBankTest, 1_000_000, 100_000, true)
  127. }
  128. func BenchmarkStringBankFunc25PercentDuplicate(b *testing.B) {
  129. benchmarkStringBank(b, standardBankTest, 1_000_000, 750_000, true)
  130. }
  131. func BenchmarkStringBankFuncNoDuplicate(b *testing.B) {
  132. benchmarkStringBank(b, standardBankTest, 1_000_000, 1_000_000, true)
  133. }
  // Settings handed to stringutil.NewLruStringBank by the LRU benchmarks.
  const (
  	// LruCapacity is the capacity argument. It is smaller than the number of
  	// distinct strings in the 25% duplicate and no-duplicate corpora.
  	LruCapacity = 500_000

  	// LruEvictInterval is the eviction interval argument.
  	LruEvictInterval = 5 * time.Second
  )
  136. func BenchmarkLruStringBankFunc90PercentDuplicate(b *testing.B) {
  137. sb := stringutil.NewLruStringBank(LruCapacity, LruEvictInterval)
  138. defer func() {
  139. if lruBank, ok := sb.(interface{ Stop() }); ok {
  140. lruBank.Stop()
  141. }
  142. }()
  143. stringutil.UpdateStringBank(sb)
  144. benchmarkStringBank(b, standardBankTest, 1_000_000, 100_000, true)
  145. }
  146. func BenchmarkLruStringBankFunc75PercentDuplicate(b *testing.B) {
  147. sb := stringutil.NewLruStringBank(LruCapacity, LruEvictInterval)
  148. defer func() {
  149. if lruBank, ok := sb.(interface{ Stop() }); ok {
  150. lruBank.Stop()
  151. }
  152. }()
  153. stringutil.UpdateStringBank(sb)
  154. benchmarkStringBank(b, standardBankTest, 1_000_000, 250_000, true)
  155. }
  156. func BenchmarkLruStringBankFunc50PercentDuplicate(b *testing.B) {
  157. sb := stringutil.NewLruStringBank(LruCapacity, LruEvictInterval)
  158. defer func() {
  159. if lruBank, ok := sb.(interface{ Stop() }); ok {
  160. lruBank.Stop()
  161. }
  162. }()
  163. stringutil.UpdateStringBank(sb)
  164. benchmarkStringBank(b, standardBankTest, 1_000_000, 100_000, true)
  165. }
  166. func BenchmarkLruStringBankFunc25PercentDuplicate(b *testing.B) {
  167. sb := stringutil.NewLruStringBank(LruCapacity, LruEvictInterval)
  168. defer func() {
  169. if lruBank, ok := sb.(interface{ Stop() }); ok {
  170. lruBank.Stop()
  171. }
  172. }()
  173. stringutil.UpdateStringBank(sb)
  174. benchmarkStringBank(b, standardBankTest, 1_000_000, 750_000, true)
  175. }
  176. func BenchmarkLruStringBankFuncNoDuplicate(b *testing.B) {
  177. sb := stringutil.NewLruStringBank(LruCapacity, LruEvictInterval)
  178. defer func() {
  179. if lruBank, ok := sb.(interface{ Stop() }); ok {
  180. lruBank.Stop()
  181. }
  182. }()
  183. stringutil.UpdateStringBank(sb)
  184. benchmarkStringBank(b, standardBankTest, 1_000_000, 1_000_000, true)
  185. }