worker.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. package worker
  2. import (
  3. "fmt"
  4. "iter"
  5. "runtime"
  6. "sync"
  7. "sync/atomic"
  8. "github.com/opencost/opencost/core/pkg/collections"
  9. "github.com/opencost/opencost/core/pkg/util/sliceutil"
  10. )
// Runner is a function type that takes a single input and returns nothing.
type Runner[T any] func(T)

// Worker is a transformation function from input type T to output type U.
type Worker[T any, U any] func(T) U
// WorkerPool is a pool of go routines executing a Worker on supplied inputs via
// the Run function.
type WorkerPool[T any, U any] interface {
	// Run executes a Worker in the pool on the provided input and onComplete receive channel
	// to get the results. An error is returned if the pool is shutdown, or is in the process
	// of shutting down.
	Run(input T, onComplete chan<- U) error

	// Shutdown stops all of the workers (if running).
	Shutdown()
}
// WorkGroup is a group of inputs that leverage a WorkerPool to run inputs through workers and
// collect the results in a single slice.
type WorkGroup[T any, U any] interface {
	// Push adds a new input to the work group. An error is returned if the input could
	// not be accepted (for example, the group is at capacity or the pool is shut down).
	Push(T) error

	// Wait waits for all pending worker tasks to complete, then returns all the results.
	Wait() []U
}
// entry is an internal helper type for pushing payloads to the worker queue.
type entry[T any, U any] struct {
	payload    T        // input value handed to the Worker func
	onComplete chan<- U // optional channel the result is sent on; nil means fire-and-forget
	close      bool     // sentinel flag: when true the dequeuing worker goroutine exits
}
// queuedWorkerPool is a blocking queue based implementation of a WorkerPool.
type queuedWorkerPool[T any, U any] struct {
	queue      collections.BlockingQueue[entry[T, U]] // shared work queue drained by the worker goroutines
	work       Worker[T, U]                           // transformation applied to each dequeued payload
	workers    int                                    // number of worker goroutines started (and close sentinels needed on Shutdown)
	isShutdown atomic.Bool                            // set once by Shutdown; Run rejects new work after it flips
}
// ordered is a WorkGroup implementation which enforces ordering based on when
// inputs were pushed onto the group.
type ordered[T any, U any] struct {
	workPool WorkerPool[T, U]
	results  []U            // fixed-size output slice; index i holds the result of the i-th Push
	wg       sync.WaitGroup // tracks results not yet received from the pool
	count    int            // number of successful pushes so far; Push is not safe for concurrent use
}
  54. // NewWorkerPool creates a new worker pool provided the number of workers to run as well as the worker
  55. // func used to transform inputs to outputs.
  56. func NewWorkerPool[T any, U any](workers int, work Worker[T, U]) WorkerPool[T, U] {
  57. owq := &queuedWorkerPool[T, U]{
  58. workers: workers,
  59. work: work,
  60. queue: collections.NewBlockingQueue[entry[T, U]](),
  61. }
  62. // startup the designated workers
  63. for i := 0; i < workers; i++ {
  64. go owq.worker()
  65. }
  66. return owq
  67. }
  68. // Run executes a Worker in the pool on the provided input and onComplete receive chanel
  69. // to get the results. An error is returned if the pool is shutdown, or is in the process
  70. // of shutting down.
  71. func (wq *queuedWorkerPool[T, U]) Run(input T, onComplete chan<- U) error {
  72. if wq.isShutdown.Load() {
  73. return fmt.Errorf("WorkerPoolShutdown")
  74. }
  75. wq.queue.Enqueue(entry[T, U]{
  76. payload: input,
  77. onComplete: onComplete,
  78. close: false,
  79. })
  80. return nil
  81. }
  82. // Shutdown stops all of the workers (if running).
  83. func (wq *queuedWorkerPool[T, U]) Shutdown() {
  84. if !wq.isShutdown.CompareAndSwap(false, true) {
  85. return
  86. }
  87. for i := 0; i < wq.workers; i++ {
  88. wq.queue.Enqueue(entry[T, U]{
  89. close: true,
  90. })
  91. }
  92. }
  93. func (wq *queuedWorkerPool[T, U]) worker() {
  94. for {
  95. next := wq.queue.Dequeue()
  96. // shutdown the worker on sentinel value
  97. if next.close {
  98. return
  99. }
  100. result := wq.work(next.payload)
  101. // signal on complete if applicable
  102. if next.onComplete != nil {
  103. next.onComplete <- result
  104. }
  105. }
  106. }
  107. // NewGroup creates a new WorkGroup implementation for processing a group of inputs in the order in which
  108. // they are pushed. Ordered groups do not support concurrent Push() calls.
  109. func NewOrderedGroup[T any, U any](pool WorkerPool[T, U], size int) WorkGroup[T, U] {
  110. return &ordered[T, U]{
  111. workPool: pool,
  112. results: make([]U, size),
  113. count: 0,
  114. }
  115. }
  116. // Push adds a new input to the work group.
  117. func (ow *ordered[T, U]) Push(input T) error {
  118. current := ow.count
  119. if current >= len(ow.results) {
  120. return fmt.Errorf("MaxCapacity")
  121. }
  122. onComplete := make(chan U)
  123. err := ow.workPool.Run(input, onComplete)
  124. if err != nil {
  125. return err
  126. }
  127. ow.count++
  128. ow.wg.Add(1)
  129. go func(index int) {
  130. defer close(onComplete)
  131. ow.results[index] = <-onComplete
  132. ow.wg.Done()
  133. }(int(current))
  134. return nil
  135. }
// Wait waits for all pending worker tasks to complete, then returns all the results.
// Results are positioned by push order; if fewer than size inputs were pushed, the
// trailing slots hold the zero value of U.
func (ow *ordered[T, U]) Wait() []U {
	ow.wg.Wait()
	return ow.results
}
// noResultGroup is a WorkGroup implementation which arbitrarily pushes inputs to
// a runner pool to be executed concurrently. This group does not collect results.
type noResultGroup[T any] struct {
	workPool WorkerPool[T, struct{}]
	wg       sync.WaitGroup // tracks tasks whose completion signal has not yet arrived
}
// NewNoResultGroup creates a new WorkGroup implementation for processing a group of inputs concurrently. This
// work group implementation does not collect results, and therefore, requires a worker pool with a struct{} output.
func NewNoResultGroup[T any](pool WorkerPool[T, struct{}]) WorkGroup[T, struct{}] {
	return &noResultGroup[T]{
		workPool: pool,
	}
}
  154. // Push adds a new input to the work group.
  155. func (ow *noResultGroup[T]) Push(input T) error {
  156. onComplete := make(chan struct{})
  157. err := ow.workPool.Run(input, onComplete)
  158. if err != nil {
  159. return err
  160. }
  161. ow.wg.Add(1)
  162. go func() {
  163. defer close(onComplete)
  164. defer ow.wg.Done()
  165. <-onComplete
  166. }()
  167. return nil
  168. }
// Wait waits for all pending worker tasks to complete, then returns all the results.
// Since this group does not collect results, the returned slice is always empty.
func (ow *noResultGroup[T]) Wait() []struct{} {
	ow.wg.Wait()
	return []struct{}{}
}
// collector is a WorkGroup implementation which collects non-nil results into the results slice
// and ignores any nil results.
type collector[T any, U any] struct {
	workPool   WorkerPool[T, *U]
	resultLock sync.Mutex // guards results, which is appended to from multiple goroutines
	results    []*U       // non-nil worker outputs, in completion (not push) order
	wg         sync.WaitGroup
}
// NewCollectionGroup creates a new WorkGroup implementation for processing a group of inputs concurrently. The
// collection group implementation will collect all non-nil results into the output slice. Thus, the worker pool
// parameter requires the output type to be a pointer.
func NewCollectionGroup[T any, U any](pool WorkerPool[T, *U]) WorkGroup[T, *U] {
	return &collector[T, U]{
		workPool: pool,
	}
}
  190. // Push adds a new input to the work group.
  191. func (ow *collector[T, U]) Push(input T) error {
  192. onComplete := make(chan *U)
  193. err := ow.workPool.Run(input, onComplete)
  194. if err != nil {
  195. return err
  196. }
  197. ow.wg.Add(1)
  198. go func() {
  199. defer ow.wg.Done()
  200. defer close(onComplete)
  201. result := <-onComplete
  202. if result != nil {
  203. ow.resultLock.Lock()
  204. ow.results = append(ow.results, result)
  205. ow.resultLock.Unlock()
  206. }
  207. }()
  208. return nil
  209. }
// Wait waits for all pending worker tasks to complete, then returns all the non-nil
// results. Ordering follows task completion, not push order.
func (ow *collector[T, U]) Wait() []*U {
	ow.wg.Wait()
	return ow.results
}
// these constraints protect against the possibility of unexpected output from runtime.NumCPU()
const (
	// defaultMinWorkers is the lower bound applied by OptimalWorkerCount.
	defaultMinWorkers = 4
	// defaultMaxWorkers is the upper bound applied by OptimalWorkerCount.
	defaultMaxWorkers = 16
)
// OptimalWorkerCount will return an optimal worker count based on runtime.NumCPU(),
// constrained to the [defaultMinWorkers, defaultMaxWorkers] range.
func OptimalWorkerCount() int {
	return OptimalWorkerCountInRange(defaultMinWorkers, defaultMaxWorkers)
}
  224. // OptimalWorkerCount will return runtime.NumCPU() constrained to the provided min and max
  225. // range
  226. func OptimalWorkerCountInRange(min int, max int) int {
  227. cores := runtime.NumCPU()
  228. if cores < min {
  229. return min
  230. }
  231. if cores > max {
  232. return max
  233. }
  234. return cores
  235. }
// ConcurrentDo runs a pool of N workers which concurrently call the provided worker func on each
// input to get ordered output corresponding to the inputs. The total number of workers is determined
// by the total number of CPUs available, bound to a range from 4-16.
func ConcurrentDo[T any, U any](worker Worker[T, U], inputs []T) []U {
	return ConcurrentDoWith(OptimalWorkerCount(), worker, inputs)
}
  242. // ConcurrentDoWith runs a pool of workers of the specified size which concurrently call the provided worker func
  243. // on each input to get ordered output corresponding to the inputs. Size inputs < 1 will automatically be set to 1.
  244. func ConcurrentDoWith[T any, U any](size int, worker Worker[T, U], inputs []T) []U {
  245. if size < 1 {
  246. size = 1
  247. }
  248. workerPool := NewWorkerPool(size, worker)
  249. defer workerPool.Shutdown()
  250. workGroup := NewOrderedGroup(workerPool, len(inputs))
  251. for _, input := range inputs {
  252. workGroup.Push(input)
  253. }
  254. return workGroup.Wait()
  255. }
// ConcurrentCollect runs a pool of N workers which concurrently call the provided worker func on each
// input to get a result slice of non-nil outputs. The total number of workers is determined
// by the total number of CPUs available, bound to a range from 4-16.
func ConcurrentCollect[T any, U any](workerFunc Worker[T, *U], inputs []T) []*U {
	return ConcurrentCollectWith(OptimalWorkerCount(), workerFunc, inputs)
}
// ConcurrentCollectWith runs a pool of workers of the specified size which concurrently call the provided worker
// func on each input to get a result slice of non-nil outputs. Size inputs < 1 will automatically be set to 1.
func ConcurrentCollectWith[T any, U any](size int, workerFunc Worker[T, *U], inputs []T) []*U {
	// delegate to the iterator-based variant by viewing the slice as a sequence
	return ConcurrentIterCollect(size, workerFunc, sliceutil.AsSeq(inputs))
}
  267. // ConcurrentIterCollect runs a pool of workers of the specified size which concurrently call the provided worker
  268. // func on each input to get a result slice of non-nil outputs. Size inputs < 1 will automatically be set to 1.
  269. func ConcurrentIterCollect[T any, U any](size int, workerFunc Worker[T, *U], inputs iter.Seq[T]) []*U {
  270. if size < 1 {
  271. size = 1
  272. }
  273. workerPool := NewWorkerPool(size, workerFunc)
  274. defer workerPool.Shutdown()
  275. workGroup := NewCollectionGroup(workerPool)
  276. for input := range inputs {
  277. workGroup.Push(input)
  278. }
  279. return workGroup.Wait()
  280. }
// ConcurrentRun runs a pool of N workers which concurrently call the provided runner func on each
// input. The total number of workers is determined by the total number of CPUs available, bound to
// a range from 4-16.
func ConcurrentRun[T any](runner Runner[T], inputs []T) {
	ConcurrentRunWith(OptimalWorkerCount(), runner, inputs)
}
// ConcurrentRunWith runs a pool of runners of the specified size which concurrently call the provided runner
// func on each input. Size inputs < 1 will automatically be set to 1.
func ConcurrentRunWith[T any](size int, runner Runner[T], inputs []T) {
	// delegate to the iterator-based variant by viewing the slice as a sequence
	ConcurrentIterRunWith(size, runner, sliceutil.AsSeq(inputs))
}
  292. // ConcurrentIterRunWith runs a pool of runners of the specified size which concurrently call the provided runner
  293. // func on each input. Size inputs < 1 will automatically be set to 1.
  294. func ConcurrentIterRunWith[T any](size int, runner Runner[T], inputs iter.Seq[T]) {
  295. if size < 1 {
  296. size = 1
  297. }
  298. workerPool := NewWorkerPool(size, func(input T) (void struct{}) {
  299. runner(input)
  300. return
  301. })
  302. defer workerPool.Shutdown()
  303. workGroup := NewNoResultGroup(workerPool)
  304. for input := range inputs {
  305. workGroup.Push(input)
  306. }
  307. workGroup.Wait()
  308. }