allocation_csv.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. package costmodel
  2. import (
  3. "bytes"
  4. "context"
  5. "encoding/csv"
  6. "io"
  7. "sort"
  8. "strconv"
  9. "time"
  10. "github.com/pkg/errors"
  11. "github.com/opencost/opencost/pkg/kubecost"
  12. "github.com/opencost/opencost/pkg/log"
  13. )
  // CloudStorage is the storage contract the CSV exporter depends on.
  // Objects are addressed by name and are passed around as complete byte slices.
  type CloudStorage interface {
  	// Exists reports whether an object with the given name is present.
  	Exists(name string) (bool, error)
  	// Read returns the content of the object with the given name.
  	Read(name string) ([]byte, error)
  	// Write stores data under the given name.
  	Write(name string, data []byte) error
  }
  19. type AllocationModel interface {
  20. ComputeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, error)
  21. DateRange() (time.Time, time.Time, error)
  22. }
  // errNoData is returned by the CSV writer when an export produced no data rows.
  var errNoData = errors.New("no data")
  24. // UpdateCSVWorker launches a worker that updates CSV file in cloud storage with allocation data
  25. // It updates data immediately on launch and then runs every day at 00:10 UTC
  26. // It expected to run a goroutine
  27. func UpdateCSVWorker(ctx context.Context, storage CloudStorage, model AllocationModel, path string) error {
  28. // perform first update immediately
  29. nextRunAt := time.Now()
  30. for {
  31. select {
  32. case <-ctx.Done():
  33. return ctx.Err()
  34. case <-time.After(nextRunAt.Sub(time.Now())):
  35. err := UpdateCSV(ctx, storage, model, path)
  36. if err != nil {
  37. // it's background worker, log error and carry on, maybe next time it will work
  38. log.Errorf("Error updating CSV: %s", err)
  39. }
  40. now := time.Now()
  41. // next launch is at 00:10 UTC tomorrow
  42. // extra 10 minutes is to let prometheus to collect all the data for the previous day
  43. nextRunAt = time.Date(now.Year(), now.Month(), now.Day(), 0, 10, 0, 0, time.UTC).AddDate(0, 0, 1)
  44. }
  45. }
  46. }
  47. func UpdateCSV(ctx context.Context, storage CloudStorage, model AllocationModel, path string) error {
  48. exporter := &csvExporter{
  49. Storage: storage,
  50. Model: model,
  51. FilePath: path,
  52. }
  53. return exporter.Update(ctx)
  54. }
  55. type csvExporter struct {
  56. Storage CloudStorage
  57. Model AllocationModel
  58. FilePath string
  59. }
  60. // Update updates CSV file in cloud storage with new allocation data
  61. func (e *csvExporter) Update(ctx context.Context) error {
  62. allocationDates, err := e.availableAllocationDates()
  63. if err != nil {
  64. return err
  65. }
  66. exist, err := e.Storage.Exists(e.FilePath)
  67. if err != nil {
  68. return err
  69. }
  70. var result []byte
  71. // cloud storage doesn't have an existing file
  72. // dump all the data exist to the file
  73. if !exist {
  74. result, err = e.allocationsToCSV(ctx, mapTimeToSlice(allocationDates))
  75. if err != nil {
  76. return err
  77. }
  78. }
  79. // existing export file exists
  80. // scan through it and ignore all dates that are already in the file
  81. // avoid modifying existing data or producing duplicates
  82. if exist {
  83. previousExport, err := e.Storage.Read(e.FilePath)
  84. if err != nil {
  85. return err
  86. }
  87. csvDates, err := e.loadDates(previousExport)
  88. if err != nil {
  89. return err
  90. }
  91. for date := range csvDates {
  92. delete(allocationDates, date)
  93. }
  94. dateExport, err := e.allocationsToCSV(ctx, mapTimeToSlice(allocationDates))
  95. if err != nil {
  96. return err
  97. }
  98. result, err = mergeCSV([][]byte{previousExport, dateExport})
  99. if err != nil {
  100. return err
  101. }
  102. }
  103. err = e.Storage.Write(e.FilePath, result)
  104. if err != nil {
  105. return err
  106. }
  107. return nil
  108. }
  109. func (e *csvExporter) availableAllocationDates() (map[time.Time]struct{}, error) {
  110. start, end, err := e.Model.DateRange()
  111. if err != nil {
  112. return nil, err
  113. }
  114. if start != time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC) {
  115. // start doesn't start from 00:00 UTC, it could be truncated by prometheus retention policy
  116. // skip incomplete data and begin from the day after, otherwise it may corrupt existing data
  117. start = time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC).AddDate(0, 0, 1)
  118. }
  119. end = time.Date(end.Year(), end.Month(), end.Day(), 0, 0, 0, 0, time.UTC)
  120. dates := make(map[time.Time]struct{})
  121. for date := start; date.Before(end); date = date.AddDate(0, 0, 1) {
  122. dates[date] = struct{}{}
  123. }
  124. if len(dates) == 0 {
  125. return nil, errors.New("no allocation data available")
  126. }
  127. return dates, nil
  128. }
  129. func (e *csvExporter) allocationsToCSV(ctx context.Context, dates []time.Time) ([]byte, error) {
  130. buf := new(bytes.Buffer)
  131. err := e.writeCSVToWriter(ctx, buf, dates)
  132. if err != nil {
  133. return nil, err
  134. }
  135. return buf.Bytes(), nil
  136. }
  137. func (e *csvExporter) writeCSVToWriter(ctx context.Context, w io.Writer, dates []time.Time) error {
  138. fmtFloat := func(f float64) string {
  139. return strconv.FormatFloat(f, 'f', -1, 64)
  140. }
  141. csvWriter := csv.NewWriter(w)
  142. // TODO: confirm columns we want to export
  143. err := csvWriter.Write([]string{
  144. "Date",
  145. "Name",
  146. "CPUCoreUsageAverage",
  147. "CPUCoreRequestAverage",
  148. "CPUCost",
  149. "RAMBytesUsageAverage",
  150. "RAMBytesRequestAverage",
  151. "RAMCost",
  152. "GPUs",
  153. "GPUCost",
  154. "NetworkCost",
  155. "PVBytes",
  156. "PVCost",
  157. "TotalCost",
  158. })
  159. if err != nil {
  160. return err
  161. }
  162. lines := 0
  163. for _, date := range dates {
  164. start := time.Date(date.Year(), date.Month(), date.Day(), 0, 0, 0, 0, time.UTC)
  165. end := start.AddDate(0, 0, 1)
  166. data, err := e.Model.ComputeAllocation(start, end, 5*time.Minute)
  167. if err != nil {
  168. return err
  169. }
  170. log.Infof("fetched %d records for %s", len(data.Allocations), date.Format("2006-01-02"))
  171. for _, alloc := range data.Allocations {
  172. if err := ctx.Err(); err != nil {
  173. return err
  174. }
  175. err := csvWriter.Write([]string{
  176. date.Format("2006-01-02"),
  177. alloc.Name,
  178. fmtFloat(alloc.CPUCoreUsageAverage),
  179. fmtFloat(alloc.CPUCoreRequestAverage),
  180. fmtFloat(alloc.CPUTotalCost()),
  181. fmtFloat(alloc.RAMBytesUsageAverage),
  182. fmtFloat(alloc.RAMBytesRequestAverage),
  183. fmtFloat(alloc.RAMTotalCost()),
  184. fmtFloat(alloc.GPUs()),
  185. fmtFloat(alloc.GPUCost),
  186. fmtFloat(alloc.NetworkTotalCost()),
  187. fmtFloat(alloc.PVBytes()),
  188. fmtFloat(alloc.PVCost()),
  189. fmtFloat(alloc.TotalCost()),
  190. })
  191. if err != nil {
  192. return err
  193. }
  194. lines++
  195. }
  196. }
  197. if lines == 0 {
  198. return errNoData
  199. }
  200. csvWriter.Flush()
  201. if err := csvWriter.Error(); err != nil {
  202. return err
  203. }
  204. log.Infof("exported %d lines", lines)
  205. return nil
  206. }
  207. // loadDate scans through CSV export file and extract all dates from "Date" column
  208. func (e *csvExporter) loadDates(csvFile []byte) (map[time.Time]struct{}, error) {
  209. csvReader := csv.NewReader(bytes.NewReader(csvFile))
  210. header, err := csvReader.Read()
  211. if err != nil {
  212. return nil, errors.Wrap(err, "reading csv header")
  213. }
  214. dateColIndex := 0
  215. for i, col := range header {
  216. if col == "Date" {
  217. dateColIndex = i
  218. break
  219. }
  220. }
  221. dates := make(map[time.Time]struct{})
  222. for {
  223. row, err := csvReader.Read()
  224. if err == io.EOF {
  225. break
  226. }
  227. if err != nil {
  228. return nil, errors.Wrap(err, "reading csv row")
  229. }
  230. date, err := time.Parse("2006-01-02", row[dateColIndex])
  231. if err != nil {
  232. return nil, errors.Wrap(err, "parsing date")
  233. }
  234. dates[date] = struct{}{}
  235. }
  236. return dates, nil
  237. }
  238. // mergeCSV merges multiple csv files into one.
  239. // Files may have different headers, but the result will have a header that is a union of all headers.
  240. // The main goal here is to allow changing CSV format without breaking or loosing existing data.
  241. func mergeCSV(files [][]byte) ([]byte, error) {
  242. var err error
  243. headers := make([][]string, 0, len(files))
  244. csvReaders := make([]*csv.Reader, 0, len(files))
  245. // first, get information about the result header
  246. for _, file := range files {
  247. csvReader := csv.NewReader(bytes.NewReader(file))
  248. header, err := csvReader.Read()
  249. if errors.Is(err, io.EOF) {
  250. // ignore empty files
  251. continue
  252. }
  253. if err != nil {
  254. return nil, errors.Wrap(err, "reading header of csv file")
  255. }
  256. headers = append(headers, header)
  257. csvReaders = append(csvReaders, csvReader)
  258. }
  259. mapping, header := combineHeaders(headers)
  260. output := new(bytes.Buffer)
  261. csvWriter := csv.NewWriter(output)
  262. err = csvWriter.Write(mergeHeaders(headers))
  263. if err != nil {
  264. return nil, errors.Wrap(err, "writing header to csv file")
  265. }
  266. for csvIndex, csvReader := range csvReaders {
  267. for {
  268. inputLine, err := csvReader.Read()
  269. if errors.Is(err, io.EOF) {
  270. break
  271. }
  272. if err != nil {
  273. return nil, errors.Wrap(err, "reading csv file line")
  274. }
  275. outputLine := make([]string, len(header))
  276. for colIndex := range header {
  277. destColIndex, ok := mapping[csvIndex][colIndex]
  278. if !ok {
  279. continue
  280. }
  281. outputLine[destColIndex] = inputLine[colIndex]
  282. }
  283. err = csvWriter.Write(outputLine)
  284. if err != nil {
  285. return nil, errors.Wrap(err, "writing line to csv file")
  286. }
  287. }
  288. }
  289. csvWriter.Flush()
  290. if csvWriter.Error() != nil {
  291. return nil, errors.Wrapf(csvWriter.Error(), "flushing csv file")
  292. }
  293. return output.Bytes(), nil
  294. }
  // combineHeaders builds the union of several CSV headers.
  // It returns two values: for every input header, a map from a column's position in that
  // header to its position in the combined header; and the combined header itself, in
  // which every distinct column name appears once, in order of first appearance.
  func combineHeaders(headers [][]string) ([]map[int]int, []string) {
  	result := make([]string, 0)
  	// position of every column name already placed in result;
  	// one map lookup replaces a linear scan of result per column
  	position := make(map[string]int)
  	indices := make([]map[int]int, len(headers))
  	for i, header := range headers {
  		indices[i] = make(map[int]int, len(header))
  		for j, column := range header {
  			dest, known := position[column]
  			if !known {
  				dest = len(result)
  				position[column] = dest
  				result = append(result, column)
  			}
  			indices[i][j] = dest
  		}
  	}
  	return indices, result
  }
  // mergeHeaders returns the union of the given CSV headers: every distinct column name
  // once, in order of first appearance. The result is never nil.
  func mergeHeaders(headers [][]string) []string {
  	result := make([]string, 0)
  	// names already placed in result; a set lookup replaces a linear scan per column
  	seen := make(map[string]struct{})
  	for _, header := range headers {
  		for _, column := range header {
  			if _, dup := seen[column]; dup {
  				continue
  			}
  			seen[column] = struct{}{}
  			result = append(result, column)
  		}
  	}
  	return result
  }
  // contains reports whether item is one of the elements of slice.
  func contains(slice []string, item string) bool {
  	for i := 0; i < len(slice); i++ {
  		if slice[i] == item {
  			return true
  		}
  	}
  	return false
  }
  // indexOf returns the position of the first occurrence of element in slice,
  // or -1 when slice does not contain it.
  func indexOf(slice []string, element string) int {
  	for i := 0; i < len(slice); i++ {
  		if slice[i] != element {
  			continue
  		}
  		return i
  	}
  	return -1
  }
  // mapTimeToSlice returns the keys of data as a slice sorted from the earliest to the
  // latest instant. The result is empty but not nil when data has no keys.
  func mapTimeToSlice(data map[time.Time]struct{}) []time.Time {
  	ordered := make([]time.Time, len(data))
  	next := 0
  	for instant := range data {
  		ordered[next] = instant
  		next++
  	}
  	// map iteration order is random, so the keys are sorted explicitly
  	sort.Slice(ordered, func(a, b int) bool {
  		return ordered[a].Before(ordered[b])
  	})
  	return ordered
  }