csv_export.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. package costmodel
  2. import (
  3. "context"
  4. "encoding/csv"
  5. "encoding/json"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "os"
  10. "sort"
  11. "strconv"
  12. "time"
  13. "github.com/opencost/opencost/core/pkg/log"
  14. "github.com/opencost/opencost/core/pkg/opencost"
  15. "github.com/opencost/opencost/pkg/filemanager"
  16. )
// AllocationModel provides access to allocation data and reports the time
// range for which that data is available.
type AllocationModel interface {
	// ComputeAllocation returns the allocation set for [start, end) computed
	// at the given resolution.
	ComputeAllocation(start, end time.Time, resolution time.Duration) (*opencost.AllocationSet, error)
	// DateRange returns the earliest and latest timestamps for which data exists.
	DateRange() (time.Time, time.Time, error)
}
// errNoData signals that there are no (new) dates to export.
// NOTE(review): Update currently propagates this to its caller as a regular
// error — confirm callers check errors.Is(err, errNoData) if "nothing to do"
// should not be treated as a failure.
var errNoData = errors.New("no data")
  22. func UpdateCSV(ctx context.Context, fileManager filemanager.FileManager, model AllocationModel, labelsAll bool, labels []string) error {
  23. exporter := &csvExporter{
  24. FileManager: fileManager,
  25. Model: model,
  26. LabelsAll: labelsAll,
  27. Labels: labels,
  28. }
  29. return exporter.Update(ctx)
  30. }
// csvExporter writes day-granularity allocation data as CSV to a file kept in
// external storage behind FileManager, adding only dates not yet exported.
type csvExporter struct {
	FileManager filemanager.FileManager // storage holding the exported CSV file
	Model       AllocationModel         // source of allocation data
	Labels      []string                // If not empty, create a column for each label prefixed with "Label_"
	LabelsAll   bool                    // if true, export all labels to a "Labels" column in JSON format
}
// Update updates CSV file in cloud storage with new allocation data.
// It downloads the existing export (if any), appends rows for dates not yet
// present, and uploads the combined file. Temp files are cleaned up on return.
func (e *csvExporter) Update(ctx context.Context) error {
	// Determine which whole days of allocation data the model can provide.
	allocationDates, err := e.availableAllocationDates()
	if err != nil {
		return err
	}
	if len(allocationDates) == 0 {
		return errors.New("no data to export from prometheus")
	}
	// resultTmp accumulates the final CSV that gets uploaded.
	resultTmp, err := os.CreateTemp("", "opencost-export-*.csv")
	if err != nil {
		return err
	}
	defer closeAndDelete(resultTmp)
	// previousExportTmp receives the current export from storage, if one exists.
	previousExportTmp, err := os.CreateTemp("", "opencost-previous-export-*.csv")
	if err != nil {
		return err
	}
	defer closeAndDelete(previousExportTmp)
	err = e.FileManager.Download(ctx, previousExportTmp)
	switch {
	case errors.Is(err, filemanager.ErrNotFound):
		// there is no previous file, so we need to create it
		err := e.writeCSVToWriter(ctx, resultTmp, mapTimeToSlice(allocationDates))
		if err != nil {
			return err
		}
	case err != nil:
		return err
	default:
		// existing export file exists
		// scan through it and ignore all dates that are already in the file
		// avoid modifying existing data or producing duplicates
		// NOTE(review): errNoData from updateExportCSV propagates to the caller
		// as an ordinary error here — confirm that is intended.
		err := e.updateExportCSV(ctx, previousExportTmp, allocationDates, resultTmp)
		if err != nil {
			return err
		}
	}
	// we just wrote to the file, so we need to seek to the beginning, so we can read from it
	_, err = resultTmp.Seek(0, io.SeekStart)
	if err != nil {
		return err
	}
	err = e.FileManager.Upload(ctx, resultTmp)
	if err != nil {
		return err
	}
	log.Info("CSV export updated")
	return nil
}
// updateExportCSV computes allocation rows only for dates missing from the
// previous export file and writes the merged CSV (old rows + new rows) to
// result. allocationDates is mutated: dates already covered by the previous
// export are deleted from it. Returns errNoData when nothing new needs adding.
func (e *csvExporter) updateExportCSV(ctx context.Context, previousExportTmp *os.File, allocationDates map[time.Time]struct{}, result *os.File) error {
	previousDates, err := e.loadDates(previousExportTmp)
	if err != nil {
		return err
	}
	// Drop every date the existing export already covers so existing data is
	// never recomputed or duplicated.
	for date := range previousDates {
		delete(allocationDates, date)
	}
	if len(allocationDates) == 0 {
		log.Info("export file in cloud storage already contain data for all dates, skipping update")
		return errNoData
	}
	newExportTmp, err := os.CreateTemp("", "opencost-new-export-*.csv")
	if err != nil {
		return err
	}
	defer closeAndDelete(newExportTmp)
	// Export only the missing dates into a scratch file ...
	err = e.writeCSVToWriter(ctx, newExportTmp, mapTimeToSlice(allocationDates))
	if err != nil {
		return err
	}
	// ... then merge old and new; mergeCSV tolerates differing headers, which
	// keeps older exports usable after format changes.
	err = mergeCSV([]*os.File{previousExportTmp, newExportTmp}, result)
	if err != nil {
		return err
	}
	return nil
}
// availableAllocationDates returns the set of whole UTC days (keyed by the
// day's midnight timestamp) for which complete allocation data is available
// from the model. Returns errNoData when no complete day exists.
func (e *csvExporter) availableAllocationDates() (map[time.Time]struct{}, error) {
	start, end, err := e.Model.DateRange()
	if err != nil {
		return nil, err
	}
	if start != time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC) {
		// start doesn't start from 00:00 UTC, it could be truncated by prometheus retention policy
		// skip incomplete data and begin from the day after, otherwise it may corrupt existing data
		start = time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC).AddDate(0, 0, 1)
	}
	// Truncate end to midnight UTC: the current, still-incomplete day is excluded.
	end = time.Date(end.Year(), end.Month(), end.Day(), 0, 0, 0, 0, time.UTC)
	dates := make(map[time.Time]struct{})
	// Half-open range [start, end): only fully elapsed days are collected.
	for date := start; date.Before(end); date = date.AddDate(0, 0, 1) {
		dates[date] = struct{}{}
	}
	if len(dates) == 0 {
		return nil, errNoData
	}
	return dates, nil
}
  134. func (e *csvExporter) writeCSVToWriter(ctx context.Context, w io.Writer, dates []time.Time) error {
  135. fmtFloat := func(f float64) string {
  136. return strconv.FormatFloat(f, 'f', -1, 64)
  137. }
  138. type rowData struct {
  139. date time.Time
  140. alloc *opencost.Allocation
  141. }
  142. type columnDef struct {
  143. column string
  144. value func(data rowData) string
  145. }
  146. csvDef := []columnDef{
  147. {
  148. column: "Date",
  149. value: func(data rowData) string {
  150. return data.date.Format("2006-01-02")
  151. },
  152. },
  153. {
  154. column: "Namespace",
  155. value: func(data rowData) string {
  156. return data.alloc.Properties.Namespace
  157. },
  158. },
  159. {
  160. column: "ControllerKind",
  161. value: func(data rowData) string {
  162. return data.alloc.Properties.ControllerKind
  163. },
  164. },
  165. {
  166. column: "ControllerName",
  167. value: func(data rowData) string {
  168. return data.alloc.Properties.Controller
  169. },
  170. },
  171. {
  172. column: "Pod",
  173. value: func(data rowData) string {
  174. return data.alloc.Properties.Pod
  175. },
  176. },
  177. {
  178. column: "Container",
  179. value: func(data rowData) string {
  180. return data.alloc.Properties.Container
  181. },
  182. },
  183. {
  184. column: "CPUCoreUsageAverage",
  185. value: func(data rowData) string {
  186. return fmtFloat(data.alloc.CPUCoreUsageAverage)
  187. },
  188. },
  189. {
  190. column: "CPUCoreRequestAverage",
  191. value: func(data rowData) string {
  192. return fmtFloat(data.alloc.CPUCoreRequestAverage)
  193. },
  194. },
  195. {
  196. column: "RAMBytesUsageAverage",
  197. value: func(data rowData) string {
  198. return fmtFloat(data.alloc.RAMBytesUsageAverage)
  199. },
  200. },
  201. {
  202. column: "RAMBytesRequestAverage",
  203. value: func(data rowData) string {
  204. return fmtFloat(data.alloc.RAMBytesRequestAverage)
  205. },
  206. },
  207. {
  208. column: "NetworkReceiveBytes",
  209. value: func(data rowData) string {
  210. return fmtFloat(data.alloc.NetworkReceiveBytes)
  211. },
  212. },
  213. {
  214. column: "NetworkTransferBytes",
  215. value: func(data rowData) string {
  216. return fmtFloat(data.alloc.NetworkTransferBytes)
  217. },
  218. },
  219. {
  220. column: "GPUs",
  221. value: func(data rowData) string {
  222. return fmtFloat(data.alloc.GPUs())
  223. },
  224. },
  225. {
  226. column: "PVBytes",
  227. value: func(data rowData) string {
  228. return fmtFloat(data.alloc.PVBytes())
  229. },
  230. },
  231. {
  232. column: "CPUCost",
  233. value: func(data rowData) string {
  234. return fmtFloat(data.alloc.CPUTotalCost())
  235. },
  236. },
  237. {
  238. column: "RAMCost",
  239. value: func(data rowData) string {
  240. return fmtFloat(data.alloc.RAMTotalCost())
  241. },
  242. },
  243. {
  244. column: "NetworkCost",
  245. value: func(data rowData) string {
  246. return fmtFloat(data.alloc.NetworkTotalCost())
  247. },
  248. },
  249. {
  250. column: "PVCost",
  251. value: func(data rowData) string {
  252. return fmtFloat(data.alloc.PVTotalCost())
  253. },
  254. },
  255. {
  256. column: "GPUCost",
  257. value: func(data rowData) string {
  258. return fmtFloat(data.alloc.GPUTotalCost())
  259. },
  260. },
  261. {
  262. column: "TotalCost",
  263. value: func(data rowData) string {
  264. return fmtFloat(data.alloc.TotalCost())
  265. },
  266. },
  267. }
  268. if e.LabelsAll {
  269. csvDef = append(csvDef, columnDef{
  270. column: "Labels",
  271. value: func(data rowData) string {
  272. return fmtLabelsCSV(data.alloc.Properties.Labels)
  273. },
  274. })
  275. }
  276. for i := range e.Labels {
  277. label := e.Labels[i] // it's important to copy the label name, otherwise all closures will reference the same label
  278. csvDef = append(csvDef, columnDef{
  279. column: "Label_" + label,
  280. value: func(data rowData) string {
  281. value, _ := data.alloc.Properties.Labels[label]
  282. return value
  283. },
  284. })
  285. }
  286. header := make([]string, 0, len(csvDef))
  287. for _, def := range csvDef {
  288. header = append(header, def.column)
  289. }
  290. csvWriter := csv.NewWriter(w)
  291. lines := 0
  292. err := csvWriter.Write(header)
  293. if err != nil {
  294. return fmt.Errorf("failed to write header: %w", err)
  295. }
  296. log.Infof("writing CSV with header: %v", header)
  297. for _, date := range dates {
  298. start := time.Date(date.Year(), date.Month(), date.Day(), 0, 0, 0, 0, time.UTC)
  299. end := start.AddDate(0, 0, 1)
  300. data, err := e.Model.ComputeAllocation(start, end, 5*time.Minute)
  301. if err != nil {
  302. return err
  303. }
  304. log.Infof("fetched %d records for %s", len(data.Allocations), date.Format("2006-01-02"))
  305. for _, alloc := range data.Allocations {
  306. if err := ctx.Err(); err != nil {
  307. return err
  308. }
  309. row := make([]string, 0, len(csvDef))
  310. for _, def := range csvDef {
  311. row = append(row, def.value(rowData{date: date, alloc: alloc}))
  312. }
  313. err := csvWriter.Write(row)
  314. if err != nil {
  315. return fmt.Errorf("failed to write csv row: %w", err)
  316. }
  317. lines++
  318. }
  319. }
  320. if lines == 0 {
  321. return errNoData
  322. }
  323. csvWriter.Flush()
  324. if err := csvWriter.Error(); err != nil {
  325. return err
  326. }
  327. log.Infof("exported %d lines", lines)
  328. return nil
  329. }
  330. func fmtLabelsCSV(labels map[string]string) string {
  331. if len(labels) == 0 {
  332. return ""
  333. }
  334. data, err := json.Marshal(labels)
  335. if err != nil {
  336. log.Errorf("failed to marshal labels: %s", err)
  337. return ""
  338. }
  339. return string(data)
  340. }
  341. // loadDate scans through CSV export file and extract all dates from "Date" column
  342. func (e *csvExporter) loadDates(csvFile *os.File) (map[time.Time]struct{}, error) {
  343. _, err := csvFile.Seek(0, io.SeekStart)
  344. if err != nil {
  345. return nil, fmt.Errorf("seeking to the beginning of csv file: %w", err)
  346. }
  347. csvReader := csv.NewReader(csvFile)
  348. header, err := csvReader.Read()
  349. if err != nil {
  350. return nil, fmt.Errorf("reading csv header: %w", err)
  351. }
  352. dateColIndex := 0
  353. for i, col := range header {
  354. if col == "Date" {
  355. dateColIndex = i
  356. break
  357. }
  358. }
  359. dates := make(map[time.Time]struct{})
  360. for {
  361. row, err := csvReader.Read()
  362. if errors.Is(err, io.EOF) {
  363. break
  364. }
  365. if err != nil {
  366. return nil, fmt.Errorf("reading csv row: %w", err)
  367. }
  368. date, err := time.Parse("2006-01-02", row[dateColIndex])
  369. if err != nil {
  370. return nil, fmt.Errorf("parsing date: %w", err)
  371. }
  372. dates[date] = struct{}{}
  373. }
  374. return dates, nil
  375. }
  376. // mergeCSV merges multiple csv files into one.
  377. // Files may have different headers, but the result will have a header that is a union of all headers.
  378. // The main goal here is to allow changing CSV format without breaking or loosing existing data.
  379. func mergeCSV(input []*os.File, output *os.File) error {
  380. var err error
  381. headers := make([][]string, 0, len(input))
  382. csvReaders := make([]*csv.Reader, 0, len(input))
  383. // first, get information about the result header
  384. for _, file := range input {
  385. _, err = file.Seek(0, io.SeekStart)
  386. if err != nil {
  387. return fmt.Errorf("seeking to the beginning of csv file: %w", err)
  388. }
  389. csvReader := csv.NewReader(file)
  390. header, err := csvReader.Read()
  391. if errors.Is(err, io.EOF) {
  392. // ignore empty files
  393. continue
  394. }
  395. if err != nil {
  396. return fmt.Errorf("reading header of csv file: %w", err)
  397. }
  398. headers = append(headers, header)
  399. csvReaders = append(csvReaders, csvReader)
  400. }
  401. mapping, header := combineHeaders(headers)
  402. csvWriter := csv.NewWriter(output)
  403. err = csvWriter.Write(mergeHeaders(headers))
  404. if err != nil {
  405. return fmt.Errorf("writing header to csv file: %w", err)
  406. }
  407. for csvIndex, csvReader := range csvReaders {
  408. for {
  409. inputLine, err := csvReader.Read()
  410. if errors.Is(err, io.EOF) {
  411. break
  412. }
  413. if err != nil {
  414. return fmt.Errorf("reading csv file line: %w", err)
  415. }
  416. outputLine := make([]string, len(header))
  417. for colIndex := range header {
  418. destColIndex, ok := mapping[csvIndex][colIndex]
  419. if !ok {
  420. continue
  421. }
  422. outputLine[destColIndex] = inputLine[colIndex]
  423. }
  424. err = csvWriter.Write(outputLine)
  425. if err != nil {
  426. return fmt.Errorf("writing line to csv file: %w", err)
  427. }
  428. }
  429. }
  430. csvWriter.Flush()
  431. // check for errors from the Flush
  432. if csvWriter.Error() != nil {
  433. return fmt.Errorf("flushing csv file: %w", csvWriter.Error())
  434. }
  435. return nil
  436. }
  437. func combineHeaders(headers [][]string) ([]map[int]int, []string) {
  438. result := make([]string, 0)
  439. indices := make([]map[int]int, len(headers))
  440. for i, header := range headers {
  441. indices[i] = make(map[int]int)
  442. for j, column := range header {
  443. if !contains(result, column) {
  444. result = append(result, column)
  445. indices[i][j] = len(result) - 1
  446. } else {
  447. indices[i][j] = indexOf(result, column)
  448. }
  449. }
  450. }
  451. return indices, result
  452. }
  453. func mergeHeaders(headers [][]string) []string {
  454. result := make([]string, 0)
  455. for _, header := range headers {
  456. for _, column := range header {
  457. if !contains(result, column) {
  458. result = append(result, column)
  459. }
  460. }
  461. }
  462. return result
  463. }
  464. func contains(slice []string, item string) bool {
  465. for _, element := range slice {
  466. if element == item {
  467. return true
  468. }
  469. }
  470. return false
  471. }
  472. func indexOf(slice []string, element string) int {
  473. for i, e := range slice {
  474. if e == element {
  475. return i
  476. }
  477. }
  478. return -1
  479. }
  480. func mapTimeToSlice(data map[time.Time]struct{}) []time.Time {
  481. result := make([]time.Time, 0, len(data))
  482. for key := range data {
  483. result = append(result, key)
  484. }
  485. sort.Slice(result, func(i, j int) bool {
  486. return result[i].Before(result[j])
  487. })
  488. return result
  489. }
  490. func closeAndDelete(f *os.File) {
  491. if err := f.Close(); err != nil {
  492. log.Errorf("error closing file: %v", err)
  493. }
  494. if err := os.Remove(f.Name()); err != nil {
  495. log.Errorf("error deleting file: %v", err)
  496. }
  497. }