2
0

csv_export.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. package costmodel
  2. import (
  3. "context"
  4. "encoding/csv"
  5. "encoding/json"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "os"
  10. "sort"
  11. "strconv"
  12. "time"
  13. "github.com/opencost/opencost/core/pkg/log"
  14. "github.com/opencost/opencost/core/pkg/opencost"
  15. "github.com/opencost/opencost/pkg/env"
  16. "github.com/opencost/opencost/pkg/filemanager"
  17. )
// AllocationModel is the data source for the CSV export: it computes
// allocation sets for a time window and reports which dates have data.
type AllocationModel interface {
	// ComputeAllocation returns the allocation set covering the window from
	// start to end (callers in this file pass UTC day boundaries).
	ComputeAllocation(start, end time.Time) (*opencost.AllocationSet, error)
	// DateRange returns the earliest and latest timestamps with available
	// data, looking back at most limitDays days.
	DateRange(limitDays int) (time.Time, time.Time, error)
}

// errNoData signals that there is nothing (new) to export; callers use it to
// distinguish a benign empty result from a hard failure.
var errNoData = errors.New("no data")
  23. func UpdateCSV(ctx context.Context, fileManager filemanager.FileManager, model AllocationModel, labelsAll bool, labels []string) error {
  24. exporter := &csvExporter{
  25. FileManager: fileManager,
  26. Model: model,
  27. LabelsAll: labelsAll,
  28. Labels: labels,
  29. }
  30. return exporter.Update(ctx)
  31. }
// csvExporter bundles everything needed to build and publish the CSV export:
// the storage backend, the allocation data source, and the label-column options.
type csvExporter struct {
	FileManager filemanager.FileManager
	Model       AllocationModel
	Labels      []string // If not empty, create a column for each label prefixed with "Label_"
	LabelsAll   bool     // if true, export all labels to a "Labels" column in JSON format
}
// Update updates CSV file in cloud storage with new allocation data.
//
// Flow: determine which dates have data, download the previous export (if
// any), write rows only for dates not already exported, and upload the
// merged result. All work happens in temp files that are removed on return.
func (e *csvExporter) Update(ctx context.Context) error {
	allocationDates, err := e.availableAllocationDates()
	if err != nil {
		return err
	}
	if len(allocationDates) == 0 {
		return errors.New("no data to export from prometheus")
	}
	// resultTmp accumulates the final CSV that gets uploaded.
	resultTmp, err := os.CreateTemp("", "opencost-export-*.csv")
	if err != nil {
		return err
	}
	defer closeAndDelete(resultTmp)
	// previousExportTmp receives the currently stored export, if one exists.
	previousExportTmp, err := os.CreateTemp("", "opencost-previous-export-*.csv")
	if err != nil {
		return err
	}
	defer closeAndDelete(previousExportTmp)
	err = e.FileManager.Download(ctx, previousExportTmp)
	switch {
	case errors.Is(err, filemanager.ErrNotFound):
		// there is no previous file, so we need to create it
		err := e.writeCSVToWriter(ctx, resultTmp, mapTimeToSlice(allocationDates))
		if err != nil {
			return err
		}
	case err != nil:
		return err
	default:
		// existing export file exists
		// scan through it and ignore all dates that are already in the file
		// avoid modifying existing data or producing duplicates
		err := e.updateExportCSV(ctx, previousExportTmp, allocationDates, resultTmp)
		if err != nil {
			return err
		}
	}
	// we just wrote to the file, so we need to seek to the beginning, so we can read from it
	_, err = resultTmp.Seek(0, io.SeekStart)
	if err != nil {
		return err
	}
	err = e.FileManager.Upload(ctx, resultTmp)
	if err != nil {
		return err
	}
	log.Info("CSV export updated")
	return nil
}
  88. func (e *csvExporter) updateExportCSV(ctx context.Context, previousExportTmp *os.File, allocationDates map[time.Time]struct{}, result *os.File) error {
  89. previousDates, err := e.loadDates(previousExportTmp)
  90. if err != nil {
  91. return err
  92. }
  93. for date := range previousDates {
  94. delete(allocationDates, date)
  95. }
  96. if len(allocationDates) == 0 {
  97. log.Info("export file in cloud storage already contain data for all dates, skipping update")
  98. return errNoData
  99. }
  100. newExportTmp, err := os.CreateTemp("", "opencost-new-export-*.csv")
  101. if err != nil {
  102. return err
  103. }
  104. defer closeAndDelete(newExportTmp)
  105. err = e.writeCSVToWriter(ctx, newExportTmp, mapTimeToSlice(allocationDates))
  106. if err != nil {
  107. return err
  108. }
  109. err = mergeCSV([]*os.File{previousExportTmp, newExportTmp}, result)
  110. if err != nil {
  111. return err
  112. }
  113. return nil
  114. }
  115. func (e *csvExporter) availableAllocationDates() (map[time.Time]struct{}, error) {
  116. start, end, err := e.Model.DateRange(env.GetExportCSVMaxDays())
  117. if err != nil {
  118. return nil, err
  119. }
  120. if start != time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC) {
  121. // start doesn't start from 00:00 UTC, it could be truncated by prometheus retention policy
  122. // skip incomplete data and begin from the day after, otherwise it may corrupt existing data
  123. start = time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC).AddDate(0, 0, 1)
  124. }
  125. end = time.Date(end.Year(), end.Month(), end.Day(), 0, 0, 0, 0, time.UTC)
  126. dates := make(map[time.Time]struct{})
  127. for date := start; date.Before(end); date = date.AddDate(0, 0, 1) {
  128. dates[date] = struct{}{}
  129. }
  130. if len(dates) == 0 {
  131. return nil, errNoData
  132. }
  133. return dates, nil
  134. }
  135. func (e *csvExporter) writeCSVToWriter(ctx context.Context, w io.Writer, dates []time.Time) error {
  136. fmtFloat := func(f float64) string {
  137. return strconv.FormatFloat(f, 'f', -1, 64)
  138. }
  139. type rowData struct {
  140. date time.Time
  141. alloc *opencost.Allocation
  142. }
  143. type columnDef struct {
  144. column string
  145. value func(data rowData) string
  146. }
  147. csvDef := []columnDef{
  148. {
  149. column: "Date",
  150. value: func(data rowData) string {
  151. return data.date.Format("2006-01-02")
  152. },
  153. },
  154. {
  155. column: "Namespace",
  156. value: func(data rowData) string {
  157. return data.alloc.Properties.Namespace
  158. },
  159. },
  160. {
  161. column: "Cluster",
  162. value: func(data rowData) string {
  163. if data.alloc.Properties == nil {
  164. return ""
  165. }
  166. return data.alloc.Properties.Cluster
  167. },
  168. },
  169. {
  170. column: "ControllerKind",
  171. value: func(data rowData) string {
  172. return data.alloc.Properties.ControllerKind
  173. },
  174. },
  175. {
  176. column: "ControllerName",
  177. value: func(data rowData) string {
  178. return data.alloc.Properties.Controller
  179. },
  180. },
  181. {
  182. column: "Pod",
  183. value: func(data rowData) string {
  184. return data.alloc.Properties.Pod
  185. },
  186. },
  187. {
  188. column: "Container",
  189. value: func(data rowData) string {
  190. return data.alloc.Properties.Container
  191. },
  192. },
  193. {
  194. column: "CPUCoreUsageAverage",
  195. value: func(data rowData) string {
  196. return fmtFloat(data.alloc.CPUCoreUsageAverage)
  197. },
  198. },
  199. {
  200. column: "CPUCoreRequestAverage",
  201. value: func(data rowData) string {
  202. return fmtFloat(data.alloc.CPUCoreRequestAverage)
  203. },
  204. },
  205. {
  206. column: "RAMBytesUsageAverage",
  207. value: func(data rowData) string {
  208. return fmtFloat(data.alloc.RAMBytesUsageAverage)
  209. },
  210. },
  211. {
  212. column: "RAMBytesRequestAverage",
  213. value: func(data rowData) string {
  214. return fmtFloat(data.alloc.RAMBytesRequestAverage)
  215. },
  216. },
  217. {
  218. column: "NetworkReceiveBytes",
  219. value: func(data rowData) string {
  220. return fmtFloat(data.alloc.NetworkReceiveBytes)
  221. },
  222. },
  223. {
  224. column: "NetworkTransferBytes",
  225. value: func(data rowData) string {
  226. return fmtFloat(data.alloc.NetworkTransferBytes)
  227. },
  228. },
  229. {
  230. column: "GPUs",
  231. value: func(data rowData) string {
  232. return fmtFloat(data.alloc.GPUs())
  233. },
  234. },
  235. {
  236. column: "PVBytes",
  237. value: func(data rowData) string {
  238. return fmtFloat(data.alloc.PVBytes())
  239. },
  240. },
  241. {
  242. column: "CPUCost",
  243. value: func(data rowData) string {
  244. return fmtFloat(data.alloc.CPUTotalCost())
  245. },
  246. },
  247. {
  248. column: "RAMCost",
  249. value: func(data rowData) string {
  250. return fmtFloat(data.alloc.RAMTotalCost())
  251. },
  252. },
  253. {
  254. column: "NetworkCost",
  255. value: func(data rowData) string {
  256. return fmtFloat(data.alloc.NetworkTotalCost())
  257. },
  258. },
  259. {
  260. column: "PVCost",
  261. value: func(data rowData) string {
  262. return fmtFloat(data.alloc.PVTotalCost())
  263. },
  264. },
  265. {
  266. column: "GPUCost",
  267. value: func(data rowData) string {
  268. return fmtFloat(data.alloc.GPUTotalCost())
  269. },
  270. },
  271. {
  272. column: "TotalCost",
  273. value: func(data rowData) string {
  274. return fmtFloat(data.alloc.TotalCost())
  275. },
  276. },
  277. }
  278. if e.LabelsAll {
  279. csvDef = append(csvDef, columnDef{
  280. column: "Labels",
  281. value: func(data rowData) string {
  282. return fmtLabelsCSV(data.alloc.Properties.Labels)
  283. },
  284. })
  285. }
  286. for i := range e.Labels {
  287. label := e.Labels[i] // it's important to copy the label name, otherwise all closures will reference the same label
  288. csvDef = append(csvDef, columnDef{
  289. column: "Label_" + label,
  290. value: func(data rowData) string {
  291. value := data.alloc.Properties.Labels[label]
  292. return value
  293. },
  294. })
  295. }
  296. header := make([]string, 0, len(csvDef))
  297. for _, def := range csvDef {
  298. header = append(header, def.column)
  299. }
  300. csvWriter := csv.NewWriter(w)
  301. lines := 0
  302. err := csvWriter.Write(header)
  303. if err != nil {
  304. return fmt.Errorf("failed to write header: %w", err)
  305. }
  306. log.Infof("writing CSV with header: %v", header)
  307. for _, date := range dates {
  308. start := time.Date(date.Year(), date.Month(), date.Day(), 0, 0, 0, 0, time.UTC)
  309. end := start.AddDate(0, 0, 1)
  310. data, err := e.Model.ComputeAllocation(start, end)
  311. if err != nil {
  312. log.Warnf("Failed to compute allocation for %s: %v - skipping this date", date.Format("2006-01-02"), err)
  313. continue // Skip this date instead of failing the entire export
  314. }
  315. log.Infof("fetched %d records for %s", len(data.Allocations), date.Format("2006-01-02"))
  316. for _, alloc := range data.Allocations {
  317. if err := ctx.Err(); err != nil {
  318. return err
  319. }
  320. row := make([]string, 0, len(csvDef))
  321. for _, def := range csvDef {
  322. row = append(row, def.value(rowData{date: date, alloc: alloc}))
  323. }
  324. err := csvWriter.Write(row)
  325. if err != nil {
  326. return fmt.Errorf("failed to write csv row: %w", err)
  327. }
  328. lines++
  329. }
  330. }
  331. if lines == 0 {
  332. log.Warnf("CSV export completed but no allocation data was found for the requested date range")
  333. return errNoData
  334. }
  335. csvWriter.Flush()
  336. if err := csvWriter.Error(); err != nil {
  337. return err
  338. }
  339. log.Infof("exported %d lines", lines)
  340. return nil
  341. }
  342. func fmtLabelsCSV(labels map[string]string) string {
  343. if len(labels) == 0 {
  344. return ""
  345. }
  346. data, err := json.Marshal(labels)
  347. if err != nil {
  348. log.Errorf("failed to marshal labels: %s", err)
  349. return ""
  350. }
  351. return string(data)
  352. }
  353. // loadDate scans through CSV export file and extract all dates from "Date" column
  354. func (e *csvExporter) loadDates(csvFile *os.File) (map[time.Time]struct{}, error) {
  355. _, err := csvFile.Seek(0, io.SeekStart)
  356. if err != nil {
  357. return nil, fmt.Errorf("seeking to the beginning of csv file: %w", err)
  358. }
  359. csvReader := csv.NewReader(csvFile)
  360. header, err := csvReader.Read()
  361. if err != nil {
  362. return nil, fmt.Errorf("reading csv header: %w", err)
  363. }
  364. dateColIndex := 0
  365. for i, col := range header {
  366. if col == "Date" {
  367. dateColIndex = i
  368. break
  369. }
  370. }
  371. dates := make(map[time.Time]struct{})
  372. for {
  373. row, err := csvReader.Read()
  374. if errors.Is(err, io.EOF) {
  375. break
  376. }
  377. if err != nil {
  378. return nil, fmt.Errorf("reading csv row: %w", err)
  379. }
  380. date, err := time.Parse("2006-01-02", row[dateColIndex])
  381. if err != nil {
  382. return nil, fmt.Errorf("parsing date: %w", err)
  383. }
  384. dates[date] = struct{}{}
  385. }
  386. return dates, nil
  387. }
  388. // mergeCSV merges multiple csv files into one.
  389. // Files may have different headers, but the result will have a header that is a union of all headers.
  390. // The main goal here is to allow changing CSV format without breaking or loosing existing data.
  391. func mergeCSV(input []*os.File, output *os.File) error {
  392. var err error
  393. headers := make([][]string, 0, len(input))
  394. csvReaders := make([]*csv.Reader, 0, len(input))
  395. // first, get information about the result header
  396. for _, file := range input {
  397. _, err = file.Seek(0, io.SeekStart)
  398. if err != nil {
  399. return fmt.Errorf("seeking to the beginning of csv file: %w", err)
  400. }
  401. csvReader := csv.NewReader(file)
  402. header, err := csvReader.Read()
  403. if errors.Is(err, io.EOF) {
  404. // ignore empty files
  405. continue
  406. }
  407. if err != nil {
  408. return fmt.Errorf("reading header of csv file: %w", err)
  409. }
  410. headers = append(headers, header)
  411. csvReaders = append(csvReaders, csvReader)
  412. }
  413. mapping, header := combineHeaders(headers)
  414. csvWriter := csv.NewWriter(output)
  415. err = csvWriter.Write(mergeHeaders(headers))
  416. if err != nil {
  417. return fmt.Errorf("writing header to csv file: %w", err)
  418. }
  419. for csvIndex, csvReader := range csvReaders {
  420. for {
  421. inputLine, err := csvReader.Read()
  422. if errors.Is(err, io.EOF) {
  423. break
  424. }
  425. if err != nil {
  426. return fmt.Errorf("reading csv file line: %w", err)
  427. }
  428. outputLine := make([]string, len(header))
  429. for colIndex := range header {
  430. destColIndex, ok := mapping[csvIndex][colIndex]
  431. if !ok {
  432. continue
  433. }
  434. outputLine[destColIndex] = inputLine[colIndex]
  435. }
  436. err = csvWriter.Write(outputLine)
  437. if err != nil {
  438. return fmt.Errorf("writing line to csv file: %w", err)
  439. }
  440. }
  441. }
  442. csvWriter.Flush()
  443. // check for errors from the Flush
  444. if csvWriter.Error() != nil {
  445. return fmt.Errorf("flushing csv file: %w", csvWriter.Error())
  446. }
  447. return nil
  448. }
  449. func combineHeaders(headers [][]string) ([]map[int]int, []string) {
  450. result := make([]string, 0)
  451. indices := make([]map[int]int, len(headers))
  452. for i, header := range headers {
  453. indices[i] = make(map[int]int)
  454. for j, column := range header {
  455. if !contains(result, column) {
  456. result = append(result, column)
  457. indices[i][j] = len(result) - 1
  458. } else {
  459. indices[i][j] = indexOf(result, column)
  460. }
  461. }
  462. }
  463. return indices, result
  464. }
  465. func mergeHeaders(headers [][]string) []string {
  466. result := make([]string, 0)
  467. for _, header := range headers {
  468. for _, column := range header {
  469. if !contains(result, column) {
  470. result = append(result, column)
  471. }
  472. }
  473. }
  474. return result
  475. }
  476. func contains(slice []string, item string) bool {
  477. for _, element := range slice {
  478. if element == item {
  479. return true
  480. }
  481. }
  482. return false
  483. }
  484. func indexOf(slice []string, element string) int {
  485. for i, e := range slice {
  486. if e == element {
  487. return i
  488. }
  489. }
  490. return -1
  491. }
  492. func mapTimeToSlice(data map[time.Time]struct{}) []time.Time {
  493. result := make([]time.Time, 0, len(data))
  494. for key := range data {
  495. result = append(result, key)
  496. }
  497. sort.Slice(result, func(i, j int) bool {
  498. return result[i].Before(result[j])
  499. })
  500. return result
  501. }
  502. func closeAndDelete(f *os.File) {
  503. if err := f.Close(); err != nil {
  504. log.Errorf("error closing file: %v", err)
  505. }
  506. if err := os.Remove(f.Name()); err != nil {
  507. log.Errorf("error deleting file: %v", err)
  508. }
  509. }