csv_export.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. package costmodel
  2. import (
  3. "context"
  4. "encoding/csv"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "os"
  9. "sort"
  10. "strconv"
  11. "time"
  12. "github.com/opencost/opencost/pkg/filemanager"
  13. "github.com/opencost/opencost/pkg/kubecost"
  14. "github.com/opencost/opencost/pkg/log"
  15. )
// AllocationModel is the subset of the cost model needed by the CSV exporter:
// it computes allocation data for a time window and reports the range of
// dates for which data is available.
type AllocationModel interface {
	// ComputeAllocation returns the allocation set for [start, end) sampled
	// at the given resolution.
	ComputeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, error)
	// DateRange returns the earliest and latest timestamps for which
	// allocation data exists.
	DateRange() (time.Time, time.Time, error)
}

// errNoData signals that there is nothing (new) to export; callers can treat
// it as a benign "already up to date" condition rather than a hard failure.
var errNoData = errors.New("no data")
  21. func UpdateCSV(ctx context.Context, fileManager filemanager.FileManager, model AllocationModel) error {
  22. exporter := &csvExporter{
  23. FileManager: fileManager,
  24. Model: model,
  25. }
  26. return exporter.Update(ctx)
  27. }
// csvExporter bundles the storage backend with the allocation data source
// used to build and maintain the CSV export file.
type csvExporter struct {
	// FileManager downloads/uploads the export file in cloud storage.
	FileManager filemanager.FileManager
	// Model supplies allocation data and the available date range.
	Model AllocationModel
}
  32. // Update updates CSV file in cloud storage with new allocation data
  33. func (e *csvExporter) Update(ctx context.Context) error {
  34. allocationDates, err := e.availableAllocationDates()
  35. if err != nil {
  36. return err
  37. }
  38. if len(allocationDates) == 0 {
  39. return errors.New("no data to export from prometheus")
  40. }
  41. resultTmp, err := os.CreateTemp("", "opencost-export-*.csv")
  42. if err != nil {
  43. return err
  44. }
  45. defer closeAndDelete(resultTmp)
  46. previousExportTmp, err := os.CreateTemp("", "opencost-previous-export-*.csv")
  47. if err != nil {
  48. return err
  49. }
  50. defer closeAndDelete(previousExportTmp)
  51. err = e.FileManager.Download(ctx, previousExportTmp)
  52. switch {
  53. case errors.Is(err, filemanager.ErrNotFound):
  54. // there is no previous file, so we need to create it
  55. err := e.writeCSVToWriter(ctx, resultTmp, mapTimeToSlice(allocationDates))
  56. if err != nil {
  57. return err
  58. }
  59. case err != nil:
  60. return err
  61. default:
  62. // existing export file exists
  63. // scan through it and ignore all dates that are already in the file
  64. // avoid modifying existing data or producing duplicates
  65. err := e.updateExportCSV(ctx, previousExportTmp, allocationDates, resultTmp)
  66. if err != nil {
  67. return err
  68. }
  69. }
  70. // we just wrote to the file, so we need to seek to the beginning, so we can read from it
  71. _, err = resultTmp.Seek(0, io.SeekStart)
  72. if err != nil {
  73. return err
  74. }
  75. err = e.FileManager.Upload(ctx, resultTmp)
  76. if err != nil {
  77. return err
  78. }
  79. log.Info("CSV export updated")
  80. return nil
  81. }
  82. func (e *csvExporter) updateExportCSV(ctx context.Context, previousExportTmp *os.File, allocationDates map[time.Time]struct{}, result *os.File) error {
  83. previousDates, err := e.loadDates(previousExportTmp)
  84. if err != nil {
  85. return err
  86. }
  87. for date := range previousDates {
  88. delete(allocationDates, date)
  89. }
  90. if len(allocationDates) == 0 {
  91. log.Info("export file in cloud storage already contain data for all dates, skipping update")
  92. return errNoData
  93. }
  94. newExportTmp, err := os.CreateTemp("", "opencost-new-export-*.csv")
  95. if err != nil {
  96. return err
  97. }
  98. defer closeAndDelete(newExportTmp)
  99. err = e.writeCSVToWriter(ctx, newExportTmp, mapTimeToSlice(allocationDates))
  100. if err != nil {
  101. return err
  102. }
  103. err = mergeCSV([]*os.File{previousExportTmp, newExportTmp}, result)
  104. if err != nil {
  105. return err
  106. }
  107. return nil
  108. }
  109. func (e *csvExporter) availableAllocationDates() (map[time.Time]struct{}, error) {
  110. start, end, err := e.Model.DateRange()
  111. if err != nil {
  112. return nil, err
  113. }
  114. if start != time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC) {
  115. // start doesn't start from 00:00 UTC, it could be truncated by prometheus retention policy
  116. // skip incomplete data and begin from the day after, otherwise it may corrupt existing data
  117. start = time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC).AddDate(0, 0, 1)
  118. }
  119. end = time.Date(end.Year(), end.Month(), end.Day(), 0, 0, 0, 0, time.UTC)
  120. dates := make(map[time.Time]struct{})
  121. for date := start; date.Before(end); date = date.AddDate(0, 0, 1) {
  122. dates[date] = struct{}{}
  123. }
  124. if len(dates) == 0 {
  125. return nil, errNoData
  126. }
  127. return dates, nil
  128. }
  129. func (e *csvExporter) writeCSVToWriter(ctx context.Context, w io.Writer, dates []time.Time) error {
  130. fmtFloat := func(f float64) string {
  131. return strconv.FormatFloat(f, 'f', -1, 64)
  132. }
  133. csvWriter := csv.NewWriter(w)
  134. err := csvWriter.Write([]string{
  135. "Date",
  136. "Namespace",
  137. "ControllerKind",
  138. "ControllerName",
  139. "Pod",
  140. "Container",
  141. "CPUCoreUsageAverage",
  142. "CPUCoreRequestAverage",
  143. "RAMBytesUsageAverage",
  144. "RAMBytesRequestAverage",
  145. "NetworkReceiveBytes",
  146. "NetworkTransferBytes",
  147. "GPUs",
  148. "PVBytes",
  149. "CPUCost",
  150. "RAMCost",
  151. "NetworkCost",
  152. "PVCost",
  153. "GPUCost",
  154. "TotalCost",
  155. })
  156. if err != nil {
  157. return err
  158. }
  159. lines := 0
  160. for _, date := range dates {
  161. start := time.Date(date.Year(), date.Month(), date.Day(), 0, 0, 0, 0, time.UTC)
  162. end := start.AddDate(0, 0, 1)
  163. data, err := e.Model.ComputeAllocation(start, end, 5*time.Minute)
  164. if err != nil {
  165. return err
  166. }
  167. log.Infof("fetched %d records for %s", len(data.Allocations), date.Format("2006-01-02"))
  168. for _, alloc := range data.Allocations {
  169. if err := ctx.Err(); err != nil {
  170. return err
  171. }
  172. log.Infof("%f", alloc.TotalCost())
  173. err := csvWriter.Write([]string{
  174. date.Format("2006-01-02"),
  175. alloc.Properties.Namespace,
  176. alloc.Properties.ControllerKind,
  177. alloc.Properties.Controller,
  178. alloc.Properties.Pod,
  179. alloc.Properties.Container,
  180. fmtFloat(alloc.CPUCoreUsageAverage),
  181. fmtFloat(alloc.CPUCoreRequestAverage),
  182. fmtFloat(alloc.RAMBytesUsageAverage),
  183. fmtFloat(alloc.RAMBytesRequestAverage),
  184. fmtFloat(alloc.NetworkReceiveBytes),
  185. fmtFloat(alloc.NetworkTransferBytes),
  186. fmtFloat(alloc.GPUs()),
  187. fmtFloat(alloc.PVBytes()),
  188. fmtFloat(alloc.CPUTotalCost()),
  189. fmtFloat(alloc.RAMTotalCost()),
  190. fmtFloat(alloc.NetworkTotalCost()),
  191. fmtFloat(alloc.PVCost()),
  192. fmtFloat(alloc.GPUCost),
  193. fmtFloat(alloc.TotalCost()),
  194. })
  195. if err != nil {
  196. return err
  197. }
  198. lines++
  199. }
  200. }
  201. if lines == 0 {
  202. return errNoData
  203. }
  204. csvWriter.Flush()
  205. if err := csvWriter.Error(); err != nil {
  206. return err
  207. }
  208. log.Infof("exported %d lines", lines)
  209. return nil
  210. }
  211. // loadDate scans through CSV export file and extract all dates from "Date" column
  212. func (e *csvExporter) loadDates(csvFile *os.File) (map[time.Time]struct{}, error) {
  213. _, err := csvFile.Seek(0, io.SeekStart)
  214. if err != nil {
  215. return nil, fmt.Errorf("seeking to the beginning of csv file: %w", err)
  216. }
  217. csvReader := csv.NewReader(csvFile)
  218. header, err := csvReader.Read()
  219. if err != nil {
  220. return nil, fmt.Errorf("reading csv header: %w", err)
  221. }
  222. dateColIndex := 0
  223. for i, col := range header {
  224. if col == "Date" {
  225. dateColIndex = i
  226. break
  227. }
  228. }
  229. dates := make(map[time.Time]struct{})
  230. for {
  231. row, err := csvReader.Read()
  232. if errors.Is(err, io.EOF) {
  233. break
  234. }
  235. if err != nil {
  236. return nil, fmt.Errorf("reading csv row: %w", err)
  237. }
  238. date, err := time.Parse("2006-01-02", row[dateColIndex])
  239. if err != nil {
  240. return nil, fmt.Errorf("parsing date: %w", err)
  241. }
  242. dates[date] = struct{}{}
  243. }
  244. return dates, nil
  245. }
  246. // mergeCSV merges multiple csv files into one.
  247. // Files may have different headers, but the result will have a header that is a union of all headers.
  248. // The main goal here is to allow changing CSV format without breaking or loosing existing data.
  249. func mergeCSV(input []*os.File, output *os.File) error {
  250. var err error
  251. headers := make([][]string, 0, len(input))
  252. csvReaders := make([]*csv.Reader, 0, len(input))
  253. // first, get information about the result header
  254. for _, file := range input {
  255. _, err = file.Seek(0, io.SeekStart)
  256. if err != nil {
  257. return fmt.Errorf("seeking to the beginning of csv file: %w", err)
  258. }
  259. csvReader := csv.NewReader(file)
  260. header, err := csvReader.Read()
  261. if errors.Is(err, io.EOF) {
  262. // ignore empty files
  263. continue
  264. }
  265. if err != nil {
  266. return fmt.Errorf("reading header of csv file: %w", err)
  267. }
  268. headers = append(headers, header)
  269. csvReaders = append(csvReaders, csvReader)
  270. }
  271. mapping, header := combineHeaders(headers)
  272. csvWriter := csv.NewWriter(output)
  273. err = csvWriter.Write(mergeHeaders(headers))
  274. if err != nil {
  275. return fmt.Errorf("writing header to csv file: %w", err)
  276. }
  277. for csvIndex, csvReader := range csvReaders {
  278. for {
  279. inputLine, err := csvReader.Read()
  280. if errors.Is(err, io.EOF) {
  281. break
  282. }
  283. if err != nil {
  284. return fmt.Errorf("reading csv file line: %w", err)
  285. }
  286. outputLine := make([]string, len(header))
  287. for colIndex := range header {
  288. destColIndex, ok := mapping[csvIndex][colIndex]
  289. if !ok {
  290. continue
  291. }
  292. outputLine[destColIndex] = inputLine[colIndex]
  293. }
  294. err = csvWriter.Write(outputLine)
  295. if err != nil {
  296. return fmt.Errorf("writing line to csv file: %w", err)
  297. }
  298. }
  299. }
  300. csvWriter.Flush()
  301. // check for errors from the Flush
  302. if csvWriter.Error() != nil {
  303. return fmt.Errorf("flushing csv file: %w", csvWriter.Error())
  304. }
  305. return nil
  306. }
  307. func combineHeaders(headers [][]string) ([]map[int]int, []string) {
  308. result := make([]string, 0)
  309. indices := make([]map[int]int, len(headers))
  310. for i, header := range headers {
  311. indices[i] = make(map[int]int)
  312. for j, column := range header {
  313. if !contains(result, column) {
  314. result = append(result, column)
  315. indices[i][j] = len(result) - 1
  316. } else {
  317. indices[i][j] = indexOf(result, column)
  318. }
  319. }
  320. }
  321. return indices, result
  322. }
  323. func mergeHeaders(headers [][]string) []string {
  324. result := make([]string, 0)
  325. for _, header := range headers {
  326. for _, column := range header {
  327. if !contains(result, column) {
  328. result = append(result, column)
  329. }
  330. }
  331. }
  332. return result
  333. }
  334. func contains(slice []string, item string) bool {
  335. for _, element := range slice {
  336. if element == item {
  337. return true
  338. }
  339. }
  340. return false
  341. }
  342. func indexOf(slice []string, element string) int {
  343. for i, e := range slice {
  344. if e == element {
  345. return i
  346. }
  347. }
  348. return -1
  349. }
  350. func mapTimeToSlice(data map[time.Time]struct{}) []time.Time {
  351. result := make([]time.Time, 0, len(data))
  352. for key := range data {
  353. result = append(result, key)
  354. }
  355. sort.Slice(result, func(i, j int) bool {
  356. return result[i].Before(result[j])
  357. })
  358. return result
  359. }
  360. func closeAndDelete(f *os.File) {
  361. if err := f.Close(); err != nil {
  362. log.Errorf("error closing file: %v", err)
  363. }
  364. if err := os.Remove(f.Name()); err != nil {
  365. log.Errorf("error deleting file: %v", err)
  366. }
  367. }