csv_export.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. package costmodel
  2. import (
  3. "context"
  4. "encoding/csv"
  5. "io"
  6. "os"
  7. "sort"
  8. "strconv"
  9. "time"
  10. "github.com/pkg/errors"
  11. "github.com/opencost/opencost/pkg/kubecost"
  12. "github.com/opencost/opencost/pkg/log"
  13. "github.com/opencost/opencost/pkg/storagev2"
  14. )
  15. type AllocationModel interface {
  16. ComputeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, error)
  17. DateRange() (time.Time, time.Time, error)
  18. }
  19. var errNoData = errors.New("no data")
  20. func UpdateCSV(ctx context.Context, fileManager storagev2.FileManager, model AllocationModel) error {
  21. exporter := &csvExporter{
  22. FileManager: fileManager,
  23. Model: model,
  24. }
  25. return exporter.Update(ctx)
  26. }
  27. type csvExporter struct {
  28. FileManager storagev2.FileManager
  29. Model AllocationModel
  30. }
  31. // Update updates CSV file in cloud storage with new allocation data
  32. // TODO: currently everything is processed in memory, it might be a problem for large clusters
  33. // it's possible to switch to temporary files, but it will require upgrading all storage provider to work with files
  34. func (e *csvExporter) Update(ctx context.Context) error {
  35. allocationDates, err := e.availableAllocationDates()
  36. if err != nil {
  37. return err
  38. }
  39. if len(allocationDates) == 0 {
  40. return errors.New("no data to export from prometheus")
  41. }
  42. previousExportTmp, err := os.CreateTemp("", "export-*.csv")
  43. if err != nil {
  44. return err
  45. }
  46. defer closeAndDelete(previousExportTmp)
  47. err = e.FileManager.Download(ctx, previousExportTmp)
  48. var exist bool
  49. if err != nil {
  50. if !errors.Is(err, storagev2.ErrNotFound) {
  51. return err
  52. }
  53. exist = false
  54. } else {
  55. exist = true
  56. }
  57. resultTmp, err := os.CreateTemp("", "export-*.csv")
  58. if err != nil {
  59. return err
  60. }
  61. defer closeAndDelete(resultTmp)
  62. // cloud storage doesn't have an existing file
  63. // dump all the data exist to the file
  64. if !exist {
  65. err := e.writeCSVToWriter(ctx, resultTmp, mapTimeToSlice(allocationDates))
  66. if err != nil {
  67. return err
  68. }
  69. }
  70. // existing export file exists
  71. // scan through it and ignore all dates that are already in the file
  72. // avoid modifying existing data or producing duplicates
  73. if exist {
  74. csvDates, err := e.loadDates(previousExportTmp)
  75. if err != nil {
  76. return err
  77. }
  78. for date := range csvDates {
  79. delete(allocationDates, date)
  80. }
  81. if len(allocationDates) == 0 {
  82. log.Info("export file in cloud storage already contain data for all dates, skipping update")
  83. return nil
  84. }
  85. newExportTmp, err := os.CreateTemp("", "new-export-*.csv")
  86. if err != nil {
  87. return err
  88. }
  89. defer closeAndDelete(newExportTmp)
  90. err = e.writeCSVToWriter(ctx, newExportTmp, mapTimeToSlice(allocationDates))
  91. if err != nil {
  92. return err
  93. }
  94. err = mergeCSV([]*os.File{previousExportTmp, newExportTmp}, resultTmp)
  95. if err != nil {
  96. return err
  97. }
  98. }
  99. _, err = resultTmp.Seek(0, io.SeekStart)
  100. if err != nil {
  101. return err
  102. }
  103. err = e.FileManager.Upload(ctx, resultTmp)
  104. if err != nil {
  105. return err
  106. }
  107. log.Info("CSV export updated")
  108. return nil
  109. }
  110. func (e *csvExporter) availableAllocationDates() (map[time.Time]struct{}, error) {
  111. start, end, err := e.Model.DateRange()
  112. if err != nil {
  113. return nil, err
  114. }
  115. if start != time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC) {
  116. // start doesn't start from 00:00 UTC, it could be truncated by prometheus retention policy
  117. // skip incomplete data and begin from the day after, otherwise it may corrupt existing data
  118. start = time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC).AddDate(0, 0, 1)
  119. }
  120. end = time.Date(end.Year(), end.Month(), end.Day(), 0, 0, 0, 0, time.UTC)
  121. dates := make(map[time.Time]struct{})
  122. for date := start; date.Before(end); date = date.AddDate(0, 0, 1) {
  123. dates[date] = struct{}{}
  124. }
  125. if len(dates) == 0 {
  126. return nil, errNoData
  127. }
  128. return dates, nil
  129. }
  130. func (e *csvExporter) writeCSVToWriter(ctx context.Context, w io.Writer, dates []time.Time) error {
  131. fmtFloat := func(f float64) string {
  132. return strconv.FormatFloat(f, 'f', -1, 64)
  133. }
  134. csvWriter := csv.NewWriter(w)
  135. // TODO: confirm columns we want to export
  136. err := csvWriter.Write([]string{
  137. "Date",
  138. "Namespace",
  139. "ControllerKind",
  140. "ControllerName",
  141. "Pod",
  142. "Container",
  143. "CPUCoreUsageAverage",
  144. "CPUCoreRequestAverage",
  145. "RAMBytesUsageAverage",
  146. "RAMBytesRequestAverage",
  147. "NetworkReceiveBytes",
  148. "NetworkTransferBytes",
  149. "GPUs",
  150. "PVBytes",
  151. "CPUCost",
  152. "RAMCost",
  153. "NetworkCost",
  154. "PVCost",
  155. "GPUCost",
  156. "TotalCost",
  157. })
  158. if err != nil {
  159. return err
  160. }
  161. lines := 0
  162. for _, date := range dates {
  163. start := time.Date(date.Year(), date.Month(), date.Day(), 0, 0, 0, 0, time.UTC)
  164. end := start.AddDate(0, 0, 1)
  165. data, err := e.Model.ComputeAllocation(start, end, 5*time.Minute)
  166. if err != nil {
  167. return err
  168. }
  169. log.Infof("fetched %d records for %s", len(data.Allocations), date.Format("2006-01-02"))
  170. // TODO: does it need to be aggregated by namespace+controller?
  171. // container-level information can be too noisy for most users
  172. for _, alloc := range data.Allocations {
  173. if err := ctx.Err(); err != nil {
  174. return err
  175. }
  176. log.Infof("%f", alloc.TotalCost())
  177. err := csvWriter.Write([]string{
  178. date.Format("2006-01-02"),
  179. alloc.Properties.Namespace,
  180. alloc.Properties.ControllerKind,
  181. alloc.Properties.Controller,
  182. alloc.Properties.Pod,
  183. alloc.Properties.Container,
  184. fmtFloat(alloc.CPUCoreUsageAverage),
  185. fmtFloat(alloc.CPUCoreRequestAverage),
  186. fmtFloat(alloc.RAMBytesUsageAverage),
  187. fmtFloat(alloc.RAMBytesRequestAverage),
  188. fmtFloat(alloc.NetworkReceiveBytes),
  189. fmtFloat(alloc.NetworkTransferBytes),
  190. fmtFloat(alloc.GPUs()),
  191. fmtFloat(alloc.PVBytes()),
  192. fmtFloat(alloc.CPUTotalCost()),
  193. fmtFloat(alloc.RAMTotalCost()),
  194. fmtFloat(alloc.NetworkTotalCost()),
  195. fmtFloat(alloc.PVCost()),
  196. fmtFloat(alloc.GPUCost),
  197. fmtFloat(alloc.TotalCost()),
  198. })
  199. if err != nil {
  200. return err
  201. }
  202. lines++
  203. }
  204. }
  205. if lines == 0 {
  206. return errNoData
  207. }
  208. csvWriter.Flush()
  209. if err := csvWriter.Error(); err != nil {
  210. return err
  211. }
  212. log.Infof("exported %d lines", lines)
  213. return nil
  214. }
  215. // loadDate scans through CSV export file and extract all dates from "Date" column
  216. func (e *csvExporter) loadDates(csvFile *os.File) (map[time.Time]struct{}, error) {
  217. _, err := csvFile.Seek(0, io.SeekStart)
  218. if err != nil {
  219. return nil, errors.Wrap(err, "seeking to the beginning of csv file")
  220. }
  221. csvReader := csv.NewReader(csvFile)
  222. header, err := csvReader.Read()
  223. if err != nil {
  224. return nil, errors.Wrap(err, "reading csv header")
  225. }
  226. dateColIndex := 0
  227. for i, col := range header {
  228. if col == "Date" {
  229. dateColIndex = i
  230. break
  231. }
  232. }
  233. dates := make(map[time.Time]struct{})
  234. for {
  235. row, err := csvReader.Read()
  236. if errors.Is(err, io.EOF) {
  237. break
  238. }
  239. if err != nil {
  240. return nil, errors.Wrap(err, "reading csv row")
  241. }
  242. date, err := time.Parse("2006-01-02", row[dateColIndex])
  243. if err != nil {
  244. return nil, errors.Wrap(err, "parsing date")
  245. }
  246. dates[date] = struct{}{}
  247. }
  248. return dates, nil
  249. }
  250. // mergeCSV merges multiple csv files into one.
  251. // Files may have different headers, but the result will have a header that is a union of all headers.
  252. // The main goal here is to allow changing CSV format without breaking or loosing existing data.
  253. func mergeCSV(input []*os.File, output *os.File) error {
  254. var err error
  255. headers := make([][]string, 0, len(input))
  256. csvReaders := make([]*csv.Reader, 0, len(input))
  257. // first, get information about the result header
  258. for _, file := range input {
  259. _, err = file.Seek(0, io.SeekStart)
  260. if err != nil {
  261. return errors.Wrap(err, "seeking to the beginning of csv file")
  262. }
  263. csvReader := csv.NewReader(file)
  264. header, err := csvReader.Read()
  265. if errors.Is(err, io.EOF) {
  266. // ignore empty files
  267. continue
  268. }
  269. if err != nil {
  270. return errors.Wrap(err, "reading header of csv file")
  271. }
  272. headers = append(headers, header)
  273. csvReaders = append(csvReaders, csvReader)
  274. }
  275. mapping, header := combineHeaders(headers)
  276. csvWriter := csv.NewWriter(output)
  277. err = csvWriter.Write(mergeHeaders(headers))
  278. if err != nil {
  279. return errors.Wrap(err, "writing header to csv file")
  280. }
  281. for csvIndex, csvReader := range csvReaders {
  282. for {
  283. inputLine, err := csvReader.Read()
  284. if errors.Is(err, io.EOF) {
  285. break
  286. }
  287. if err != nil {
  288. return errors.Wrap(err, "reading csv file line")
  289. }
  290. outputLine := make([]string, len(header))
  291. for colIndex := range header {
  292. destColIndex, ok := mapping[csvIndex][colIndex]
  293. if !ok {
  294. continue
  295. }
  296. outputLine[destColIndex] = inputLine[colIndex]
  297. }
  298. err = csvWriter.Write(outputLine)
  299. if err != nil {
  300. return errors.Wrap(err, "writing line to csv file")
  301. }
  302. }
  303. }
  304. csvWriter.Flush()
  305. if csvWriter.Error() != nil {
  306. return errors.Wrapf(csvWriter.Error(), "flushing csv file")
  307. }
  308. return nil
  309. }
  // combineHeaders builds the union of several CSV headers.
  //
  // It returns:
  //   - one map per input header, translating a column index of that header to
  //     the index of the same-named column in the combined header;
  //   - the combined header, listing each distinct column name once, in order
  //     of first appearance across the inputs.
  //
  // A name that occurs more than once within a single header maps every
  // occurrence to the same combined column.
  func combineHeaders(headers [][]string) ([]map[int]int, []string) {
  	result := make([]string, 0)
  	// position remembers where each column name sits in result, so every
  	// column needs a single map lookup to be placed.
  	position := make(map[string]int)
  	indices := make([]map[int]int, len(headers))

  	for i, header := range headers {
  		indices[i] = make(map[int]int, len(header))
  		for j, column := range header {
  			dest, known := position[column]
  			if !known {
  				dest = len(result)
  				position[column] = dest
  				result = append(result, column)
  			}
  			indices[i][j] = dest
  		}
  	}
  	return indices, result
  }
  // mergeHeaders returns the union of the given CSV headers: each distinct
  // column name appears exactly once, in order of first appearance across the
  // inputs. The result is an empty, non-nil slice when there are no columns.
  func mergeHeaders(headers [][]string) []string {
  	result := make([]string, 0)
  	// seen tracks the names already emitted so membership is a map lookup.
  	seen := make(map[string]struct{})
  	for _, header := range headers {
  		for _, column := range header {
  			if _, dup := seen[column]; dup {
  				continue
  			}
  			seen[column] = struct{}{}
  			result = append(result, column)
  		}
  	}
  	return result
  }
  // contains reports whether item is equal to at least one element of slice.
  func contains(slice []string, item string) bool {
  	for i := 0; i < len(slice); i++ {
  		if slice[i] == item {
  			return true
  		}
  	}
  	return false
  }
  // indexOf returns the index of the first element of slice that is equal to
  // element, or -1 when there is no such element.
  func indexOf(slice []string, element string) int {
  	pos := 0
  	for pos < len(slice) && slice[pos] != element {
  		pos++
  	}
  	if pos == len(slice) {
  		return -1
  	}
  	return pos
  }
  // mapTimeToSlice returns the keys of data as a slice ordered from the
  // earliest to the latest time. The result is empty but non-nil when data has
  // no keys.
  func mapTimeToSlice(data map[time.Time]struct{}) []time.Time {
  	times := make([]time.Time, len(data))
  	next := 0
  	for t := range data {
  		times[next] = t
  		next++
  	}
  	// map iteration order is random, so the keys are sorted chronologically
  	sort.Slice(times, func(a, b int) bool {
  		return times[b].After(times[a])
  	})
  	return times
  }
  363. func closeAndDelete(f *os.File) {
  364. if err := f.Close(); err != nil {
  365. log.Errorf("error closing file: %v", err)
  366. }
  367. if err := os.Remove(f.Name()); err != nil {
  368. log.Errorf("error deleting file: %v", err)
  369. }
  370. }