storagebillingparser.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. package azure
  2. import (
  3. "context"
  4. "encoding/csv"
  5. "encoding/json"
  6. "fmt"
  7. "io"
  8. "os"
  9. "path/filepath"
  10. "strings"
  11. "time"
  12. "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
  13. "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
  14. "github.com/opencost/opencost/core/pkg/log"
  15. "github.com/opencost/opencost/pkg/cloud"
  16. "github.com/opencost/opencost/pkg/env"
  17. )
  18. // AzureStorageBillingParser accesses billing data stored in CSV files in Azure Storage
  19. type AzureStorageBillingParser struct {
  20. StorageConnection
  21. }
  22. func (asbp *AzureStorageBillingParser) Equals(config cloud.Config) bool {
  23. thatConfig, ok := config.(*AzureStorageBillingParser)
  24. if !ok {
  25. return false
  26. }
  27. return asbp.StorageConnection.Equals(&thatConfig.StorageConnection)
  28. }
  29. type AzureBillingResultFunc func(*BillingRowValues) error
  30. func (asbp *AzureStorageBillingParser) ParseBillingData(start, end time.Time, resultFn AzureBillingResultFunc) error {
  31. err := asbp.Validate()
  32. if err != nil {
  33. asbp.ConnectionStatus = cloud.InvalidConfiguration
  34. return err
  35. }
  36. serviceURL := fmt.Sprintf(asbp.StorageConnection.getBlobURLTemplate(), asbp.Account, "")
  37. client, err := asbp.Authorizer.GetBlobClient(serviceURL)
  38. if err != nil {
  39. asbp.ConnectionStatus = cloud.FailedConnection
  40. return err
  41. }
  42. ctx := context.Background()
  43. // most recent blob list contains information on blob including name and lastMod time
  44. // Example blobNames: [ export/myExport/20240101-20240131/myExport_758a42af-0731-4edb-b498-1e523bb40f12.csv ]
  45. blobInfos, err := asbp.getMostRecentBlobs(start, end, client, ctx)
  46. if err != nil {
  47. asbp.ConnectionStatus = cloud.FailedConnection
  48. return err
  49. }
  50. if len(blobInfos) == 0 && asbp.ConnectionStatus != cloud.SuccessfulConnection {
  51. asbp.ConnectionStatus = cloud.MissingData
  52. return nil
  53. }
  54. if env.IsAzureDownloadBillingDataToDisk() {
  55. // clean up old files that have been saved to disk before downloading new ones
  56. localPath := filepath.Join(env.GetConfigPathWithDefault(env.DefaultConfigMountPath), "db", "cloudcost")
  57. if _, err := asbp.deleteFilesOlderThan7d(localPath); err != nil {
  58. log.Warnf("CloudCost: Azure: ParseBillingData: failed to remove the following stale files: %v", err)
  59. }
  60. for _, blob := range blobInfos {
  61. blobName := *blob.Name
  62. localFilePath := filepath.Join(localPath, filepath.Base(blobName))
  63. err := asbp.DownloadBlobToFile(localFilePath, blob, client, ctx)
  64. if err != nil {
  65. asbp.ConnectionStatus = cloud.FailedConnection
  66. return err
  67. }
  68. fp, err := os.Open(localFilePath)
  69. if err != nil {
  70. asbp.ConnectionStatus = cloud.FailedConnection
  71. return err
  72. }
  73. defer fp.Close()
  74. err = asbp.parseCSV(start, end, csv.NewReader(fp), resultFn)
  75. if err != nil {
  76. asbp.ConnectionStatus = cloud.ParseError
  77. return err
  78. }
  79. }
  80. } else {
  81. for _, blobInfo := range blobInfos {
  82. blobName := *blobInfo.Name
  83. streamReader, err2 := asbp.StreamBlob(blobName, client)
  84. if err2 != nil {
  85. asbp.ConnectionStatus = cloud.FailedConnection
  86. return err2
  87. }
  88. err2 = asbp.parseCSV(start, end, csv.NewReader(streamReader), resultFn)
  89. if err2 != nil {
  90. asbp.ConnectionStatus = cloud.ParseError
  91. return err2
  92. }
  93. }
  94. }
  95. asbp.ConnectionStatus = cloud.SuccessfulConnection
  96. return nil
  97. }
  98. func (asbp *AzureStorageBillingParser) parseCSV(start, end time.Time, reader *csv.Reader, resultFn AzureBillingResultFunc) error {
  99. headers, err := reader.Read()
  100. if err != nil {
  101. return err
  102. }
  103. abp, err := NewBillingParseSchema(headers)
  104. if err != nil {
  105. return err
  106. }
  107. for {
  108. var record, err = reader.Read()
  109. if err == io.EOF {
  110. break
  111. }
  112. if err != nil {
  113. return err
  114. }
  115. abv := abp.ParseRow(start, end, record)
  116. if abv == nil {
  117. continue
  118. }
  119. err = resultFn(abv)
  120. if err != nil {
  121. return err
  122. }
  123. }
  124. return nil
  125. }
  126. // getMostRecentBlobs returns a list of blobs in the Azure Storage
  127. // Container. It uses the "Last Modified Time" of the file to determine which
  128. // has the latest month-to-date billing data.
  129. func (asbp *AzureStorageBillingParser) getMostRecentBlobs(start, end time.Time, client *azblob.Client, ctx context.Context) ([]container.BlobItem, error) {
  130. log.Infof("Azure Storage: retrieving most recent reports from: %v - %v", start, end)
  131. // Get list of month substrings for months contained in the start to end range
  132. monthStrs, err := asbp.getMonthStrings(start, end)
  133. if err != nil {
  134. return nil, err
  135. }
  136. // Build map of blobs keyed by month string and blob name
  137. blobsForMonth := make(map[string]map[string]container.BlobItem)
  138. pager := client.NewListBlobsFlatPager(asbp.Container, &azblob.ListBlobsFlatOptions{
  139. Include: container.ListBlobsInclude{Deleted: false, Versions: false},
  140. })
  141. for pager.More() {
  142. resp, err := pager.NextPage(ctx)
  143. if err != nil {
  144. return nil, err
  145. }
  146. // Using the list of months strings find the most resent blob for each month in the range
  147. for _, blobInfo := range resp.Segment.BlobItems {
  148. if blobInfo.Name == nil {
  149. continue
  150. }
  151. // If Container Path configuration exists, check if it is in the blobs name
  152. if asbp.Path != "" && !strings.Contains(*blobInfo.Name, asbp.Path) {
  153. continue
  154. }
  155. for _, month := range monthStrs {
  156. if strings.Contains(*blobInfo.Name, month) {
  157. if _, ok := blobsForMonth[month]; !ok {
  158. blobsForMonth[month] = make(map[string]container.BlobItem)
  159. }
  160. blobsForMonth[month][*blobInfo.Name] = *blobInfo
  161. }
  162. }
  163. }
  164. }
  165. // build list of most recent blobs that are needed to fulfil a query on the give date range
  166. var blobs []container.BlobItem
  167. for _, monthBlobs := range blobsForMonth {
  168. // Find most recent blob
  169. var mostRecentBlob *container.BlobItem
  170. var mostRecentManifest *container.BlobItem
  171. for name := range monthBlobs {
  172. blob := monthBlobs[name]
  173. lastMod := *blob.Properties.LastModified
  174. // Handle manifest files
  175. if strings.HasSuffix(*blob.Name, "manifest.json") {
  176. if mostRecentManifest == nil {
  177. mostRecentManifest = &blob
  178. continue
  179. }
  180. if mostRecentManifest.Properties.LastModified.Before(lastMod) {
  181. mostRecentManifest = &blob
  182. }
  183. // Only look at non-manifest blobs if manifests are not present
  184. } else if mostRecentManifest == nil {
  185. if mostRecentBlob == nil {
  186. mostRecentBlob = &blob
  187. continue
  188. }
  189. if mostRecentBlob.Properties.LastModified.Before(lastMod) {
  190. mostRecentBlob = &blob
  191. }
  192. }
  193. }
  194. // In the absence of a manifest, add the most recent blob
  195. if mostRecentManifest == nil {
  196. if mostRecentBlob != nil {
  197. blobs = append(blobs, *mostRecentBlob)
  198. }
  199. continue
  200. }
  201. // download manifest for the month
  202. manifestBytes, err := asbp.DownloadBlob(*mostRecentManifest.Name, client, ctx)
  203. if err != nil {
  204. return nil, fmt.Errorf("failed to retrieve manifest %w", err)
  205. }
  206. var manifest manifestJson
  207. err = json.Unmarshal(manifestBytes, &manifest)
  208. if err != nil {
  209. return nil, fmt.Errorf("failed to unmarshal manifest %w", err)
  210. }
  211. // Add all partitioned blobs named in the manifest to the list of blobs to be retrieved
  212. for _, mb := range manifest.Blobs {
  213. namedBlob, ok := monthBlobs[mb.BlobName]
  214. if !ok {
  215. log.Errorf("AzureStorage: failed to find blob named in manifest '%s'", mb.BlobName)
  216. continue
  217. }
  218. blobs = append(blobs, namedBlob)
  219. }
  220. }
  221. return blobs, nil
  222. }
  // Types used to decode the manifest.json files that accompany an Azure billing
  // export. Only the fields this package reads are declared.
  type (
  	// manifestJson is the top-level manifest document; Blobs is decoded from
  	// its "blobs" array.
  	manifestJson struct {
  		Blobs []manifestBlob `json:"blobs"`
  	}

  	// manifestBlob is one entry of the "blobs" array; BlobName is decoded from
  	// its "blobName" field.
  	manifestBlob struct {
  		BlobName string `json:"blobName"`
  	}
  )
  230. // getMonthStrings returns a list of month strings in the format
  231. // "YYYYMMDD-YYYYMMDD", where the dates are exactly the first and last day of
  232. // the month. It includes all month strings which would capture the start and
  233. // end parameters.
  234. // For example: ["20240201-20240229", "20240101-20240131", "20231201-20231231"]
  235. func (asbp *AzureStorageBillingParser) getMonthStrings(start, end time.Time) ([]string, error) {
  236. if start.After(end) {
  237. return []string{}, fmt.Errorf("start date must be before end date")
  238. }
  239. if end.After(time.Now()) {
  240. end = time.Now()
  241. }
  242. var monthStrs []string
  243. monthStr := asbp.timeToMonthString(start)
  244. endStr := asbp.timeToMonthString(end)
  245. monthStrs = append(monthStrs, monthStr)
  246. currMonth := start.AddDate(0, 0, -start.Day()+1)
  247. for monthStr != endStr {
  248. currMonth = currMonth.AddDate(0, 1, 0)
  249. monthStr = asbp.timeToMonthString(currMonth)
  250. monthStrs = append(monthStrs, monthStr)
  251. }
  252. return monthStrs, nil
  253. }
  254. func (asbp *AzureStorageBillingParser) timeToMonthString(input time.Time) string {
  255. format := "20060102"
  256. startOfMonth := input.AddDate(0, 0, -input.Day()+1)
  257. endOfMonth := input.AddDate(0, 1, -input.Day())
  258. return startOfMonth.Format(format) + "-" + endOfMonth.Format(format)
  259. }
  260. // deleteFilesOlderThan7d recursively walks the directory specified and deletes
  261. // files which have not been modified in the last 7 days. Returns a list of
  262. // files deleted.
  263. func (asbp *AzureStorageBillingParser) deleteFilesOlderThan7d(localPath string) ([]string, error) {
  264. duration := 7 * 24 * time.Hour
  265. cleaned := []string{}
  266. errs := []string{}
  267. if _, err := os.Stat(localPath); err != nil {
  268. return cleaned, nil // localPath does not exist
  269. }
  270. filepath.Walk(localPath, func(path string, info os.FileInfo, err error) error {
  271. if err != nil {
  272. errs = append(errs, err.Error())
  273. return err
  274. }
  275. if time.Since(info.ModTime()) > duration {
  276. err := os.Remove(path)
  277. if err != nil {
  278. errs = append(errs, err.Error())
  279. }
  280. cleaned = append(cleaned, path)
  281. }
  282. return nil
  283. })
  284. if len(errs) == 0 {
  285. return cleaned, nil
  286. } else {
  287. return cleaned, fmt.Errorf("deleteFilesOlderThan7d: %v", errs)
  288. }
  289. }