storagebillingparser.go 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. package azure
  2. import (
  3. "context"
  4. "encoding/csv"
  5. "encoding/json"
  6. "fmt"
  7. "io"
  8. "os"
  9. "path/filepath"
  10. "strings"
  11. "time"
  12. "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
  13. "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
  14. "github.com/opencost/opencost/core/pkg/log"
  15. "github.com/opencost/opencost/pkg/cloud"
  16. "github.com/opencost/opencost/pkg/env"
  17. )
  18. // AzureStorageBillingParser accesses billing data stored in CSV files in Azure Storage
  19. type AzureStorageBillingParser struct {
  20. StorageConnection
  21. }
  22. func (asbp *AzureStorageBillingParser) Equals(config cloud.Config) bool {
  23. thatConfig, ok := config.(*AzureStorageBillingParser)
  24. if !ok {
  25. return false
  26. }
  27. return asbp.StorageConnection.Equals(&thatConfig.StorageConnection)
  28. }
  29. type AzureBillingResultFunc func(*BillingRowValues) error
  30. func (asbp *AzureStorageBillingParser) ParseBillingData(start, end time.Time, resultFn AzureBillingResultFunc) error {
  31. err := asbp.Validate()
  32. if err != nil {
  33. asbp.ConnectionStatus = cloud.InvalidConfiguration
  34. return err
  35. }
  36. serviceURL := fmt.Sprintf(asbp.StorageConnection.getBlobURLTemplate(), asbp.Account, "")
  37. client, err := asbp.Authorizer.GetBlobClient(serviceURL)
  38. if err != nil {
  39. asbp.ConnectionStatus = cloud.FailedConnection
  40. return err
  41. }
  42. ctx := context.Background()
  43. // most recent blob list contains information on blob including name and lastMod time
  44. // Example blobNames: [ export/myExport/20240101-20240131/myExport_758a42af-0731-4edb-b498-1e523bb40f12.csv ]
  45. blobInfos, err := asbp.getMostRecentBlobs(start, end, client, ctx)
  46. if err != nil {
  47. asbp.ConnectionStatus = cloud.FailedConnection
  48. return err
  49. }
  50. if len(blobInfos) == 0 && asbp.ConnectionStatus != cloud.SuccessfulConnection {
  51. asbp.ConnectionStatus = cloud.MissingData
  52. return nil
  53. }
  54. if env.IsAzureDownloadBillingDataToDisk() {
  55. // clean up old files that have been saved to disk before downloading new ones
  56. localPath := filepath.Join(env.GetConfigPathWithDefault(env.DefaultConfigMountPath), "db", "cloudcost")
  57. if _, err := asbp.deleteFilesOlderThan7d(localPath); err != nil {
  58. log.Warnf("CloudCost: Azure: ParseBillingData: failed to remove the following stale files: %v", err)
  59. }
  60. for _, blob := range blobInfos {
  61. blobName := *blob.Name
  62. // Use entire blob name to prevent collision with other files from previous months or other integrations (ex "part_0_0001.csv")
  63. localFilePath := filepath.Join(localPath, strings.ReplaceAll(blobName, "/", "_"))
  64. err := asbp.DownloadBlobToFile(localFilePath, blob, client, ctx)
  65. if err != nil {
  66. asbp.ConnectionStatus = cloud.FailedConnection
  67. return err
  68. }
  69. fp, err := os.Open(localFilePath)
  70. if err != nil {
  71. asbp.ConnectionStatus = cloud.FailedConnection
  72. return err
  73. }
  74. defer fp.Close()
  75. err = asbp.parseCSV(start, end, csv.NewReader(fp), resultFn)
  76. if err != nil {
  77. asbp.ConnectionStatus = cloud.ParseError
  78. return err
  79. }
  80. }
  81. } else {
  82. for _, blobInfo := range blobInfos {
  83. blobName := *blobInfo.Name
  84. streamReader, err2 := asbp.StreamBlob(blobName, client)
  85. if err2 != nil {
  86. asbp.ConnectionStatus = cloud.FailedConnection
  87. return err2
  88. }
  89. err2 = asbp.parseCSV(start, end, csv.NewReader(streamReader), resultFn)
  90. if err2 != nil {
  91. asbp.ConnectionStatus = cloud.ParseError
  92. return err2
  93. }
  94. }
  95. }
  96. asbp.ConnectionStatus = cloud.SuccessfulConnection
  97. return nil
  98. }
  99. func (asbp *AzureStorageBillingParser) parseCSV(start, end time.Time, reader *csv.Reader, resultFn AzureBillingResultFunc) error {
  100. headers, err := reader.Read()
  101. if err != nil {
  102. return err
  103. }
  104. abp, err := NewBillingParseSchema(headers)
  105. if err != nil {
  106. return err
  107. }
  108. for {
  109. var record, err = reader.Read()
  110. if err == io.EOF {
  111. break
  112. }
  113. if err != nil {
  114. return err
  115. }
  116. abv := abp.ParseRow(start, end, record)
  117. if abv == nil {
  118. continue
  119. }
  120. err = resultFn(abv)
  121. if err != nil {
  122. return err
  123. }
  124. }
  125. return nil
  126. }
  127. // getMostRecentBlobs returns a list of blobs in the Azure Storage
  128. // Container. It uses the "Last Modified Time" of the file to determine which
  129. // has the latest month-to-date billing data.
  130. func (asbp *AzureStorageBillingParser) getMostRecentBlobs(start, end time.Time, client *azblob.Client, ctx context.Context) ([]container.BlobItem, error) {
  131. log.Infof("Azure Storage: retrieving most recent reports from: %v - %v", start, end)
  132. // Get list of month substrings for months contained in the start to end range
  133. monthStrs, err := asbp.getMonthStrings(start, end)
  134. if err != nil {
  135. return nil, err
  136. }
  137. // Build map of blobs keyed by month string and blob name
  138. blobsForMonth := make(map[string]map[string]container.BlobItem)
  139. pager := client.NewListBlobsFlatPager(asbp.Container, &azblob.ListBlobsFlatOptions{
  140. Include: container.ListBlobsInclude{Deleted: false, Versions: false},
  141. })
  142. for pager.More() {
  143. resp, err := pager.NextPage(ctx)
  144. if err != nil {
  145. return nil, err
  146. }
  147. // Using the list of months strings find the most resent blob for each month in the range
  148. for _, blobInfo := range resp.Segment.BlobItems {
  149. if blobInfo.Name == nil {
  150. continue
  151. }
  152. // If Container Path configuration exists, check if it is in the blobs name
  153. if asbp.Path != "" && !strings.Contains(*blobInfo.Name, asbp.Path) {
  154. continue
  155. }
  156. for _, month := range monthStrs {
  157. if strings.Contains(*blobInfo.Name, month) {
  158. if _, ok := blobsForMonth[month]; !ok {
  159. blobsForMonth[month] = make(map[string]container.BlobItem)
  160. }
  161. blobsForMonth[month][*blobInfo.Name] = *blobInfo
  162. }
  163. }
  164. }
  165. }
  166. // build list of most recent blobs that are needed to fulfil a query on the give date range
  167. var blobs []container.BlobItem
  168. for _, monthBlobs := range blobsForMonth {
  169. // Find most recent blob
  170. var mostRecentBlob *container.BlobItem
  171. var mostRecentManifest *container.BlobItem
  172. for name := range monthBlobs {
  173. blob := monthBlobs[name]
  174. lastMod := *blob.Properties.LastModified
  175. // Handle manifest files
  176. if strings.HasSuffix(*blob.Name, "manifest.json") {
  177. if mostRecentManifest == nil {
  178. mostRecentManifest = &blob
  179. continue
  180. }
  181. if mostRecentManifest.Properties.LastModified.Before(lastMod) {
  182. mostRecentManifest = &blob
  183. }
  184. // Only look at non-manifest blobs if manifests are not present
  185. } else if mostRecentManifest == nil {
  186. if mostRecentBlob == nil {
  187. mostRecentBlob = &blob
  188. continue
  189. }
  190. if mostRecentBlob.Properties.LastModified.Before(lastMod) {
  191. mostRecentBlob = &blob
  192. }
  193. }
  194. }
  195. // In the absence of a manifest, add the most recent blob
  196. if mostRecentManifest == nil {
  197. if mostRecentBlob != nil {
  198. blobs = append(blobs, *mostRecentBlob)
  199. }
  200. continue
  201. }
  202. // download manifest for the month
  203. manifestBytes, err := asbp.DownloadBlob(*mostRecentManifest.Name, client, ctx)
  204. if err != nil {
  205. return nil, fmt.Errorf("failed to retrieve manifest %w", err)
  206. }
  207. var manifest manifestJson
  208. err = json.Unmarshal(manifestBytes, &manifest)
  209. if err != nil {
  210. return nil, fmt.Errorf("failed to unmarshal manifest %w", err)
  211. }
  212. // Add all partitioned blobs named in the manifest to the list of blobs to be retrieved
  213. for _, mb := range manifest.Blobs {
  214. namedBlob, ok := monthBlobs[mb.BlobName]
  215. if !ok {
  216. log.Errorf("AzureStorage: failed to find blob named in manifest '%s'", mb.BlobName)
  217. continue
  218. }
  219. blobs = append(blobs, namedBlob)
  220. }
  221. }
  222. return blobs, nil
  223. }
  // manifestJson models the manifest.json file that accompanies an Azure
  // billing export. Only the "blobs" array is decoded; any other keys in the
  // document are ignored.
  type manifestJson struct {
  	Blobs []manifestBlob `json:"blobs"`
  }

  // manifestBlob is a single entry of the manifest's "blobs" array and carries
  // the name of one blob belonging to the export.
  type manifestBlob struct {
  	BlobName string `json:"blobName"`
  }
  231. // getMonthStrings returns a list of month strings in the format
  232. // "YYYYMMDD-YYYYMMDD", where the dates are exactly the first and last day of
  233. // the month. It includes all month strings which would capture the start and
  234. // end parameters.
  235. // For example: ["20240201-20240229", "20240101-20240131", "20231201-20231231"]
  236. func (asbp *AzureStorageBillingParser) getMonthStrings(start, end time.Time) ([]string, error) {
  237. if start.After(end) {
  238. return []string{}, fmt.Errorf("start date must be before end date")
  239. }
  240. if end.After(time.Now()) {
  241. end = time.Now()
  242. }
  243. var monthStrs []string
  244. monthStr := asbp.timeToMonthString(start)
  245. endStr := asbp.timeToMonthString(end)
  246. monthStrs = append(monthStrs, monthStr)
  247. currMonth := start.AddDate(0, 0, -start.Day()+1)
  248. for monthStr != endStr {
  249. currMonth = currMonth.AddDate(0, 1, 0)
  250. monthStr = asbp.timeToMonthString(currMonth)
  251. monthStrs = append(monthStrs, monthStr)
  252. }
  253. return monthStrs, nil
  254. }
  255. func (asbp *AzureStorageBillingParser) timeToMonthString(input time.Time) string {
  256. format := "20060102"
  257. startOfMonth := input.AddDate(0, 0, -input.Day()+1)
  258. endOfMonth := input.AddDate(0, 1, -input.Day())
  259. return startOfMonth.Format(format) + "-" + endOfMonth.Format(format)
  260. }