athenaquerier.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302
  1. package aws
  2. import (
  3. "context"
  4. "fmt"
  5. "regexp"
  6. "strconv"
  7. "strings"
  8. "time"
  9. "github.com/aws/aws-sdk-go-v2/aws"
  10. "github.com/aws/aws-sdk-go-v2/service/athena"
  11. "github.com/aws/aws-sdk-go-v2/service/athena/types"
  12. "github.com/opencost/opencost/core/pkg/log"
  13. "github.com/opencost/opencost/core/pkg/opencost"
  14. "github.com/opencost/opencost/core/pkg/util/stringutil"
  15. "github.com/opencost/opencost/pkg/cloud"
  16. )
  17. type AthenaQuerier struct {
  18. AthenaConfiguration
  19. ConnectionStatus cloud.ConnectionStatus
  20. }
  21. func (aq *AthenaQuerier) GetStatus() cloud.ConnectionStatus {
  22. // initialize status if it has not done so; this can happen if the integration is inactive
  23. if aq.ConnectionStatus.String() == "" {
  24. aq.ConnectionStatus = cloud.InitialStatus
  25. }
  26. return aq.ConnectionStatus
  27. }
  28. func (aq *AthenaQuerier) Equals(config cloud.Config) bool {
  29. thatConfig, ok := config.(*AthenaQuerier)
  30. if !ok {
  31. return false
  32. }
  33. return aq.AthenaConfiguration.Equals(&thatConfig.AthenaConfiguration)
  34. }
  35. // GetColumns returns a list of the names of all columns in the configured
  36. // Athena table
  37. func (aq *AthenaQuerier) GetColumns() (map[string]bool, error) {
  38. columnSet := map[string]bool{}
  39. // This Query is supported by Athena tables and views
  40. q := `SELECT column_name FROM information_schema.columns WHERE table_schema = '%s' AND table_name = '%s'`
  41. query := fmt.Sprintf(q, aq.Database, aq.Table)
  42. athenaErr := aq.Query(context.TODO(), query, GetAthenaQueryFunc(func(row types.Row) {
  43. columnSet[*row.Data[0].VarCharValue] = true
  44. }))
  45. if athenaErr != nil {
  46. return columnSet, athenaErr
  47. }
  48. if len(columnSet) == 0 {
  49. log.Infof("No columns retrieved from Athena")
  50. }
  51. return columnSet, nil
  52. }
  53. // HasBillingPeriodPartitions checks if the table uses billing_period partitioning
  54. // by querying SHOW PARTITIONS and looking for billing_period partition keys
  55. func (aq *AthenaQuerier) HasBillingPeriodPartitions() (bool, error) {
  56. // Use SHOW PARTITIONS to check if billing_period partitions exist
  57. query := fmt.Sprintf("SHOW PARTITIONS \"%s\"", aq.Table)
  58. hasBillingPeriodPartition := false
  59. athenaErr := aq.Query(context.TODO(), query, GetAthenaQueryFunc(func(row types.Row) {
  60. if len(row.Data) > 0 && row.Data[0].VarCharValue != nil {
  61. partitionValue := *row.Data[0].VarCharValue
  62. // Check if partition follows billing_period=YYYY-MM format
  63. if strings.HasPrefix(partitionValue, "billing_period=") {
  64. hasBillingPeriodPartition = true
  65. }
  66. }
  67. }))
  68. if athenaErr != nil {
  69. // If SHOW PARTITIONS fails, assume no billing_period partitions
  70. log.Debugf("AthenaQuerier[%s]: SHOW PARTITIONS failed: %s", aq.Key(), athenaErr.Error())
  71. return false, athenaErr
  72. }
  73. return hasBillingPeriodPartition, nil
  74. }
  75. func (aq *AthenaQuerier) Query(ctx context.Context, query string, fn func(*athena.GetQueryResultsOutput) bool) error {
  76. err := aq.Validate()
  77. if err != nil {
  78. aq.ConnectionStatus = cloud.InvalidConfiguration
  79. return err
  80. }
  81. log.Debugf("AthenaQuerier[%s]: Performing Query: %s", aq.Key(), query)
  82. err = aq.queryAthenaPaginated(ctx, query, fn)
  83. if err != nil {
  84. aq.ConnectionStatus = cloud.FailedConnection
  85. return err
  86. }
  87. return nil
  88. }
  89. func (aq *AthenaQuerier) GetAthenaClient() (*athena.Client, error) {
  90. cfg, err := aq.Authorizer.CreateAWSConfig(aq.Region)
  91. if err != nil {
  92. return nil, err
  93. }
  94. cli := athena.NewFromConfig(cfg)
  95. return cli, nil
  96. }
  97. // QueryAthenaPaginated executes athena query and processes results. An error from this method indicates a
  98. // FAILED_CONNECTION CloudConnectionStatus and should immediately stop the caller to maintain the correct CloudConnectionStatus
  99. func (aq *AthenaQuerier) queryAthenaPaginated(ctx context.Context, query string, fn func(*athena.GetQueryResultsOutput) bool) error {
  100. queryExecutionCtx := &types.QueryExecutionContext{
  101. Database: aws.String(aq.Database),
  102. }
  103. if aq.Catalog != "" {
  104. queryExecutionCtx.Catalog = aws.String(aq.Catalog)
  105. }
  106. resultConfiguration := &types.ResultConfiguration{
  107. OutputLocation: aws.String(aq.Bucket),
  108. }
  109. startQueryExecutionInput := &athena.StartQueryExecutionInput{
  110. QueryString: aws.String(query),
  111. QueryExecutionContext: queryExecutionCtx,
  112. ResultConfiguration: resultConfiguration,
  113. }
  114. // Only set if there is a value, the default input is nil
  115. if aq.Workgroup != "" {
  116. startQueryExecutionInput.WorkGroup = aws.String(aq.Workgroup)
  117. }
  118. // Create Athena Client
  119. cli, err := aq.GetAthenaClient()
  120. if err != nil {
  121. return fmt.Errorf("QueryAthenaPaginated: GetAthenaClient error: %s", err.Error())
  122. }
  123. // Query Athena
  124. startQueryExecutionOutput, err := cli.StartQueryExecution(ctx, startQueryExecutionInput)
  125. if err != nil {
  126. return fmt.Errorf("QueryAthenaPaginated: start query error: %s", err.Error())
  127. }
  128. err = waitForQueryToComplete(ctx, cli, startQueryExecutionOutput.QueryExecutionId)
  129. if err != nil {
  130. return fmt.Errorf("QueryAthenaPaginated: query execution error: %s", err.Error())
  131. }
  132. queryResultsInput := &athena.GetQueryResultsInput{
  133. QueryExecutionId: startQueryExecutionOutput.QueryExecutionId,
  134. MaxResults: aws.Int32(1000), // this is the default value
  135. }
  136. getQueryResultsPaginator := athena.NewGetQueryResultsPaginator(cli, queryResultsInput)
  137. for getQueryResultsPaginator.HasMorePages() {
  138. pg, err := getQueryResultsPaginator.NextPage(ctx)
  139. if err != nil {
  140. log.Errorf("queryAthenaPaginated: NextPage error: %s", err.Error())
  141. continue
  142. }
  143. fn(pg)
  144. }
  145. return nil
  146. }
  147. func waitForQueryToComplete(ctx context.Context, client *athena.Client, queryExecutionID *string) error {
  148. inp := &athena.GetQueryExecutionInput{
  149. QueryExecutionId: queryExecutionID,
  150. }
  151. isQueryStillRunning := true
  152. for isQueryStillRunning {
  153. qe, err := client.GetQueryExecution(ctx, inp)
  154. if err != nil {
  155. return err
  156. }
  157. if qe.QueryExecution.Status.State == "SUCCEEDED" {
  158. isQueryStillRunning = false
  159. continue
  160. }
  161. if qe.QueryExecution.Status.State != "RUNNING" && qe.QueryExecution.Status.State != "QUEUED" {
  162. return fmt.Errorf("no query results available for query %s", *queryExecutionID)
  163. }
  164. time.Sleep(2 * time.Second)
  165. }
  166. return nil
  167. }
  168. // GetAthenaRowValue retrieve value from athena row based on column names and used stringutil.Bank() to prevent duplicate
  169. // allocation of strings
  170. func GetAthenaRowValue(row types.Row, queryColumnIndexes map[string]int, columnName string) string {
  171. columnIndex, ok := queryColumnIndexes[columnName]
  172. if !ok {
  173. return ""
  174. }
  175. valuePointer := row.Data[columnIndex].VarCharValue
  176. if valuePointer == nil {
  177. return ""
  178. }
  179. return stringutil.Bank(*valuePointer)
  180. }
  181. // getAthenaRowValueFloat retrieve value from athena row based on column names and convert to float if possible
  182. func GetAthenaRowValueFloat(row types.Row, queryColumnIndexes map[string]int, columnName string) (float64, error) {
  183. columnIndex, ok := queryColumnIndexes[columnName]
  184. if !ok {
  185. return 0.0, fmt.Errorf("getAthenaRowValueFloat: missing column index: %s", columnName)
  186. }
  187. valuePointer := row.Data[columnIndex].VarCharValue
  188. if valuePointer == nil {
  189. return 0.0, fmt.Errorf("getAthenaRowValueFloat: nil field")
  190. }
  191. cost, err := strconv.ParseFloat(*valuePointer, 64)
  192. if err != nil {
  193. return cost, fmt.Errorf("getAthenaRowValueFloat: failed to parse %s: '%s': %s", columnName, *valuePointer, err.Error())
  194. }
  195. return cost, nil
  196. }
  197. func SelectAWSCategory(providerID, usageType, service string) string {
  198. // Network has the highest priority and is based on the usage type ending in "Bytes"
  199. if strings.HasSuffix(usageType, "Bytes") {
  200. return opencost.NetworkCategory
  201. }
  202. // The node and volume conditions are mutually exclusive.
  203. // Provider ID has prefix "i-"
  204. if strings.HasPrefix(providerID, "i-") {
  205. // GuardDuty has a ProviderID prefix of "i-", but should not be categorized as compute
  206. if strings.ToUpper(service) == "AMAZONGUARDDUTY" {
  207. return opencost.OtherCategory
  208. }
  209. return opencost.ComputeCategory
  210. }
  211. // Provider ID has prefix "vol-"
  212. if strings.HasPrefix(providerID, "vol-") {
  213. return opencost.StorageCategory
  214. }
  215. // Default categories based on service
  216. switch strings.ToUpper(service) {
  217. case "AWSELB", "AWSGLUE", "AMAZONROUTE53":
  218. return opencost.NetworkCategory
  219. case "AMAZONEC2", "AWSLAMBDA", "AMAZONELASTICACHE":
  220. return opencost.ComputeCategory
  221. case "AMAZONEKS":
  222. // Check if line item is a fargate pod
  223. if strings.Contains(providerID, ":pod/") {
  224. return opencost.ComputeCategory
  225. }
  226. return opencost.ManagementCategory
  227. case "AMAZONS3", "AMAZONATHENA", "AMAZONRDS", "AMAZONDYNAMODB", "AWSSECRETSMANAGER", "AMAZONFSX":
  228. return opencost.StorageCategory
  229. default:
  230. return opencost.OtherCategory
  231. }
  232. }
  233. var parseARNRx = regexp.MustCompile("^.+\\/(.+)?") // Capture "a406f7761142e4ef58a8f2ba478d2db2" from "arn:aws:elasticloadbalancing:us-east-1:297945954695:loadbalancer/a406f7761142e4ef58a8f2ba478d2db2"
  234. func ParseARN(id string) string {
  235. match := parseARNRx.FindStringSubmatch(id)
  236. if len(match) == 0 {
  237. if id != "" {
  238. log.DedupedInfof(10, "aws.parseARN: failed to parse %s", id)
  239. }
  240. return id
  241. }
  242. return match[len(match)-1]
  243. }
  244. func GetAthenaQueryFunc(fn func(types.Row)) func(*athena.GetQueryResultsOutput) bool {
  245. pageNum := 0
  246. processItemQueryResults := func(page *athena.GetQueryResultsOutput) bool {
  247. if page == nil {
  248. log.Errorf("AthenaQuerier: Athena page is nil")
  249. return false
  250. } else if page.ResultSet == nil {
  251. log.Errorf("AthenaQuerier: Athena page.ResultSet is nil")
  252. return false
  253. }
  254. rows := page.ResultSet.Rows
  255. if pageNum == 0 {
  256. rows = page.ResultSet.Rows[1:len(page.ResultSet.Rows)]
  257. }
  258. for _, row := range rows {
  259. fn(row)
  260. }
  261. pageNum++
  262. return true
  263. }
  264. return processItemQueryResults
  265. }