athenaquerier.go 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. package aws
  2. import (
  3. "context"
  4. "fmt"
  5. "regexp"
  6. "strconv"
  7. "strings"
  8. "time"
  9. "github.com/opencost/opencost/pkg/cloud"
  10. cloudconfig "github.com/opencost/opencost/pkg/cloud/config"
  11. "github.com/aws/aws-sdk-go-v2/aws"
  12. "github.com/aws/aws-sdk-go-v2/service/athena"
  13. "github.com/aws/aws-sdk-go-v2/service/athena/types"
  14. "github.com/opencost/opencost/pkg/kubecost"
  15. "github.com/opencost/opencost/pkg/log"
  16. "github.com/opencost/opencost/pkg/util/stringutil"
  17. )
  18. type AthenaQuerier struct {
  19. AthenaConfiguration
  20. ConnectionStatus cloud.ConnectionStatus
  21. }
  22. func (aq *AthenaQuerier) GetStatus() cloud.ConnectionStatus {
  23. // initialize status if it has not done so; this can happen if the integration is inactive
  24. if aq.ConnectionStatus.String() == "" {
  25. aq.ConnectionStatus = cloud.InitialStatus
  26. }
  27. return aq.ConnectionStatus
  28. }
  29. func (aq *AthenaQuerier) Equals(config cloudconfig.Config) bool {
  30. thatConfig, ok := config.(*AthenaQuerier)
  31. if !ok {
  32. return false
  33. }
  34. return aq.AthenaConfiguration.Equals(&thatConfig.AthenaConfiguration)
  35. }
  36. // GetColumns returns a list of the names of all columns in the configured
  37. // Athena table
  38. func (aq *AthenaQuerier) GetColumns() (map[string]bool, error) {
  39. columnSet := map[string]bool{}
  40. // This Query is supported by Athena tables and views
  41. q := `SELECT column_name FROM information_schema.columns WHERE table_schema = '%s' AND table_name = '%s'`
  42. query := fmt.Sprintf(q, aq.Database, aq.Table)
  43. athenaErr := aq.Query(context.TODO(), query, GetAthenaQueryFunc(func(row types.Row) {
  44. columnSet[*row.Data[0].VarCharValue] = true
  45. }))
  46. if athenaErr != nil {
  47. return columnSet, athenaErr
  48. }
  49. if len(columnSet) == 0 {
  50. log.Infof("No columns retrieved from Athena")
  51. }
  52. return columnSet, nil
  53. }
  54. func (aq *AthenaQuerier) Query(ctx context.Context, query string, fn func(*athena.GetQueryResultsOutput) bool) error {
  55. err := aq.Validate()
  56. if err != nil {
  57. aq.ConnectionStatus = cloud.InvalidConfiguration
  58. return err
  59. }
  60. log.Debugf("AthenaQuerier[%s]: Performing Query: %s", aq.Key(), query)
  61. err = aq.queryAthenaPaginated(ctx, query, fn)
  62. if err != nil {
  63. aq.ConnectionStatus = cloud.FailedConnection
  64. return err
  65. }
  66. return nil
  67. }
  68. func (aq *AthenaQuerier) GetAthenaClient() (*athena.Client, error) {
  69. cfg, err := aq.Authorizer.CreateAWSConfig(aq.Region)
  70. if err != nil {
  71. return nil, err
  72. }
  73. cli := athena.NewFromConfig(cfg)
  74. return cli, nil
  75. }
  76. // QueryAthenaPaginated executes athena query and processes results. An error from this method indicates a
  77. // FAILED_CONNECTION CloudConnectionStatus and should immediately stop the caller to maintain the correct CloudConnectionStatus
  78. func (aq *AthenaQuerier) queryAthenaPaginated(ctx context.Context, query string, fn func(*athena.GetQueryResultsOutput) bool) error {
  79. queryExecutionCtx := &types.QueryExecutionContext{
  80. Database: aws.String(aq.Database),
  81. }
  82. if aq.Catalog != "" {
  83. queryExecutionCtx.Catalog = aws.String(aq.Catalog)
  84. }
  85. resultConfiguration := &types.ResultConfiguration{
  86. OutputLocation: aws.String(aq.Bucket),
  87. }
  88. startQueryExecutionInput := &athena.StartQueryExecutionInput{
  89. QueryString: aws.String(query),
  90. QueryExecutionContext: queryExecutionCtx,
  91. ResultConfiguration: resultConfiguration,
  92. }
  93. // Only set if there is a value, the default input is nil
  94. if aq.Workgroup != "" {
  95. startQueryExecutionInput.WorkGroup = aws.String(aq.Workgroup)
  96. }
  97. // Create Athena Client
  98. cli, err := aq.GetAthenaClient()
  99. if err != nil {
  100. return fmt.Errorf("QueryAthenaPaginated: GetAthenaClient error: %s", err.Error())
  101. }
  102. // Query Athena
  103. startQueryExecutionOutput, err := cli.StartQueryExecution(ctx, startQueryExecutionInput)
  104. if err != nil {
  105. return fmt.Errorf("QueryAthenaPaginated: start query error: %s", err.Error())
  106. }
  107. err = waitForQueryToComplete(ctx, cli, startQueryExecutionOutput.QueryExecutionId)
  108. if err != nil {
  109. return fmt.Errorf("QueryAthenaPaginated: query execution error: %s", err.Error())
  110. }
  111. queryResultsInput := &athena.GetQueryResultsInput{
  112. QueryExecutionId: startQueryExecutionOutput.QueryExecutionId,
  113. MaxResults: aws.Int32(1000), // this is the default value
  114. }
  115. getQueryResultsPaginator := athena.NewGetQueryResultsPaginator(cli, queryResultsInput)
  116. for getQueryResultsPaginator.HasMorePages() {
  117. pg, err := getQueryResultsPaginator.NextPage(ctx)
  118. if err != nil {
  119. log.Errorf("queryAthenaPaginated: NextPage error: %s", err.Error())
  120. continue
  121. }
  122. fn(pg)
  123. }
  124. return nil
  125. }
  126. func waitForQueryToComplete(ctx context.Context, client *athena.Client, queryExecutionID *string) error {
  127. inp := &athena.GetQueryExecutionInput{
  128. QueryExecutionId: queryExecutionID,
  129. }
  130. isQueryStillRunning := true
  131. for isQueryStillRunning {
  132. qe, err := client.GetQueryExecution(ctx, inp)
  133. if err != nil {
  134. return err
  135. }
  136. if qe.QueryExecution.Status.State == "SUCCEEDED" {
  137. isQueryStillRunning = false
  138. continue
  139. }
  140. if qe.QueryExecution.Status.State != "RUNNING" && qe.QueryExecution.Status.State != "QUEUED" {
  141. return fmt.Errorf("no query results available for query %s", *queryExecutionID)
  142. }
  143. time.Sleep(2 * time.Second)
  144. }
  145. return nil
  146. }
  147. // GetAthenaRowValue retrieve value from athena row based on column names and used stringutil.Bank() to prevent duplicate
  148. // allocation of strings
  149. func GetAthenaRowValue(row types.Row, queryColumnIndexes map[string]int, columnName string) string {
  150. columnIndex, ok := queryColumnIndexes[columnName]
  151. if !ok {
  152. return ""
  153. }
  154. valuePointer := row.Data[columnIndex].VarCharValue
  155. if valuePointer == nil {
  156. return ""
  157. }
  158. return stringutil.Bank(*valuePointer)
  159. }
  160. // getAthenaRowValueFloat retrieve value from athena row based on column names and convert to float if possible
  161. func GetAthenaRowValueFloat(row types.Row, queryColumnIndexes map[string]int, columnName string) (float64, error) {
  162. columnIndex, ok := queryColumnIndexes[columnName]
  163. if !ok {
  164. return 0.0, fmt.Errorf("getAthenaRowValueFloat: missing column index: %s", columnName)
  165. }
  166. valuePointer := row.Data[columnIndex].VarCharValue
  167. if valuePointer == nil {
  168. return 0.0, fmt.Errorf("getAthenaRowValueFloat: nil field")
  169. }
  170. cost, err := strconv.ParseFloat(*valuePointer, 64)
  171. if err != nil {
  172. return cost, fmt.Errorf("getAthenaRowValueFloat: failed to parse %s: '%s': %s", columnName, *valuePointer, err.Error())
  173. }
  174. return cost, nil
  175. }
  176. func SelectAWSCategory(providerID, usageType, service string) string {
  177. // Network has the highest priority and is based on the usage type ending in "Bytes"
  178. if strings.HasSuffix(usageType, "Bytes") {
  179. return kubecost.NetworkCategory
  180. }
  181. // The node and volume conditions are mutually exclusive.
  182. // Provider ID has prefix "i-"
  183. if strings.HasPrefix(providerID, "i-") {
  184. return kubecost.ComputeCategory
  185. }
  186. // Provider ID has prefix "vol-"
  187. if strings.HasPrefix(providerID, "vol-") {
  188. return kubecost.StorageCategory
  189. }
  190. // Default categories based on service
  191. switch strings.ToUpper(service) {
  192. case "AWSELB", "AWSGLUE", "AMAZONROUTE53":
  193. return kubecost.NetworkCategory
  194. case "AMAZONEC2", "AWSLAMBDA", "AMAZONELASTICACHE":
  195. return kubecost.ComputeCategory
  196. case "AMAZONEKS":
  197. // Check if line item is a fargate pod
  198. if strings.Contains(providerID, ":pod/") {
  199. return kubecost.ComputeCategory
  200. }
  201. return kubecost.ManagementCategory
  202. case "AMAZONS3", "AMAZONATHENA", "AMAZONRDS", "AMAZONDYNAMODB", "AWSSECRETSMANAGER", "AMAZONFSX":
  203. return kubecost.StorageCategory
  204. default:
  205. return kubecost.OtherCategory
  206. }
  207. }
  208. var parseARNRx = regexp.MustCompile("^.+\\/(.+)?") // Capture "a406f7761142e4ef58a8f2ba478d2db2" from "arn:aws:elasticloadbalancing:us-east-1:297945954695:loadbalancer/a406f7761142e4ef58a8f2ba478d2db2"
  209. func ParseARN(id string) string {
  210. match := parseARNRx.FindStringSubmatch(id)
  211. if len(match) == 0 {
  212. if id != "" {
  213. log.DedupedInfof(10, "aws.parseARN: failed to parse %s", id)
  214. }
  215. return id
  216. }
  217. return match[len(match)-1]
  218. }
  219. func GetAthenaQueryFunc(fn func(types.Row)) func(*athena.GetQueryResultsOutput) bool {
  220. pageNum := 0
  221. processItemQueryResults := func(page *athena.GetQueryResultsOutput) bool {
  222. if page == nil {
  223. log.Errorf("AthenaQuerier: Athena page is nil")
  224. return false
  225. } else if page.ResultSet == nil {
  226. log.Errorf("AthenaQuerier: Athena page.ResultSet is nil")
  227. return false
  228. }
  229. rows := page.ResultSet.Rows
  230. if pageNum == 0 {
  231. rows = page.ResultSet.Rows[1:len(page.ResultSet.Rows)]
  232. }
  233. for _, row := range rows {
  234. fn(row)
  235. }
  236. pageNum++
  237. return true
  238. }
  239. return processItemQueryResults
  240. }