query.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473
  1. package prom
  2. import (
  3. "context"
  4. "fmt"
  5. "net/http"
  6. "net/url"
  7. "strconv"
  8. "time"
  9. "github.com/opencost/opencost/core/pkg/log"
  10. "github.com/opencost/opencost/core/pkg/source"
  11. "github.com/opencost/opencost/core/pkg/util/httputil"
  12. "github.com/opencost/opencost/core/pkg/util/json"
  13. "github.com/opencost/opencost/core/pkg/errors"
  14. prometheus "github.com/prometheus/client_golang/api"
  15. v1 "github.com/prometheus/client_golang/api/prometheus/v1"
  16. )
  // Paths of the Prometheus HTTP API endpoints handed to Client.URL.
  const (
  	// apiPrefix is the versioned root shared by the endpoints below.
  	apiPrefix = "/api/v1"

  	// epQuery is the path used for instant queries.
  	epQuery = apiPrefix + "/query"

  	// epQueryRange is the path used for range queries.
  	epQueryRange = apiPrefix + "/query_range"
  )
  22. // ContextFactory is a factory for creating new Contexts for prometheus queries.
  23. type ContextFactory struct {
  24. client prometheus.Client
  25. config *OpenCostPrometheusConfig
  26. }
  27. // NewContextFactory creates a new ContextFactory with the provided prometheus client.
  28. func NewContextFactory(client prometheus.Client, promConfig *OpenCostPrometheusConfig) *ContextFactory {
  29. return &ContextFactory{
  30. client: client,
  31. config: promConfig,
  32. }
  33. }
  34. // NewContext creates a new prometheus query context.
  35. func (cf *ContextFactory) NewContext() *Context {
  36. return NewContext(cf.client, cf.config)
  37. }
  38. // NewContext creates a new named prometheus query context.
  39. func (cf *ContextFactory) NewNamedContext(name string) *Context {
  40. return NewNamedContext(cf.client, cf.config, name)
  41. }
  42. // Context wraps a Prometheus client and provides methods for querying and
  43. // parsing query responses and errors.
  44. type Context struct {
  45. Client prometheus.Client
  46. config *OpenCostPrometheusConfig
  47. name string
  48. errorCollector *source.QueryErrorCollector
  49. }
  50. // NewContext creates a new Prometheus querying context from the given client
  51. func NewContext(client prometheus.Client, config *OpenCostPrometheusConfig) *Context {
  52. var ec source.QueryErrorCollector
  53. return &Context{
  54. Client: client,
  55. config: config,
  56. name: "",
  57. errorCollector: &ec,
  58. }
  59. }
  60. // NewNamedContext creates a new named Prometheus querying context from the given client
  61. func NewNamedContext(client prometheus.Client, config *OpenCostPrometheusConfig, name string) *Context {
  62. ctx := NewContext(client, config)
  63. ctx.name = name
  64. return ctx
  65. }
  66. // Warnings returns the warnings collected from the Context's ErrorCollector
  67. func (ctx *Context) Warnings() []*source.QueryWarning {
  68. return ctx.errorCollector.Warnings()
  69. }
  70. // HasWarnings returns true if the ErrorCollector has warnings.
  71. func (ctx *Context) HasWarnings() bool {
  72. return ctx.errorCollector.IsWarning()
  73. }
  74. // Errors returns the errors collected from the Context's ErrorCollector.
  75. func (ctx *Context) Errors() []*source.QueryError {
  76. return ctx.errorCollector.Errors()
  77. }
  78. // HasErrors returns true if the ErrorCollector has errors
  79. func (ctx *Context) HasErrors() bool {
  80. return ctx.errorCollector.IsError()
  81. }
  82. // ErrorCollection returns the aggregation of errors if there exists errors. Otherwise,
  83. // nil is returned
  84. func (ctx *Context) ErrorCollection() error {
  85. if ctx.errorCollector.IsError() {
  86. // errorCollector implements the error interface
  87. return ctx.errorCollector
  88. }
  89. return nil
  90. }
  91. // Query returns a QueryResultsChan, then runs the given query and sends the
  92. // results on the provided channel. Receiver is responsible for closing the
  93. // channel, preferably using the Read method.
  94. func (ctx *Context) Query(query string) source.QueryResultsChan {
  95. resCh := make(source.QueryResultsChan)
  96. go runQuery(query, ctx, resCh, time.Now(), "")
  97. return resCh
  98. }
  99. // QueryAtTime returns a QueryResultsChan, then runs the given query at the
  100. // given time (see time parameter here: https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries)
  101. // and sends the results on the provided channel. Receiver is responsible for
  102. // closing the channel, preferably using the Read method.
  103. func (ctx *Context) QueryAtTime(query string, t time.Time) source.QueryResultsChan {
  104. resCh := make(source.QueryResultsChan)
  105. go runQuery(query, ctx, resCh, t, "")
  106. return resCh
  107. }
  108. // ProfileQuery returns a QueryResultsChan, then runs the given query with a profile
  109. // label and sends the results on the provided channel. Receiver is responsible for closing the
  110. // channel, preferably using the Read method.
  111. func (ctx *Context) ProfileQuery(query string, profileLabel string) source.QueryResultsChan {
  112. resCh := make(source.QueryResultsChan)
  113. go runQuery(query, ctx, resCh, time.Now(), profileLabel)
  114. return resCh
  115. }
  116. // QueryAll returns one QueryResultsChan for each query provided, then runs
  117. // each query concurrently and returns results on each channel, respectively,
  118. // in the order they were provided; i.e. the response to queries[1] will be
  119. // sent on channel resChs[1].
  120. func (ctx *Context) QueryAll(queries ...string) []source.QueryResultsChan {
  121. resChs := []source.QueryResultsChan{}
  122. for _, q := range queries {
  123. resChs = append(resChs, ctx.Query(q))
  124. }
  125. return resChs
  126. }
  127. // ProfileQueryAll returns one QueryResultsChan for each query provided, then runs
  128. // each ProfileQuery concurrently and returns results on each channel, respectively,
  129. // in the order they were provided; i.e. the response to queries[1] will be
  130. // sent on channel resChs[1].
  131. func (ctx *Context) ProfileQueryAll(queries ...string) []source.QueryResultsChan {
  132. resChs := []source.QueryResultsChan{}
  133. for _, q := range queries {
  134. resChs = append(resChs, ctx.ProfileQuery(q, fmt.Sprintf("Query #%d", len(resChs)+1)))
  135. }
  136. return resChs
  137. }
  138. func (ctx *Context) QuerySync(query string) ([]*source.QueryResult, v1.Warnings, error) {
  139. raw, warnings, err := ctx.query(query, time.Now())
  140. if err != nil {
  141. return nil, warnings, err
  142. }
  143. // create result keys from custom cluster label
  144. resultKeys := source.ClusterKeyWithDefaults(ctx.config.ClusterLabel)
  145. results := NewQueryResults(query, raw, resultKeys)
  146. if results.Error != nil {
  147. return nil, warnings, results.Error
  148. }
  149. return results.Results, warnings, nil
  150. }
  151. // QueryURL returns the URL used to query Prometheus
  152. func (ctx *Context) QueryURL() *url.URL {
  153. return ctx.Client.URL(epQuery, nil)
  154. }
  155. // runQuery executes the prometheus query asynchronously, collects results and
  156. // errors, and passes them through the results channel.
  157. func runQuery(query string, ctx *Context, resCh source.QueryResultsChan, t time.Time, profileLabel string) {
  158. defer errors.HandlePanic()
  159. startQuery := time.Now()
  160. raw, warnings, requestError := ctx.query(query, t)
  161. var parseError error
  162. var results *source.QueryResults
  163. if requestError != nil {
  164. results = NewQueryResultError(query, requestError)
  165. } else {
  166. // create result keys from custom cluster label
  167. resultKeys := source.ClusterKeyWithDefaults(ctx.config.ClusterLabel)
  168. results = NewQueryResults(query, raw, resultKeys)
  169. parseError = results.Error
  170. }
  171. // report all warnings, request, and parse errors (nils will be ignored)
  172. ctx.errorCollector.Report(query, warnings, requestError, parseError)
  173. if profileLabel != "" {
  174. log.Profile(startQuery, profileLabel)
  175. }
  176. resCh <- results
  177. }
  178. // RawQuery is a direct query to the prometheus client and returns the body of the response
  179. func (ctx *Context) RawQuery(query string, t time.Time) ([]byte, error) {
  180. u := ctx.Client.URL(epQuery, nil)
  181. q := u.Query()
  182. q.Set("query", query)
  183. if t.IsZero() {
  184. t = time.Now()
  185. }
  186. q.Set("time", strconv.FormatInt(t.Unix(), 10))
  187. u.RawQuery = q.Encode()
  188. req, err := http.NewRequest(http.MethodPost, u.String(), nil)
  189. if err != nil {
  190. return nil, err
  191. }
  192. // Set QueryContext name if non empty
  193. if ctx.name != "" {
  194. req = httputil.SetName(req, ctx.name)
  195. }
  196. req = httputil.SetQuery(req, query)
  197. // Note that the warnings return value from client.Do() is always nil using this
  198. // version of the prometheus client library. We parse the warnings out of the response
  199. // body after json decodidng completes.
  200. resp, body, err := ctx.Client.Do(context.Background(), req)
  201. if err != nil {
  202. if resp == nil {
  203. return nil, fmt.Errorf("query error: '%s' fetching query '%s'", err.Error(), query)
  204. }
  205. return nil, fmt.Errorf("query error %d: '%s' fetching query '%s'", resp.StatusCode, err.Error(), query)
  206. }
  207. // Unsuccessful Status Code, log body and status
  208. statusCode := resp.StatusCode
  209. statusText := http.StatusText(statusCode)
  210. if resp.StatusCode < 200 || resp.StatusCode >= 300 {
  211. return nil, source.CommErrorf("%d (%s) URL: '%s', Body: '%s' Query: '%s'", statusCode, statusText, req.URL, body, query)
  212. }
  213. return body, err
  214. }
  215. func (ctx *Context) query(query string, t time.Time) (interface{}, v1.Warnings, error) {
  216. body, err := ctx.RawQuery(query, t)
  217. if err != nil {
  218. return nil, nil, err
  219. }
  220. var toReturn interface{}
  221. err = json.Unmarshal(body, &toReturn)
  222. if err != nil {
  223. return nil, nil, fmt.Errorf("query '%s' caused unmarshal error: %s", query, err)
  224. }
  225. warnings := warningsFrom(toReturn)
  226. for _, w := range warnings {
  227. // NoStoreAPIWarning is a warning that we would consider an error. It returns partial data relating only to the
  228. // store apis which were reachable. In order to ensure integrity of data across all clusters, we'll need to identify
  229. // this warning and convert it to an error.
  230. if source.IsNoStoreAPIWarning(w) {
  231. return nil, warnings, source.CommErrorf("Error: %s, Body: %s, Query: %s", w, body, query)
  232. }
  233. log.Warnf("fetching query '%s': %s", query, w)
  234. }
  235. return toReturn, warnings, nil
  236. }
  237. // isRequestStepAligned will check if the start and end times are aligned with the step
  238. func (ctx *Context) isRequestStepAligned(start, end time.Time, step time.Duration) bool {
  239. startInUnix := start.Unix()
  240. endInUnix := end.Unix()
  241. stepInSeconds := step.Milliseconds() / 1e3
  242. return startInUnix%stepInSeconds == 0 && endInUnix%stepInSeconds == 0
  243. }
  244. func (ctx *Context) QueryRange(query string, start, end time.Time, step time.Duration) source.QueryResultsChan {
  245. resCh := make(source.QueryResultsChan)
  246. if !ctx.isRequestStepAligned(start, end, step) {
  247. start, end = ctx.alignWindow(start, end, step)
  248. }
  249. go runQueryRange(query, start, end, step, ctx, resCh, "")
  250. return resCh
  251. }
  252. func (ctx *Context) ProfileQueryRange(query string, start, end time.Time, step time.Duration, profileLabel string) source.QueryResultsChan {
  253. resCh := make(source.QueryResultsChan)
  254. go runQueryRange(query, start, end, step, ctx, resCh, profileLabel)
  255. return resCh
  256. }
  257. func (ctx *Context) QueryRangeSync(query string, start, end time.Time, step time.Duration) ([]*source.QueryResult, v1.Warnings, error) {
  258. raw, warnings, err := ctx.queryRange(query, start, end, step)
  259. if err != nil {
  260. return nil, warnings, err
  261. }
  262. // create result keys from custom cluster label
  263. resultKeys := source.ClusterKeyWithDefaults(ctx.config.ClusterLabel)
  264. results := NewQueryResults(query, raw, resultKeys)
  265. if results.Error != nil {
  266. return nil, warnings, results.Error
  267. }
  268. return results.Results, warnings, nil
  269. }
  270. // QueryRangeURL returns the URL used to query_range Prometheus
  271. func (ctx *Context) QueryRangeURL() *url.URL {
  272. return ctx.Client.URL(epQueryRange, nil)
  273. }
  274. // runQueryRange executes the prometheus queryRange asynchronously, collects results and
  275. // errors, and passes them through the results channel.
  276. func runQueryRange(query string, start, end time.Time, step time.Duration, ctx *Context, resCh source.QueryResultsChan, profileLabel string) {
  277. defer errors.HandlePanic()
  278. startQuery := time.Now()
  279. raw, warnings, requestError := ctx.queryRange(query, start, end, step)
  280. var parseError error
  281. var results *source.QueryResults
  282. if requestError != nil {
  283. results = NewQueryResultError(query, requestError)
  284. } else {
  285. // create result keys from custom cluster label
  286. resultKeys := source.ClusterKeyWithDefaults(ctx.config.ClusterLabel)
  287. results = NewQueryResults(query, raw, resultKeys)
  288. parseError = results.Error
  289. }
  290. // report all warnings, request, and parse errors (nils will be ignored)
  291. ctx.errorCollector.Report(query, warnings, requestError, parseError)
  292. if profileLabel != "" {
  293. log.Profile(startQuery, profileLabel)
  294. }
  295. resCh <- results
  296. }
  297. // RawQuery is a direct query to the prometheus client and returns the body of the response
  298. func (ctx *Context) RawQueryRange(query string, start, end time.Time, step time.Duration) ([]byte, error) {
  299. u := ctx.Client.URL(epQueryRange, nil)
  300. q := u.Query()
  301. q.Set("query", query)
  302. q.Set("start", start.Format(time.RFC3339Nano))
  303. q.Set("end", end.Format(time.RFC3339Nano))
  304. q.Set("step", strconv.FormatFloat(step.Seconds(), 'f', 3, 64))
  305. u.RawQuery = q.Encode()
  306. req, err := http.NewRequest(http.MethodPost, u.String(), nil)
  307. if err != nil {
  308. return nil, err
  309. }
  310. // Set QueryContext name if non empty
  311. if ctx.name != "" {
  312. req = httputil.SetName(req, ctx.name)
  313. }
  314. req = httputil.SetQuery(req, query)
  315. // Note that the warnings return value from client.Do() is always nil using this
  316. // version of the prometheus client library. We parse the warnings out of the response
  317. // body after json decodidng completes.
  318. resp, body, err := ctx.Client.Do(context.Background(), req)
  319. if err != nil {
  320. if resp == nil {
  321. return nil, fmt.Errorf("Error: %s, Body: %s Query: %s", err.Error(), body, query)
  322. }
  323. return nil, fmt.Errorf("%d (%s) Error: %s Body: %s Query: %s", resp.StatusCode, http.StatusText(resp.StatusCode), body, err.Error(), query)
  324. }
  325. // Unsuccessful Status Code, log body and status
  326. statusCode := resp.StatusCode
  327. statusText := http.StatusText(statusCode)
  328. if resp.StatusCode < 200 || resp.StatusCode >= 300 {
  329. return nil, source.CommErrorf("%d (%s) Body: %s Query: %s", statusCode, statusText, body, query)
  330. }
  331. return body, err
  332. }
  333. func (ctx *Context) queryRange(query string, start, end time.Time, step time.Duration) (interface{}, v1.Warnings, error) {
  334. body, err := ctx.RawQueryRange(query, start, end, step)
  335. if err != nil {
  336. return nil, nil, err
  337. }
  338. var toReturn interface{}
  339. err = json.Unmarshal(body, &toReturn)
  340. if err != nil {
  341. return nil, nil, fmt.Errorf("query '%s' caused unmarshal error: %s", query, err)
  342. }
  343. warnings := warningsFrom(toReturn)
  344. for _, w := range warnings {
  345. // NoStoreAPIWarning is a warning that we would consider an error. It returns partial data relating only to the
  346. // store apis which were reachable. In order to ensure integrity of data across all clusters, we'll need to identify
  347. // this warning and convert it to an error.
  348. if source.IsNoStoreAPIWarning(w) {
  349. return nil, warnings, source.CommErrorf("Error: %s, Body: %s, Query: %s", w, body, query)
  350. }
  351. log.Warnf("fetching query '%s': %s", query, w)
  352. }
  353. return toReturn, warnings, nil
  354. }
  355. // alignWindow will update the start and end times to be aligned with the step duration.
  356. // Current implementation will always floor the start/end times
  357. func (ctx *Context) alignWindow(start time.Time, end time.Time, step time.Duration) (time.Time, time.Time) {
  358. // Convert the step duration from Milliseconds to Seconds to match the Unix timestamp, which is in seconds
  359. stepInSeconds := step.Milliseconds() / 1e3
  360. alignedStart := (start.Unix() / stepInSeconds) * stepInSeconds
  361. alignedEnd := (end.Unix() / stepInSeconds) * stepInSeconds
  362. return time.Unix(alignedStart, 0).UTC(), time.Unix(alignedEnd, 0).UTC()
  363. }
  364. // Extracts the warnings from the resulting json if they exist (part of the prometheus response api).
  365. func warningsFrom(result interface{}) v1.Warnings {
  366. var warnings v1.Warnings
  367. if resultMap, ok := result.(map[string]interface{}); ok {
  368. if warningProp, ok := resultMap["warnings"]; ok {
  369. if w, ok := warningProp.([]string); ok {
  370. warnings = w
  371. }
  372. }
  373. }
  374. return warnings
  375. }