query.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. package prom
  2. import (
  3. "context"
  4. "fmt"
  5. "net/http"
  6. "net/url"
  7. "strconv"
  8. "time"
  9. "github.com/opencost/opencost/pkg/env"
  10. "github.com/opencost/opencost/pkg/errors"
  11. "github.com/opencost/opencost/pkg/log"
  12. "github.com/opencost/opencost/pkg/util/httputil"
  13. "github.com/opencost/opencost/pkg/util/json"
  14. prometheus "github.com/prometheus/client_golang/api"
  15. )
  16. const (
  17. apiPrefix = "/api/v1"
  18. epQuery = apiPrefix + "/query"
  19. epQueryRange = apiPrefix + "/query_range"
  20. )
  21. // prometheus query offset to apply to each non-range query
  22. // package scope to prevent calling duration parse each use
  23. var promQueryOffset time.Duration = env.GetPrometheusQueryOffset()
  24. // Context wraps a Prometheus client and provides methods for querying and
  25. // parsing query responses and errors.
  26. type Context struct {
  27. Client prometheus.Client
  28. name string
  29. errorCollector *QueryErrorCollector
  30. }
  31. // NewContext creates a new Promethues querying context from the given client
  32. func NewContext(client prometheus.Client) *Context {
  33. var ec QueryErrorCollector
  34. return &Context{
  35. Client: client,
  36. name: "",
  37. errorCollector: &ec,
  38. }
  39. }
  40. // NewNamedContext creates a new named Promethues querying context from the given client
  41. func NewNamedContext(client prometheus.Client, name string) *Context {
  42. ctx := NewContext(client)
  43. ctx.name = name
  44. return ctx
  45. }
  46. // Warnings returns the warnings collected from the Context's ErrorCollector
  47. func (ctx *Context) Warnings() []*QueryWarning {
  48. return ctx.errorCollector.Warnings()
  49. }
  50. // HasWarnings returns true if the ErrorCollector has warnings.
  51. func (ctx *Context) HasWarnings() bool {
  52. return ctx.errorCollector.IsWarning()
  53. }
  54. // Errors returns the errors collected from the Context's ErrorCollector.
  55. func (ctx *Context) Errors() []*QueryError {
  56. return ctx.errorCollector.Errors()
  57. }
  58. // HasErrors returns true if the ErrorCollector has errors
  59. func (ctx *Context) HasErrors() bool {
  60. return ctx.errorCollector.IsError()
  61. }
  62. // ErrorCollection returns the aggregation of errors if there exists errors. Otherwise,
  63. // nil is returned
  64. func (ctx *Context) ErrorCollection() error {
  65. if ctx.errorCollector.IsError() {
  66. // errorCollector implements the error interface
  67. return ctx.errorCollector
  68. }
  69. return nil
  70. }
  71. // Query returns a QueryResultsChan, then runs the given query and sends the
  72. // results on the provided channel. Receiver is responsible for closing the
  73. // channel, preferably using the Read method.
  74. func (ctx *Context) Query(query string) QueryResultsChan {
  75. resCh := make(QueryResultsChan)
  76. go runQuery(query, ctx, resCh, time.Now(), "")
  77. return resCh
  78. }
  79. // QueryWithTime returns a QueryResultsChan, then runs the given query at the
  80. // given time (see time parameter here: https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries)
  81. // and sends the results on the provided channel. Receiver is responsible for
  82. // closing the channel, preferably using the Read method.
  83. func (ctx *Context) QueryAtTime(query string, t time.Time) QueryResultsChan {
  84. resCh := make(QueryResultsChan)
  85. go runQuery(query, ctx, resCh, t, "")
  86. return resCh
  87. }
  88. // ProfileQuery returns a QueryResultsChan, then runs the given query with a profile
  89. // label and sends the results on the provided channel. Receiver is responsible for closing the
  90. // channel, preferably using the Read method.
  91. func (ctx *Context) ProfileQuery(query string, profileLabel string) QueryResultsChan {
  92. resCh := make(QueryResultsChan)
  93. go runQuery(query, ctx, resCh, time.Now(), profileLabel)
  94. return resCh
  95. }
  96. // QueryAll returns one QueryResultsChan for each query provided, then runs
  97. // each query concurrently and returns results on each channel, respectively,
  98. // in the order they were provided; i.e. the response to queries[1] will be
  99. // sent on channel resChs[1].
  100. func (ctx *Context) QueryAll(queries ...string) []QueryResultsChan {
  101. resChs := []QueryResultsChan{}
  102. for _, q := range queries {
  103. resChs = append(resChs, ctx.Query(q))
  104. }
  105. return resChs
  106. }
  107. // ProfileQueryAll returns one QueryResultsChan for each query provided, then runs
  108. // each ProfileQuery concurrently and returns results on each channel, respectively,
  109. // in the order they were provided; i.e. the response to queries[1] will be
  110. // sent on channel resChs[1].
  111. func (ctx *Context) ProfileQueryAll(queries ...string) []QueryResultsChan {
  112. resChs := []QueryResultsChan{}
  113. for _, q := range queries {
  114. resChs = append(resChs, ctx.ProfileQuery(q, fmt.Sprintf("Query #%d", len(resChs)+1)))
  115. }
  116. return resChs
  117. }
  118. func (ctx *Context) QuerySync(query string) ([]*QueryResult, prometheus.Warnings, error) {
  119. raw, warnings, err := ctx.query(query, time.Now())
  120. if err != nil {
  121. return nil, warnings, err
  122. }
  123. results := NewQueryResults(query, raw)
  124. if results.Error != nil {
  125. return nil, warnings, results.Error
  126. }
  127. return results.Results, warnings, nil
  128. }
  129. // QueryURL returns the URL used to query Prometheus
  130. func (ctx *Context) QueryURL() *url.URL {
  131. return ctx.Client.URL(epQuery, nil)
  132. }
  133. // runQuery executes the prometheus query asynchronously, collects results and
  134. // errors, and passes them through the results channel.
  135. func runQuery(query string, ctx *Context, resCh QueryResultsChan, t time.Time, profileLabel string) {
  136. defer errors.HandlePanic()
  137. startQuery := time.Now()
  138. raw, warnings, requestError := ctx.query(query, t)
  139. results := NewQueryResults(query, raw)
  140. // report all warnings, request, and parse errors (nils will be ignored)
  141. ctx.errorCollector.Report(query, warnings, requestError, results.Error)
  142. if profileLabel != "" {
  143. log.Profile(startQuery, profileLabel)
  144. }
  145. resCh <- results
  146. }
  147. // RawQuery is a direct query to the prometheus client and returns the body of the response
  148. func (ctx *Context) RawQuery(query string, t time.Time) ([]byte, error) {
  149. u := ctx.Client.URL(epQuery, nil)
  150. q := u.Query()
  151. q.Set("query", query)
  152. if t.IsZero() {
  153. t = time.Now()
  154. }
  155. q.Set("time", strconv.FormatInt(t.Unix(), 10))
  156. u.RawQuery = q.Encode()
  157. req, err := http.NewRequest(http.MethodPost, u.String(), nil)
  158. if err != nil {
  159. return nil, err
  160. }
  161. // Set QueryContext name if non empty
  162. if ctx.name != "" {
  163. req = httputil.SetName(req, ctx.name)
  164. }
  165. req = httputil.SetQuery(req, query)
  166. // Note that the warnings return value from client.Do() is always nil using this
  167. // version of the prometheus client library. We parse the warnings out of the response
  168. // body after json decodidng completes.
  169. resp, body, _, err := ctx.Client.Do(context.Background(), req)
  170. if err != nil {
  171. if resp == nil {
  172. return nil, fmt.Errorf("query error: '%s' fetching query '%s'", err.Error(), query)
  173. }
  174. return nil, fmt.Errorf("query error %d: '%s' fetching query '%s'", resp.StatusCode, err.Error(), query)
  175. }
  176. // Unsuccessful Status Code, log body and status
  177. statusCode := resp.StatusCode
  178. statusText := http.StatusText(statusCode)
  179. if resp.StatusCode < 200 || resp.StatusCode >= 300 {
  180. return nil, CommErrorf("%d (%s) URL: '%s', Request Headers: '%s', Headers: '%s', Body: '%s' Query: '%s'", statusCode, statusText, req.URL, req.Header, httputil.HeaderString(resp.Header), body, query)
  181. }
  182. return body, err
  183. }
  184. func (ctx *Context) query(query string, t time.Time) (interface{}, prometheus.Warnings, error) {
  185. body, err := ctx.RawQuery(query, t)
  186. if err != nil {
  187. return nil, nil, err
  188. }
  189. var toReturn interface{}
  190. err = json.Unmarshal(body, &toReturn)
  191. if err != nil {
  192. return nil, nil, fmt.Errorf("query '%s' caused unmarshal error: %s", query, err)
  193. }
  194. warnings := warningsFrom(toReturn)
  195. for _, w := range warnings {
  196. // NoStoreAPIWarning is a warning that we would consider an error. It returns partial data relating only to the
  197. // store apis which were reachable. In order to ensure integrity of data across all clusters, we'll need to identify
  198. // this warning and convert it to an error.
  199. if IsNoStoreAPIWarning(w) {
  200. return nil, warnings, CommErrorf("Error: %s, Body: %s, Query: %s", w, body, query)
  201. }
  202. log.Warnf("fetching query '%s': %s", query, w)
  203. }
  204. return toReturn, warnings, nil
  205. }
  206. func (ctx *Context) QueryRange(query string, start, end time.Time, step time.Duration) QueryResultsChan {
  207. resCh := make(QueryResultsChan)
  208. go runQueryRange(query, start, end, step, ctx, resCh, "")
  209. return resCh
  210. }
  211. func (ctx *Context) ProfileQueryRange(query string, start, end time.Time, step time.Duration, profileLabel string) QueryResultsChan {
  212. resCh := make(QueryResultsChan)
  213. go runQueryRange(query, start, end, step, ctx, resCh, profileLabel)
  214. return resCh
  215. }
  216. func (ctx *Context) QueryRangeSync(query string, start, end time.Time, step time.Duration) ([]*QueryResult, prometheus.Warnings, error) {
  217. raw, warnings, err := ctx.queryRange(query, start, end, step)
  218. if err != nil {
  219. return nil, warnings, err
  220. }
  221. results := NewQueryResults(query, raw)
  222. if results.Error != nil {
  223. return nil, warnings, results.Error
  224. }
  225. return results.Results, warnings, nil
  226. }
  227. // QueryRangeURL returns the URL used to query_range Prometheus
  228. func (ctx *Context) QueryRangeURL() *url.URL {
  229. return ctx.Client.URL(epQueryRange, nil)
  230. }
  231. // runQueryRange executes the prometheus queryRange asynchronously, collects results and
  232. // errors, and passes them through the results channel.
  233. func runQueryRange(query string, start, end time.Time, step time.Duration, ctx *Context, resCh QueryResultsChan, profileLabel string) {
  234. defer errors.HandlePanic()
  235. startQuery := time.Now()
  236. raw, warnings, requestError := ctx.queryRange(query, start, end, step)
  237. results := NewQueryResults(query, raw)
  238. // report all warnings, request, and parse errors (nils will be ignored)
  239. ctx.errorCollector.Report(query, warnings, requestError, results.Error)
  240. if profileLabel != "" {
  241. log.Profile(startQuery, profileLabel)
  242. }
  243. resCh <- results
  244. }
  245. // RawQuery is a direct query to the prometheus client and returns the body of the response
  246. func (ctx *Context) RawQueryRange(query string, start, end time.Time, step time.Duration) ([]byte, error) {
  247. u := ctx.Client.URL(epQueryRange, nil)
  248. q := u.Query()
  249. q.Set("query", query)
  250. q.Set("start", start.Format(time.RFC3339Nano))
  251. q.Set("end", end.Format(time.RFC3339Nano))
  252. q.Set("step", strconv.FormatFloat(step.Seconds(), 'f', 3, 64))
  253. u.RawQuery = q.Encode()
  254. req, err := http.NewRequest(http.MethodPost, u.String(), nil)
  255. if err != nil {
  256. return nil, err
  257. }
  258. // Set QueryContext name if non empty
  259. if ctx.name != "" {
  260. req = httputil.SetName(req, ctx.name)
  261. }
  262. req = httputil.SetQuery(req, query)
  263. // Note that the warnings return value from client.Do() is always nil using this
  264. // version of the prometheus client library. We parse the warnings out of the response
  265. // body after json decodidng completes.
  266. resp, body, _, err := ctx.Client.Do(context.Background(), req)
  267. if err != nil {
  268. if resp == nil {
  269. return nil, fmt.Errorf("Error: %s, Body: %s Query: %s", err.Error(), body, query)
  270. }
  271. return nil, fmt.Errorf("%d (%s) Headers: %s Error: %s Body: %s Query: %s", resp.StatusCode, http.StatusText(resp.StatusCode), httputil.HeaderString(resp.Header), body, err.Error(), query)
  272. }
  273. // Unsuccessful Status Code, log body and status
  274. statusCode := resp.StatusCode
  275. statusText := http.StatusText(statusCode)
  276. if resp.StatusCode < 200 || resp.StatusCode >= 300 {
  277. return nil, CommErrorf("%d (%s) Headers: %s, Body: %s Query: %s", statusCode, statusText, httputil.HeaderString(resp.Header), body, query)
  278. }
  279. return body, err
  280. }
  281. func (ctx *Context) queryRange(query string, start, end time.Time, step time.Duration) (interface{}, prometheus.Warnings, error) {
  282. body, err := ctx.RawQueryRange(query, start, end, step)
  283. if err != nil {
  284. return nil, nil, err
  285. }
  286. var toReturn interface{}
  287. err = json.Unmarshal(body, &toReturn)
  288. if err != nil {
  289. return nil, nil, fmt.Errorf("query '%s' caused unmarshal error: %s", query, err)
  290. }
  291. warnings := warningsFrom(toReturn)
  292. for _, w := range warnings {
  293. // NoStoreAPIWarning is a warning that we would consider an error. It returns partial data relating only to the
  294. // store apis which were reachable. In order to ensure integrity of data across all clusters, we'll need to identify
  295. // this warning and convert it to an error.
  296. if IsNoStoreAPIWarning(w) {
  297. return nil, warnings, CommErrorf("Error: %s, Body: %s, Query: %s", w, body, query)
  298. }
  299. log.Warnf("fetching query '%s': %s", query, w)
  300. }
  301. return toReturn, warnings, nil
  302. }
  303. // Extracts the warnings from the resulting json if they exist (part of the prometheus response api).
  304. func warningsFrom(result interface{}) prometheus.Warnings {
  305. var warnings prometheus.Warnings
  306. if resultMap, ok := result.(map[string]interface{}); ok {
  307. if warningProp, ok := resultMap["warnings"]; ok {
  308. if w, ok := warningProp.([]string); ok {
  309. warnings = w
  310. }
  311. }
  312. }
  313. return warnings
  314. }