router.go 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905
  1. package costmodel
  2. import (
  3. "context"
  4. "encoding/base64"
  5. "fmt"
  6. "io"
  7. "net/http"
  8. "os"
  9. "path"
  10. "reflect"
  11. "regexp"
  12. "strconv"
  13. "strings"
  14. "sync"
  15. "time"
  16. "github.com/microcosm-cc/bluemonday"
  17. "github.com/opencost/opencost/core/pkg/opencost"
  18. "github.com/opencost/opencost/core/pkg/util/httputil"
  19. "github.com/opencost/opencost/core/pkg/util/timeutil"
  20. "github.com/opencost/opencost/core/pkg/util/watcher"
  21. "github.com/opencost/opencost/core/pkg/version"
  22. "github.com/opencost/opencost/pkg/cloud/aws"
  23. cloudconfig "github.com/opencost/opencost/pkg/cloud/config"
  24. "github.com/opencost/opencost/pkg/cloud/gcp"
  25. "github.com/opencost/opencost/pkg/cloud/provider"
  26. "github.com/opencost/opencost/pkg/cloudcost"
  27. "github.com/opencost/opencost/pkg/config"
  28. clustermap "github.com/opencost/opencost/pkg/costmodel/clusters"
  29. "github.com/opencost/opencost/pkg/customcost"
  30. "github.com/opencost/opencost/pkg/kubeconfig"
  31. "github.com/opencost/opencost/pkg/metrics"
  32. "github.com/opencost/opencost/pkg/services"
  33. "github.com/spf13/viper"
  34. v1 "k8s.io/api/core/v1"
  35. "github.com/julienschmidt/httprouter"
  36. "github.com/getsentry/sentry-go"
  37. "github.com/opencost/opencost/core/pkg/clusters"
  38. sysenv "github.com/opencost/opencost/core/pkg/env"
  39. "github.com/opencost/opencost/core/pkg/log"
  40. "github.com/opencost/opencost/core/pkg/util/json"
  41. "github.com/opencost/opencost/pkg/cloud/azure"
  42. "github.com/opencost/opencost/pkg/cloud/models"
  43. "github.com/opencost/opencost/pkg/cloud/utils"
  44. "github.com/opencost/opencost/pkg/clustercache"
  45. "github.com/opencost/opencost/pkg/env"
  46. "github.com/opencost/opencost/pkg/errors"
  47. "github.com/opencost/opencost/pkg/prom"
  48. "github.com/opencost/opencost/pkg/thanos"
  49. prometheus "github.com/prometheus/client_golang/api"
  50. prometheusAPI "github.com/prometheus/client_golang/api/prometheus/v1"
  51. appsv1 "k8s.io/api/apps/v1"
  52. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  53. "github.com/patrickmn/go-cache"
  54. "k8s.io/client-go/kubernetes"
  55. )
// sanitizePolicy sanitizes user-supplied strings with bluemonday's
// user-generated-content policy before they are echoed back in responses.
var sanitizePolicy = bluemonday.UGCPolicy()
const (
	// RFC3339Milli is the RFC3339 timestamp layout with millisecond precision
	// used when parsing start/end query parameters.
	RFC3339Milli = "2006-01-02T15:04:05.000Z"

	// maxCacheMinutes* presumably bound cache lifetimes for each query window
	// size (1d/2d/7d/30d) — usage is outside this chunk; confirm before relying.
	maxCacheMinutes1d  = 11
	maxCacheMinutes2d  = 17
	maxCacheMinutes7d  = 37
	maxCacheMinutes30d = 137

	// Keys identifying settings published through the settings pub/sub cache.
	CustomPricingSetting = "CustomPricing"
	DiscountSetting      = "Discount"

	// epRules is the rules endpoint path, relative to the API prefix.
	epRules = apiPrefix + "/rules"

	// LogSeparator visually delimits sections of log output.
	LogSeparator = "+-------------------------------------------------------------------------------------"
)
var (
	// gitCommit is set by the build system (via -ldflags) to the commit this
	// binary was built from.
	gitCommit string

	// ANSIRegex matches ANSI escape and color codes so they can be stripped
	// from text: https://en.wikipedia.org/wiki/ANSI_escape_code
	ANSIRegex = regexp.MustCompile("\x1b\\[[0-9;]*m")
)
// Accesses defines a singleton application instance, providing access to
// Prometheus, Kubernetes, the cloud provider, and caches.
type Accesses struct {
	// Router dispatches the HTTP endpoints registered by this package.
	Router *httprouter.Router
	// PrometheusClient queries the local Prometheus; ThanosClient, when
	// non-nil, queries Thanos for multi-cluster ("remote") data.
	PrometheusClient prometheus.Client
	ThanosClient     prometheus.Client
	KubeClientSet    kubernetes.Interface
	ClusterCache     clustercache.ClusterCache
	ClusterMap       clusters.ClusterMap
	// CloudProvider supplies pricing data and provider configuration.
	CloudProvider             models.Provider
	ConfigFileManager         *config.ConfigFileManager
	CloudConfigController     *cloudconfig.Controller
	CloudCostPipelineService  *cloudcost.PipelineService
	CloudCostQueryService     *cloudcost.QueryService
	CustomCostQueryService    *customcost.QueryService
	CustomCostPipelineService *customcost.PipelineService
	ClusterInfoProvider       clusters.ClusterInfoProvider
	// Model computes cost data from Prometheus metrics.
	Model          *CostModel
	MetricsEmitter *CostModelMetricsEmitter
	// In-memory result caches; CacheExpiration maps a query window to a
	// custom expiration for it (see GetCacheExpiration).
	OutOfClusterCache *cache.Cache
	AggregateCache    *cache.Cache
	CostDataCache     *cache.Cache
	ClusterCostsCache *cache.Cache
	CacheExpiration   map[time.Duration]time.Duration
	AggAPI            Aggregator
	// SettingsCache stores current state of app settings
	SettingsCache *cache.Cache
	// settingsSubscribers tracks channels through which changes to different
	// settings will be published in a pub/sub model
	settingsSubscribers map[string][]chan string
	settingsMutex       sync.Mutex
	// registered http service instances
	httpServices services.HTTPServices
}
  108. // GetPrometheusClient decides whether the default Prometheus client or the Thanos client
  109. // should be used.
  110. func (a *Accesses) GetPrometheusClient(remote bool) prometheus.Client {
  111. // Use Thanos Client if it exists (enabled) and remote flag set
  112. var pc prometheus.Client
  113. if remote && a.ThanosClient != nil {
  114. pc = a.ThanosClient
  115. } else {
  116. pc = a.PrometheusClient
  117. }
  118. return pc
  119. }
  120. // GetCacheExpiration looks up and returns custom cache expiration for the given duration.
  121. // If one does not exists, it returns the default cache expiration, which is defined by
  122. // the particular cache.
  123. func (a *Accesses) GetCacheExpiration(dur time.Duration) time.Duration {
  124. if expiration, ok := a.CacheExpiration[dur]; ok {
  125. return expiration
  126. }
  127. return cache.DefaultExpiration
  128. }
  129. // GetCacheRefresh determines how long to wait before refreshing the cache for the given duration,
  130. // which is done 1 minute before we expect the cache to expire, or 1 minute if expiration is
  131. // not found or is less than 2 minutes.
  132. func (a *Accesses) GetCacheRefresh(dur time.Duration) time.Duration {
  133. expiry := a.GetCacheExpiration(dur).Minutes()
  134. if expiry <= 2.0 {
  135. return time.Minute
  136. }
  137. mins := time.Duration(expiry/2.0) * time.Minute
  138. return mins
  139. }
  140. func (a *Accesses) ClusterCostsFromCacheHandler(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  141. w.Header().Set("Content-Type", "application/json")
  142. duration := 24 * time.Hour
  143. offset := time.Minute
  144. durationHrs := "24h"
  145. fmtOffset := "1m"
  146. pClient := a.GetPrometheusClient(true)
  147. key := fmt.Sprintf("%s:%s", durationHrs, fmtOffset)
  148. if data, valid := a.ClusterCostsCache.Get(key); valid {
  149. clusterCosts := data.(map[string]*ClusterCosts)
  150. w.Write(WrapDataWithMessage(clusterCosts, nil, "clusterCosts cache hit"))
  151. } else {
  152. data, err := a.ComputeClusterCosts(pClient, a.CloudProvider, duration, offset, true)
  153. w.Write(WrapDataWithMessage(data, err, fmt.Sprintf("clusterCosts cache miss: %s", key)))
  154. }
  155. }
// Response is the standard JSON envelope returned by the HTTP handlers in
// this file: an HTTP-style status code, a status string ("success"/"error"),
// the payload, and optional message/warning text.
type Response struct {
	Code    int         `json:"code"`
	Status  string      `json:"status"`
	Data    interface{} `json:"data"`
	Message string      `json:"message,omitempty"`
	Warning string      `json:"warning,omitempty"`
}
// FilterFunc is a filter that returns true iff the given CostData should be
// filtered out, and the environment that was used as the filter criteria, if
// it was an aggregate.
type FilterFunc func(*CostData) (bool, string)
  165. // FilterCostData allows through only CostData that matches all the given filter functions
  166. func FilterCostData(data map[string]*CostData, retains []FilterFunc, filters []FilterFunc) (map[string]*CostData, int, map[string]int) {
  167. result := make(map[string]*CostData)
  168. filteredEnvironments := make(map[string]int)
  169. filteredContainers := 0
  170. DataLoop:
  171. for key, datum := range data {
  172. for _, rf := range retains {
  173. if ok, _ := rf(datum); ok {
  174. result[key] = datum
  175. // if any retain function passes, the data is retained and move on
  176. continue DataLoop
  177. }
  178. }
  179. for _, ff := range filters {
  180. if ok, environment := ff(datum); !ok {
  181. if environment != "" {
  182. filteredEnvironments[environment]++
  183. }
  184. filteredContainers++
  185. // if any filter function check fails, move on to the next datum
  186. continue DataLoop
  187. }
  188. }
  189. result[key] = datum
  190. }
  191. return result, filteredContainers, filteredEnvironments
  192. }
  193. func filterFields(fields string, data map[string]*CostData) map[string]CostData {
  194. fs := strings.Split(fields, ",")
  195. fmap := make(map[string]bool)
  196. for _, f := range fs {
  197. fieldNameLower := strings.ToLower(f) // convert to go struct name by uppercasing first letter
  198. log.Debugf("to delete: %s", fieldNameLower)
  199. fmap[fieldNameLower] = true
  200. }
  201. filteredData := make(map[string]CostData)
  202. for cname, costdata := range data {
  203. s := reflect.TypeOf(*costdata)
  204. val := reflect.ValueOf(*costdata)
  205. costdata2 := CostData{}
  206. cd2 := reflect.New(reflect.Indirect(reflect.ValueOf(costdata2)).Type()).Elem()
  207. n := s.NumField()
  208. for i := 0; i < n; i++ {
  209. field := s.Field(i)
  210. value := val.Field(i)
  211. value2 := cd2.Field(i)
  212. if _, ok := fmap[strings.ToLower(field.Name)]; !ok {
  213. value2.Set(reflect.Value(value))
  214. }
  215. }
  216. filteredData[cname] = cd2.Interface().(CostData)
  217. }
  218. return filteredData
  219. }
  220. func normalizeTimeParam(param string) (string, error) {
  221. if param == "" {
  222. return "", fmt.Errorf("invalid time param")
  223. }
  224. // convert days to hours
  225. if param[len(param)-1:] == "d" {
  226. count := param[:len(param)-1]
  227. val, err := strconv.ParseInt(count, 10, 64)
  228. if err != nil {
  229. return "", err
  230. }
  231. val = val * 24
  232. param = fmt.Sprintf("%dh", val)
  233. }
  234. return param, nil
  235. }
  236. // ParsePercentString takes a string of expected format "N%" and returns a floating point 0.0N.
  237. // If the "%" symbol is missing, it just returns 0.0N. Empty string is interpreted as "0%" and
  238. // return 0.0.
  239. func ParsePercentString(percentStr string) (float64, error) {
  240. if len(percentStr) == 0 {
  241. return 0.0, nil
  242. }
  243. if percentStr[len(percentStr)-1:] == "%" {
  244. percentStr = percentStr[:len(percentStr)-1]
  245. }
  246. discount, err := strconv.ParseFloat(percentStr, 64)
  247. if err != nil {
  248. return 0.0, err
  249. }
  250. discount *= 0.01
  251. return discount, nil
  252. }
  253. func WrapData(data interface{}, err error) []byte {
  254. var resp []byte
  255. if err != nil {
  256. log.Errorf("Error returned to client: %s", err.Error())
  257. resp, _ = json.Marshal(&Response{
  258. Code: http.StatusInternalServerError,
  259. Status: "error",
  260. Message: err.Error(),
  261. Data: data,
  262. })
  263. } else {
  264. resp, err = json.Marshal(&Response{
  265. Code: http.StatusOK,
  266. Status: "success",
  267. Data: data,
  268. })
  269. if err != nil {
  270. log.Errorf("error marshaling response json: %s", err.Error())
  271. }
  272. }
  273. return resp
  274. }
  275. func WrapDataWithMessage(data interface{}, err error, message string) []byte {
  276. var resp []byte
  277. if err != nil {
  278. log.Errorf("Error returned to client: %s", err.Error())
  279. resp, _ = json.Marshal(&Response{
  280. Code: http.StatusInternalServerError,
  281. Status: "error",
  282. Message: err.Error(),
  283. Data: data,
  284. })
  285. } else {
  286. resp, _ = json.Marshal(&Response{
  287. Code: http.StatusOK,
  288. Status: "success",
  289. Data: data,
  290. Message: message,
  291. })
  292. }
  293. return resp
  294. }
  295. func WrapDataWithWarning(data interface{}, err error, warning string) []byte {
  296. var resp []byte
  297. if err != nil {
  298. log.Errorf("Error returned to client: %s", err.Error())
  299. resp, _ = json.Marshal(&Response{
  300. Code: http.StatusInternalServerError,
  301. Status: "error",
  302. Message: err.Error(),
  303. Warning: warning,
  304. Data: data,
  305. })
  306. } else {
  307. resp, _ = json.Marshal(&Response{
  308. Code: http.StatusOK,
  309. Status: "success",
  310. Data: data,
  311. Warning: warning,
  312. })
  313. }
  314. return resp
  315. }
  316. func WrapDataWithMessageAndWarning(data interface{}, err error, message, warning string) []byte {
  317. var resp []byte
  318. if err != nil {
  319. log.Errorf("Error returned to client: %s", err.Error())
  320. resp, _ = json.Marshal(&Response{
  321. Code: http.StatusInternalServerError,
  322. Status: "error",
  323. Message: err.Error(),
  324. Warning: warning,
  325. Data: data,
  326. })
  327. } else {
  328. resp, _ = json.Marshal(&Response{
  329. Code: http.StatusOK,
  330. Status: "success",
  331. Data: data,
  332. Message: message,
  333. Warning: warning,
  334. })
  335. }
  336. return resp
  337. }
  338. // wrapAsObjectItems wraps a slice of items into an object containing a single items list
  339. // allows our k8s proxy methods to emulate a List() request to k8s API
  340. func wrapAsObjectItems(items interface{}) map[string]interface{} {
  341. return map[string]interface{}{
  342. "items": items,
  343. }
  344. }
  345. // RefreshPricingData needs to be called when a new node joins the fleet, since we cache the relevant subsets of pricing data to avoid storing the whole thing.
  346. func (a *Accesses) RefreshPricingData(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  347. w.Header().Set("Content-Type", "application/json")
  348. w.Header().Set("Access-Control-Allow-Origin", "*")
  349. err := a.CloudProvider.DownloadPricingData()
  350. if err != nil {
  351. log.Errorf("Error refreshing pricing data: %s", err.Error())
  352. }
  353. w.Write(WrapData(nil, err))
  354. }
  355. func (a *Accesses) CostDataModel(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  356. w.Header().Set("Content-Type", "application/json")
  357. w.Header().Set("Access-Control-Allow-Origin", "*")
  358. window := r.URL.Query().Get("timeWindow")
  359. offset := r.URL.Query().Get("offset")
  360. fields := r.URL.Query().Get("filterFields")
  361. namespace := r.URL.Query().Get("namespace")
  362. if offset != "" {
  363. offset = "offset " + offset
  364. }
  365. data, err := a.Model.ComputeCostData(a.PrometheusClient, a.CloudProvider, window, offset, namespace)
  366. if fields != "" {
  367. filteredData := filterFields(fields, data)
  368. w.Write(WrapData(filteredData, err))
  369. } else {
  370. w.Write(WrapData(data, err))
  371. }
  372. }
  373. func (a *Accesses) ClusterCosts(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  374. w.Header().Set("Content-Type", "application/json")
  375. w.Header().Set("Access-Control-Allow-Origin", "*")
  376. window := r.URL.Query().Get("window")
  377. offset := r.URL.Query().Get("offset")
  378. if window == "" {
  379. w.Write(WrapData(nil, fmt.Errorf("missing window argument")))
  380. return
  381. }
  382. windowDur, err := timeutil.ParseDuration(window)
  383. if err != nil {
  384. w.Write(WrapData(nil, fmt.Errorf("error parsing window (%s): %s", window, err)))
  385. return
  386. }
  387. // offset is not a required parameter
  388. var offsetDur time.Duration
  389. if offset != "" {
  390. offsetDur, err = timeutil.ParseDuration(offset)
  391. if err != nil {
  392. w.Write(WrapData(nil, fmt.Errorf("error parsing offset (%s): %s", offset, err)))
  393. return
  394. }
  395. }
  396. useThanos, _ := strconv.ParseBool(r.URL.Query().Get("multi"))
  397. if useThanos && !thanos.IsEnabled() {
  398. w.Write(WrapData(nil, fmt.Errorf("Multi=true while Thanos is not enabled.")))
  399. return
  400. }
  401. var client prometheus.Client
  402. if useThanos {
  403. client = a.ThanosClient
  404. offsetDur = thanos.OffsetDuration()
  405. } else {
  406. client = a.PrometheusClient
  407. }
  408. data, err := a.ComputeClusterCosts(client, a.CloudProvider, windowDur, offsetDur, true)
  409. w.Write(WrapData(data, err))
  410. }
  411. func (a *Accesses) ClusterCostsOverTime(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  412. w.Header().Set("Content-Type", "application/json")
  413. w.Header().Set("Access-Control-Allow-Origin", "*")
  414. start := r.URL.Query().Get("start")
  415. end := r.URL.Query().Get("end")
  416. window := r.URL.Query().Get("window")
  417. offset := r.URL.Query().Get("offset")
  418. if window == "" {
  419. w.Write(WrapData(nil, fmt.Errorf("missing window argument")))
  420. return
  421. }
  422. windowDur, err := timeutil.ParseDuration(window)
  423. if err != nil {
  424. w.Write(WrapData(nil, fmt.Errorf("error parsing window (%s): %s", window, err)))
  425. return
  426. }
  427. // offset is not a required parameter
  428. var offsetDur time.Duration
  429. if offset != "" {
  430. offsetDur, err = timeutil.ParseDuration(offset)
  431. if err != nil {
  432. w.Write(WrapData(nil, fmt.Errorf("error parsing offset (%s): %s", offset, err)))
  433. return
  434. }
  435. }
  436. data, err := ClusterCostsOverTime(a.PrometheusClient, a.CloudProvider, start, end, windowDur, offsetDur)
  437. w.Write(WrapData(data, err))
  438. }
  439. func (a *Accesses) CostDataModelRange(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  440. w.Header().Set("Content-Type", "application/json")
  441. w.Header().Set("Access-Control-Allow-Origin", "*")
  442. startStr := r.URL.Query().Get("start")
  443. endStr := r.URL.Query().Get("end")
  444. windowStr := r.URL.Query().Get("window")
  445. fields := r.URL.Query().Get("filterFields")
  446. namespace := r.URL.Query().Get("namespace")
  447. cluster := r.URL.Query().Get("cluster")
  448. remote := r.URL.Query().Get("remote")
  449. remoteEnabled := env.IsRemoteEnabled() && remote != "false"
  450. layout := "2006-01-02T15:04:05.000Z"
  451. start, err := time.Parse(layout, startStr)
  452. if err != nil {
  453. w.Write(WrapDataWithMessage(nil, fmt.Errorf("invalid start date: %s", startStr), fmt.Sprintf("invalid start date: %s", startStr)))
  454. return
  455. }
  456. end, err := time.Parse(layout, endStr)
  457. if err != nil {
  458. w.Write(WrapDataWithMessage(nil, fmt.Errorf("invalid end date: %s", endStr), fmt.Sprintf("invalid end date: %s", endStr)))
  459. return
  460. }
  461. window := opencost.NewWindow(&start, &end)
  462. if window.IsOpen() || !window.HasDuration() || window.IsNegative() {
  463. w.Write(WrapDataWithMessage(nil, fmt.Errorf("invalid date range: %s", window), fmt.Sprintf("invalid date range: %s", window)))
  464. return
  465. }
  466. resolution := time.Hour
  467. if resDur, err := time.ParseDuration(windowStr); err == nil {
  468. resolution = resDur
  469. }
  470. // Use Thanos Client if it exists (enabled) and remote flag set
  471. var pClient prometheus.Client
  472. if remote != "false" && a.ThanosClient != nil {
  473. pClient = a.ThanosClient
  474. } else {
  475. pClient = a.PrometheusClient
  476. }
  477. data, err := a.Model.ComputeCostDataRange(pClient, a.CloudProvider, window, resolution, namespace, cluster, remoteEnabled)
  478. if err != nil {
  479. w.Write(WrapData(nil, err))
  480. }
  481. if fields != "" {
  482. filteredData := filterFields(fields, data)
  483. w.Write(WrapData(filteredData, err))
  484. } else {
  485. w.Write(WrapData(data, err))
  486. }
  487. }
  488. func parseAggregations(customAggregation, aggregator, filterType string) (string, []string, string) {
  489. var key string
  490. var filter string
  491. var val []string
  492. if customAggregation != "" {
  493. key = customAggregation
  494. filter = filterType
  495. val = strings.Split(customAggregation, ",")
  496. } else {
  497. aggregations := strings.Split(aggregator, ",")
  498. for i, agg := range aggregations {
  499. aggregations[i] = "kubernetes_" + agg
  500. }
  501. key = strings.Join(aggregations, ",")
  502. filter = "kubernetes_" + filterType
  503. val = aggregations
  504. }
  505. return key, val, filter
  506. }
  507. func (a *Accesses) GetAllNodePricing(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  508. w.Header().Set("Content-Type", "application/json")
  509. w.Header().Set("Access-Control-Allow-Origin", "*")
  510. data, err := a.CloudProvider.AllNodePricing()
  511. w.Write(WrapData(data, err))
  512. }
  513. func (a *Accesses) GetConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  514. w.Header().Set("Content-Type", "application/json")
  515. w.Header().Set("Access-Control-Allow-Origin", "*")
  516. data, err := a.CloudProvider.GetConfig()
  517. w.Write(WrapData(data, err))
  518. }
  519. func (a *Accesses) UpdateSpotInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  520. w.Header().Set("Content-Type", "application/json")
  521. w.Header().Set("Access-Control-Allow-Origin", "*")
  522. data, err := a.CloudProvider.UpdateConfig(r.Body, aws.SpotInfoUpdateType)
  523. if err != nil {
  524. w.Write(WrapData(data, err))
  525. return
  526. }
  527. w.Write(WrapData(data, err))
  528. err = a.CloudProvider.DownloadPricingData()
  529. if err != nil {
  530. log.Errorf("Error redownloading data on config update: %s", err.Error())
  531. }
  532. return
  533. }
  534. func (a *Accesses) UpdateAthenaInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  535. w.Header().Set("Content-Type", "application/json")
  536. w.Header().Set("Access-Control-Allow-Origin", "*")
  537. data, err := a.CloudProvider.UpdateConfig(r.Body, aws.AthenaInfoUpdateType)
  538. if err != nil {
  539. w.Write(WrapData(data, err))
  540. return
  541. }
  542. w.Write(WrapData(data, err))
  543. return
  544. }
  545. func (a *Accesses) UpdateBigQueryInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  546. w.Header().Set("Content-Type", "application/json")
  547. w.Header().Set("Access-Control-Allow-Origin", "*")
  548. data, err := a.CloudProvider.UpdateConfig(r.Body, gcp.BigqueryUpdateType)
  549. if err != nil {
  550. w.Write(WrapData(data, err))
  551. return
  552. }
  553. w.Write(WrapData(data, err))
  554. return
  555. }
  556. func (a *Accesses) UpdateAzureStorageConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  557. w.Header().Set("Content-Type", "application/json")
  558. w.Header().Set("Access-Control-Allow-Origin", "*")
  559. data, err := a.CloudProvider.UpdateConfig(r.Body, azure.AzureStorageUpdateType)
  560. if err != nil {
  561. w.Write(WrapData(data, err))
  562. return
  563. }
  564. w.Write(WrapData(data, err))
  565. return
  566. }
  567. func (a *Accesses) UpdateConfigByKey(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  568. w.Header().Set("Content-Type", "application/json")
  569. w.Header().Set("Access-Control-Allow-Origin", "*")
  570. data, err := a.CloudProvider.UpdateConfig(r.Body, "")
  571. if err != nil {
  572. w.Write(WrapData(data, err))
  573. return
  574. }
  575. w.Write(WrapData(data, err))
  576. return
  577. }
  578. func (a *Accesses) ManagementPlatform(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  579. w.Header().Set("Content-Type", "application/json")
  580. w.Header().Set("Access-Control-Allow-Origin", "*")
  581. data, err := a.CloudProvider.GetManagementPlatform()
  582. if err != nil {
  583. w.Write(WrapData(data, err))
  584. return
  585. }
  586. w.Write(WrapData(data, err))
  587. return
  588. }
  589. func (a *Accesses) ClusterInfo(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  590. w.Header().Set("Content-Type", "application/json")
  591. w.Header().Set("Access-Control-Allow-Origin", "*")
  592. data := a.ClusterInfoProvider.GetClusterInfo()
  593. w.Write(WrapData(data, nil))
  594. }
  595. func (a *Accesses) GetClusterInfoMap(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  596. w.Header().Set("Content-Type", "application/json")
  597. w.Header().Set("Access-Control-Allow-Origin", "*")
  598. data := a.ClusterMap.AsMap()
  599. w.Write(WrapData(data, nil))
  600. }
  601. func (a *Accesses) GetServiceAccountStatus(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  602. w.Header().Set("Content-Type", "application/json")
  603. w.Header().Set("Access-Control-Allow-Origin", "*")
  604. w.Write(WrapData(a.CloudProvider.ServiceAccountStatus(), nil))
  605. }
  606. func (a *Accesses) GetPricingSourceStatus(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  607. w.Header().Set("Content-Type", "application/json")
  608. w.Header().Set("Access-Control-Allow-Origin", "*")
  609. w.Write(WrapData(a.CloudProvider.PricingSourceStatus(), nil))
  610. }
  611. func (a *Accesses) GetPricingSourceCounts(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  612. w.Header().Set("Content-Type", "application/json")
  613. w.Header().Set("Access-Control-Allow-Origin", "*")
  614. w.Write(WrapData(a.Model.GetPricingSourceCounts()))
  615. }
  616. func (a *Accesses) GetPricingSourceSummary(w http.ResponseWriter, r *http.Request, p httprouter.Params) {
  617. w.Header().Set("Content-Type", "application/json")
  618. w.Header().Set("Access-Control-Allow-Origin", "*")
  619. data := a.CloudProvider.PricingSourceSummary()
  620. w.Write(WrapData(data, nil))
  621. }
  622. func (a *Accesses) GetPrometheusMetadata(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  623. w.Header().Set("Content-Type", "application/json")
  624. w.Header().Set("Access-Control-Allow-Origin", "*")
  625. w.Write(WrapData(prom.Validate(a.PrometheusClient)))
  626. }
  627. func (a *Accesses) PrometheusQuery(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  628. w.Header().Set("Content-Type", "application/json")
  629. w.Header().Set("Access-Control-Allow-Origin", "*")
  630. qp := httputil.NewQueryParams(r.URL.Query())
  631. query := qp.Get("query", "")
  632. if query == "" {
  633. w.Write(WrapData(nil, fmt.Errorf("Query Parameter 'query' is unset'")))
  634. return
  635. }
  636. // Attempt to parse time as either a unix timestamp or as an RFC3339 value
  637. var timeVal time.Time
  638. timeStr := qp.Get("time", "")
  639. if len(timeStr) > 0 {
  640. if t, err := strconv.ParseInt(timeStr, 10, 64); err == nil {
  641. timeVal = time.Unix(t, 0)
  642. } else if t, err := time.Parse(time.RFC3339, timeStr); err == nil {
  643. timeVal = t
  644. }
  645. // If time is given, but not parse-able, return an error
  646. if timeVal.IsZero() {
  647. http.Error(w, fmt.Sprintf("time must be a unix timestamp or RFC3339 value; illegal value given: %s", timeStr), http.StatusBadRequest)
  648. }
  649. }
  650. ctx := prom.NewNamedContext(a.PrometheusClient, prom.FrontendContextName)
  651. body, err := ctx.RawQuery(query, timeVal)
  652. if err != nil {
  653. w.Write(WrapData(nil, fmt.Errorf("Error running query %s. Error: %s", query, err)))
  654. return
  655. }
  656. w.Write(body)
  657. }
  658. func (a *Accesses) PrometheusQueryRange(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  659. w.Header().Set("Content-Type", "application/json")
  660. w.Header().Set("Access-Control-Allow-Origin", "*")
  661. qp := httputil.NewQueryParams(r.URL.Query())
  662. query := qp.Get("query", "")
  663. if query == "" {
  664. fmt.Fprintf(w, "Error parsing query from request parameters.")
  665. return
  666. }
  667. start, end, duration, err := toStartEndStep(qp)
  668. if err != nil {
  669. fmt.Fprintf(w, err.Error())
  670. return
  671. }
  672. ctx := prom.NewNamedContext(a.PrometheusClient, prom.FrontendContextName)
  673. body, err := ctx.RawQueryRange(query, start, end, duration)
  674. if err != nil {
  675. fmt.Fprintf(w, "Error running query %s. Error: %s", query, err)
  676. return
  677. }
  678. w.Write(body)
  679. }
  680. func (a *Accesses) ThanosQuery(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  681. w.Header().Set("Content-Type", "application/json")
  682. w.Header().Set("Access-Control-Allow-Origin", "*")
  683. if !thanos.IsEnabled() {
  684. w.Write(WrapData(nil, fmt.Errorf("ThanosDisabled")))
  685. return
  686. }
  687. qp := httputil.NewQueryParams(r.URL.Query())
  688. query := qp.Get("query", "")
  689. if query == "" {
  690. w.Write(WrapData(nil, fmt.Errorf("Query Parameter 'query' is unset'")))
  691. return
  692. }
  693. // Attempt to parse time as either a unix timestamp or as an RFC3339 value
  694. var timeVal time.Time
  695. timeStr := qp.Get("time", "")
  696. if len(timeStr) > 0 {
  697. if t, err := strconv.ParseInt(timeStr, 10, 64); err == nil {
  698. timeVal = time.Unix(t, 0)
  699. } else if t, err := time.Parse(time.RFC3339, timeStr); err == nil {
  700. timeVal = t
  701. }
  702. // If time is given, but not parse-able, return an error
  703. if timeVal.IsZero() {
  704. http.Error(w, fmt.Sprintf("time must be a unix timestamp or RFC3339 value; illegal value given: %s", timeStr), http.StatusBadRequest)
  705. }
  706. }
  707. ctx := prom.NewNamedContext(a.ThanosClient, prom.FrontendContextName)
  708. body, err := ctx.RawQuery(query, timeVal)
  709. if err != nil {
  710. w.Write(WrapData(nil, fmt.Errorf("Error running query %s. Error: %s", query, err)))
  711. return
  712. }
  713. w.Write(body)
  714. }
  715. func (a *Accesses) ThanosQueryRange(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  716. w.Header().Set("Content-Type", "application/json")
  717. w.Header().Set("Access-Control-Allow-Origin", "*")
  718. if !thanos.IsEnabled() {
  719. w.Write(WrapData(nil, fmt.Errorf("ThanosDisabled")))
  720. return
  721. }
  722. qp := httputil.NewQueryParams(r.URL.Query())
  723. query := qp.Get("query", "")
  724. if query == "" {
  725. fmt.Fprintf(w, "Error parsing query from request parameters.")
  726. return
  727. }
  728. start, end, duration, err := toStartEndStep(qp)
  729. if err != nil {
  730. fmt.Fprintf(w, err.Error())
  731. return
  732. }
  733. ctx := prom.NewNamedContext(a.ThanosClient, prom.FrontendContextName)
  734. body, err := ctx.RawQueryRange(query, start, end, duration)
  735. if err != nil {
  736. fmt.Fprintf(w, "Error running query %s. Error: %s", query, err)
  737. return
  738. }
  739. w.Write(body)
  740. }
  741. // helper for query range proxy requests
  742. func toStartEndStep(qp httputil.QueryParams) (start, end time.Time, step time.Duration, err error) {
  743. var e error
  744. ss := qp.Get("start", "")
  745. es := qp.Get("end", "")
  746. ds := qp.Get("duration", "")
  747. layout := "2006-01-02T15:04:05.000Z"
  748. start, e = time.Parse(layout, ss)
  749. if e != nil {
  750. err = fmt.Errorf("Error parsing time %s. Error: %s", ss, err)
  751. return
  752. }
  753. end, e = time.Parse(layout, es)
  754. if e != nil {
  755. err = fmt.Errorf("Error parsing time %s. Error: %s", es, err)
  756. return
  757. }
  758. step, e = time.ParseDuration(ds)
  759. if e != nil {
  760. err = fmt.Errorf("Error parsing duration %s. Error: %s", ds, err)
  761. return
  762. }
  763. err = nil
  764. return
  765. }
  766. func (a *Accesses) GetPrometheusQueueState(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  767. w.Header().Set("Content-Type", "application/json")
  768. w.Header().Set("Access-Control-Allow-Origin", "*")
  769. promQueueState, err := prom.GetPrometheusQueueState(a.PrometheusClient)
  770. if err != nil {
  771. w.Write(WrapData(nil, err))
  772. return
  773. }
  774. result := map[string]*prom.PrometheusQueueState{
  775. "prometheus": promQueueState,
  776. }
  777. if thanos.IsEnabled() {
  778. thanosQueueState, err := prom.GetPrometheusQueueState(a.ThanosClient)
  779. if err != nil {
  780. log.Warnf("Error getting Thanos queue state: %s", err)
  781. } else {
  782. result["thanos"] = thanosQueueState
  783. }
  784. }
  785. w.Write(WrapData(result, nil))
  786. }
  787. // GetPrometheusMetrics retrieves availability of Prometheus and Thanos metrics
  788. func (a *Accesses) GetPrometheusMetrics(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  789. w.Header().Set("Content-Type", "application/json")
  790. w.Header().Set("Access-Control-Allow-Origin", "*")
  791. promMetrics := prom.GetPrometheusMetrics(a.PrometheusClient, "")
  792. result := map[string][]*prom.PrometheusDiagnostic{
  793. "prometheus": promMetrics,
  794. }
  795. if thanos.IsEnabled() {
  796. thanosMetrics := prom.GetPrometheusMetrics(a.ThanosClient, thanos.QueryOffset())
  797. result["thanos"] = thanosMetrics
  798. }
  799. w.Write(WrapData(result, nil))
  800. }
  801. func (a *Accesses) GetAllPersistentVolumes(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  802. w.Header().Set("Content-Type", "application/json")
  803. w.Header().Set("Access-Control-Allow-Origin", "*")
  804. pvList := a.ClusterCache.GetAllPersistentVolumes()
  805. body, err := json.Marshal(wrapAsObjectItems(pvList))
  806. if err != nil {
  807. fmt.Fprintf(w, "Error decoding persistent volumes: "+err.Error())
  808. } else {
  809. w.Write(body)
  810. }
  811. }
  812. func (a *Accesses) GetAllDeployments(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  813. w.Header().Set("Content-Type", "application/json")
  814. w.Header().Set("Access-Control-Allow-Origin", "*")
  815. qp := httputil.NewQueryParams(r.URL.Query())
  816. namespace := qp.Get("namespace", "")
  817. deploymentsList := a.ClusterCache.GetAllDeployments()
  818. // filter for provided namespace
  819. var deployments []*appsv1.Deployment
  820. if namespace == "" {
  821. deployments = deploymentsList
  822. } else {
  823. deployments = []*appsv1.Deployment{}
  824. for _, d := range deploymentsList {
  825. if d.Namespace == namespace {
  826. deployments = append(deployments, d)
  827. }
  828. }
  829. }
  830. body, err := json.Marshal(wrapAsObjectItems(deployments))
  831. if err != nil {
  832. fmt.Fprintf(w, "Error decoding deployment: "+err.Error())
  833. } else {
  834. w.Write(body)
  835. }
  836. }
  837. func (a *Accesses) GetAllStorageClasses(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  838. w.Header().Set("Content-Type", "application/json")
  839. w.Header().Set("Access-Control-Allow-Origin", "*")
  840. scList := a.ClusterCache.GetAllStorageClasses()
  841. body, err := json.Marshal(wrapAsObjectItems(scList))
  842. if err != nil {
  843. fmt.Fprintf(w, "Error decoding storageclasses: "+err.Error())
  844. } else {
  845. w.Write(body)
  846. }
  847. }
  848. func (a *Accesses) GetAllStatefulSets(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  849. w.Header().Set("Content-Type", "application/json")
  850. w.Header().Set("Access-Control-Allow-Origin", "*")
  851. qp := httputil.NewQueryParams(r.URL.Query())
  852. namespace := qp.Get("namespace", "")
  853. statefulSetsList := a.ClusterCache.GetAllStatefulSets()
  854. // filter for provided namespace
  855. var statefulSets []*appsv1.StatefulSet
  856. if namespace == "" {
  857. statefulSets = statefulSetsList
  858. } else {
  859. statefulSets = []*appsv1.StatefulSet{}
  860. for _, ss := range statefulSetsList {
  861. if ss.Namespace == namespace {
  862. statefulSets = append(statefulSets, ss)
  863. }
  864. }
  865. }
  866. body, err := json.Marshal(wrapAsObjectItems(statefulSets))
  867. if err != nil {
  868. fmt.Fprintf(w, "Error decoding deployment: "+err.Error())
  869. } else {
  870. w.Write(body)
  871. }
  872. }
  873. func (a *Accesses) GetAllNodes(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  874. w.Header().Set("Content-Type", "application/json")
  875. w.Header().Set("Access-Control-Allow-Origin", "*")
  876. nodeList := a.ClusterCache.GetAllNodes()
  877. body, err := json.Marshal(wrapAsObjectItems(nodeList))
  878. if err != nil {
  879. fmt.Fprintf(w, "Error decoding nodes: "+err.Error())
  880. } else {
  881. w.Write(body)
  882. }
  883. }
  884. func (a *Accesses) GetAllPods(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  885. w.Header().Set("Content-Type", "application/json")
  886. w.Header().Set("Access-Control-Allow-Origin", "*")
  887. podlist := a.ClusterCache.GetAllPods()
  888. body, err := json.Marshal(wrapAsObjectItems(podlist))
  889. if err != nil {
  890. fmt.Fprintf(w, "Error decoding pods: "+err.Error())
  891. } else {
  892. w.Write(body)
  893. }
  894. }
  895. func (a *Accesses) GetAllNamespaces(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  896. w.Header().Set("Content-Type", "application/json")
  897. w.Header().Set("Access-Control-Allow-Origin", "*")
  898. namespaces := a.ClusterCache.GetAllNamespaces()
  899. body, err := json.Marshal(wrapAsObjectItems(namespaces))
  900. if err != nil {
  901. fmt.Fprintf(w, "Error decoding deployment: "+err.Error())
  902. } else {
  903. w.Write(body)
  904. }
  905. }
  906. func (a *Accesses) GetAllDaemonSets(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  907. w.Header().Set("Content-Type", "application/json")
  908. w.Header().Set("Access-Control-Allow-Origin", "*")
  909. daemonSets := a.ClusterCache.GetAllDaemonSets()
  910. body, err := json.Marshal(wrapAsObjectItems(daemonSets))
  911. if err != nil {
  912. fmt.Fprintf(w, "Error decoding daemon set: "+err.Error())
  913. } else {
  914. w.Write(body)
  915. }
  916. }
  917. func (a *Accesses) GetPod(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  918. w.Header().Set("Content-Type", "application/json")
  919. w.Header().Set("Access-Control-Allow-Origin", "*")
  920. podName := ps.ByName("name")
  921. podNamespace := ps.ByName("namespace")
  922. // TODO: ClusterCache API could probably afford to have some better filtering
  923. allPods := a.ClusterCache.GetAllPods()
  924. for _, pod := range allPods {
  925. for _, container := range pod.Spec.Containers {
  926. container.Env = make([]v1.EnvVar, 0)
  927. }
  928. if pod.Namespace == podNamespace && pod.Name == podName {
  929. body, err := json.Marshal(pod)
  930. if err != nil {
  931. fmt.Fprintf(w, "Error decoding pod: "+err.Error())
  932. } else {
  933. w.Write(body)
  934. }
  935. return
  936. }
  937. }
  938. fmt.Fprintf(w, "Pod not found\n")
  939. }
  940. func (a *Accesses) PrometheusRecordingRules(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  941. w.Header().Set("Content-Type", "application/json")
  942. w.Header().Set("Access-Control-Allow-Origin", "*")
  943. u := a.PrometheusClient.URL(epRules, nil)
  944. req, err := http.NewRequest(http.MethodGet, u.String(), nil)
  945. if err != nil {
  946. fmt.Fprintf(w, "Error creating Prometheus rule request: "+err.Error())
  947. }
  948. _, body, err := a.PrometheusClient.Do(r.Context(), req)
  949. if err != nil {
  950. fmt.Fprintf(w, "Error making Prometheus rule request: "+err.Error())
  951. } else {
  952. w.Write(body)
  953. }
  954. }
  955. func (a *Accesses) PrometheusConfig(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  956. w.Header().Set("Content-Type", "application/json")
  957. w.Header().Set("Access-Control-Allow-Origin", "*")
  958. pConfig := map[string]string{
  959. "address": env.GetPrometheusServerEndpoint(),
  960. }
  961. body, err := json.Marshal(pConfig)
  962. if err != nil {
  963. fmt.Fprintf(w, "Error marshalling prometheus config")
  964. } else {
  965. w.Write(body)
  966. }
  967. }
  968. func (a *Accesses) PrometheusTargets(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  969. w.Header().Set("Content-Type", "application/json")
  970. w.Header().Set("Access-Control-Allow-Origin", "*")
  971. u := a.PrometheusClient.URL(epTargets, nil)
  972. req, err := http.NewRequest(http.MethodGet, u.String(), nil)
  973. if err != nil {
  974. fmt.Fprintf(w, "Error creating Prometheus rule request: "+err.Error())
  975. }
  976. _, body, err := a.PrometheusClient.Do(r.Context(), req)
  977. if err != nil {
  978. fmt.Fprintf(w, "Error making Prometheus rule request: "+err.Error())
  979. } else {
  980. w.Write(body)
  981. }
  982. }
  983. func (a *Accesses) GetOrphanedPods(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  984. w.Header().Set("Content-Type", "application/json")
  985. w.Header().Set("Access-Control-Allow-Origin", "*")
  986. podlist := a.ClusterCache.GetAllPods()
  987. var lonePods []*v1.Pod
  988. for _, pod := range podlist {
  989. if len(pod.OwnerReferences) == 0 {
  990. lonePods = append(lonePods, pod)
  991. }
  992. }
  993. body, err := json.Marshal(lonePods)
  994. if err != nil {
  995. fmt.Fprintf(w, "Error decoding pod: "+err.Error())
  996. } else {
  997. w.Write(body)
  998. }
  999. }
  1000. func (a *Accesses) GetInstallNamespace(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  1001. w.Header().Set("Content-Type", "application/json")
  1002. w.Header().Set("Access-Control-Allow-Origin", "*")
  1003. ns := env.GetKubecostNamespace()
  1004. w.Write([]byte(ns))
  1005. }
// InstallInfo is the response payload for the install-info endpoint. It
// describes the running install: its container images, basic cluster
// counts, and the application version.
type InstallInfo struct {
	Containers  []ContainerInfo   `json:"containers"`  // containers of the cost-analyzer pod; null when none found
	ClusterInfo map[string]string `json:"clusterInfo"` // e.g. "nodeCount", "podCount" (see GetInstallInfo)
	Version     string            `json:"version"`     // friendly application version string
}
// ContainerInfo summarizes one container status from the cost-analyzer
// pod for the install-info endpoint.
type ContainerInfo struct {
	ContainerName string `json:"containerName"` // container name within the pod
	Image         string `json:"image"`         // image reference the container runs
	ImageID       string `json:"imageID"`       // resolved image ID reported by the kubelet
	StartTime     string `json:"startTime"`     // pod start time, stringified
	Restarts      int32  `json:"restarts"`      // container restart count
}
  1018. func (a *Accesses) GetInstallInfo(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  1019. w.Header().Set("Content-Type", "application/json")
  1020. w.Header().Set("Access-Control-Allow-Origin", "*")
  1021. pods, err := a.KubeClientSet.CoreV1().Pods(env.GetKubecostNamespace()).List(context.Background(), metav1.ListOptions{
  1022. LabelSelector: "app=cost-analyzer",
  1023. FieldSelector: "status.phase=Running",
  1024. Limit: 1,
  1025. })
  1026. if err != nil {
  1027. writeErrorResponse(w, 500, fmt.Sprintf("Unable to list pods: %s", err.Error()))
  1028. return
  1029. }
  1030. info := InstallInfo{
  1031. ClusterInfo: make(map[string]string),
  1032. Version: version.FriendlyVersion(),
  1033. }
  1034. // If we have zero pods either something is weird with the install since the app selector is not exposed in the helm
  1035. // chart or more likely we are running locally - in either case Images field will return as null
  1036. if len(pods.Items) > 0 {
  1037. for _, pod := range pods.Items {
  1038. for _, container := range pod.Status.ContainerStatuses {
  1039. c := ContainerInfo{
  1040. ContainerName: container.Name,
  1041. Image: container.Image,
  1042. ImageID: container.ImageID,
  1043. StartTime: pod.Status.StartTime.String(),
  1044. Restarts: container.RestartCount,
  1045. }
  1046. info.Containers = append(info.Containers, c)
  1047. }
  1048. }
  1049. }
  1050. nodes := a.ClusterCache.GetAllNodes()
  1051. cachePods := a.ClusterCache.GetAllPods()
  1052. info.ClusterInfo["nodeCount"] = strconv.Itoa(len(nodes))
  1053. info.ClusterInfo["podCount"] = strconv.Itoa(len(cachePods))
  1054. body, err := json.Marshal(info)
  1055. if err != nil {
  1056. writeErrorResponse(w, 500, fmt.Sprintf("Error decoding pod: %s", err.Error()))
  1057. return
  1058. }
  1059. w.Write(body)
  1060. }
  1061. // logsFor pulls the logs for a specific pod, namespace, and container
  1062. func logsFor(c kubernetes.Interface, namespace string, pod string, container string, dur time.Duration, ctx context.Context) (string, error) {
  1063. since := time.Now().UTC().Add(-dur)
  1064. logOpts := v1.PodLogOptions{
  1065. SinceTime: &metav1.Time{Time: since},
  1066. }
  1067. if container != "" {
  1068. logOpts.Container = container
  1069. }
  1070. req := c.CoreV1().Pods(namespace).GetLogs(pod, &logOpts)
  1071. reader, err := req.Stream(ctx)
  1072. if err != nil {
  1073. return "", err
  1074. }
  1075. podLogs, err := io.ReadAll(reader)
  1076. if err != nil {
  1077. return "", err
  1078. }
  1079. // If color is already disabled then we don't need to process the logs
  1080. // to drop ANSI colors
  1081. if !viper.GetBool("disable-log-color") {
  1082. podLogs = ANSIRegex.ReplaceAll(podLogs, []byte{})
  1083. }
  1084. return string(podLogs), nil
  1085. }
// GetPodLogs returns logs for either a single named pod (query param
// "pod") or every pod matching a label selector (query param "selector").
// Other query params: "namespace" (defaults to the Kubecost install
// namespace), "container" (optional; validated against the pod spec when
// "pod" is used), and "since" (Go duration string, default "24h").
// Results are concatenated plain text with a LogSeparator banner per
// container. NOTE(review): errors are written with 200 status as plain
// text, consistent with the other handlers in this file.
func (a *Accesses) GetPodLogs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
	w.Header().Set("Content-Type", "application/json")
	w.Header().Set("Access-Control-Allow-Origin", "*")
	qp := httputil.NewQueryParams(r.URL.Query())
	ns := qp.Get("namespace", env.GetKubecostNamespace())
	pod := qp.Get("pod", "")
	selector := qp.Get("selector", "")
	container := qp.Get("container", "")
	since := qp.Get("since", "24h")
	// "since" bounds how far back logs are fetched.
	sinceDuration, err := time.ParseDuration(since)
	if err != nil {
		fmt.Fprintf(w, "Invalid Duration String: "+err.Error())
		return
	}
	// appendLog accumulates one banner-delimited section per container;
	// empty log bodies are skipped entirely.
	var logResult string
	appendLog := func(ns string, pod string, container string, l string) {
		if l == "" {
			return
		}
		logResult += fmt.Sprintf("%s\n| %s:%s:%s\n%s\n%s\n\n", LogSeparator, ns, pod, container, LogSeparator, l)
	}
	// Mode 1: a specific pod was named. "pod" takes precedence over
	// "selector"; this branch always returns.
	if pod != "" {
		pd, err := a.KubeClientSet.CoreV1().Pods(ns).Get(r.Context(), pod, metav1.GetOptions{})
		if err != nil {
			fmt.Fprintf(w, "Error Finding Pod: "+err.Error())
			return
		}
		// If a container was requested, verify it exists on the pod
		// (case-insensitive match) before fetching logs.
		if container != "" {
			var foundContainer bool
			for _, cont := range pd.Spec.Containers {
				if strings.EqualFold(cont.Name, container) {
					foundContainer = true
					break
				}
			}
			if !foundContainer {
				fmt.Fprintf(w, "Could not find container: "+container)
				return
			}
		}
		logs, err := logsFor(a.KubeClientSet, ns, pod, container, sinceDuration, r.Context())
		if err != nil {
			fmt.Fprintf(w, "Error Getting Logs: "+err.Error())
			return
		}
		appendLog(ns, pod, container, logs)
		w.Write([]byte(logResult))
		return
	}
	// Mode 2: collect logs for every container of every pod matching the
	// label selector. Per-container log errors are skipped (best-effort).
	if selector != "" {
		pods, err := a.KubeClientSet.CoreV1().Pods(ns).List(r.Context(), metav1.ListOptions{LabelSelector: selector})
		if err != nil {
			fmt.Fprintf(w, "Error Finding Pod: "+err.Error())
			return
		}
		for _, pd := range pods.Items {
			for _, cont := range pd.Spec.Containers {
				logs, err := logsFor(a.KubeClientSet, ns, pd.Name, cont.Name, sinceDuration, r.Context())
				if err != nil {
					continue
				}
				appendLog(ns, pd.Name, cont.Name, logs)
			}
		}
	}
	// With neither "pod" nor "selector" set, this writes an empty body.
	w.Write([]byte(logResult))
}
  1153. func (a *Accesses) AddServiceKey(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  1154. w.Header().Set("Content-Type", "application/json")
  1155. w.Header().Set("Access-Control-Allow-Origin", "*")
  1156. r.ParseForm()
  1157. key := r.PostForm.Get("key")
  1158. k := []byte(key)
  1159. err := os.WriteFile(path.Join(env.GetConfigPathWithDefault(env.DefaultConfigMountPath), "key.json"), k, 0644)
  1160. if err != nil {
  1161. fmt.Fprintf(w, "Error writing service key: "+err.Error())
  1162. }
  1163. w.WriteHeader(http.StatusOK)
  1164. }
  1165. func (a *Accesses) GetHelmValues(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  1166. w.Header().Set("Content-Type", "application/json")
  1167. w.Header().Set("Access-Control-Allow-Origin", "*")
  1168. encodedValues := sysenv.Get("HELM_VALUES", "")
  1169. if encodedValues == "" {
  1170. fmt.Fprintf(w, "Values reporting disabled")
  1171. return
  1172. }
  1173. result, err := base64.StdEncoding.DecodeString(encodedValues)
  1174. if err != nil {
  1175. fmt.Fprintf(w, "Failed to decode encoded values: %s", err)
  1176. return
  1177. }
  1178. w.Write(result)
  1179. }
  1180. func (a *Accesses) Status(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  1181. w.Header().Set("Content-Type", "application/json")
  1182. w.Header().Set("Access-Control-Allow-Origin", "*")
  1183. promServer := env.GetPrometheusServerEndpoint()
  1184. api := prometheusAPI.NewAPI(a.PrometheusClient)
  1185. result, err := api.Buildinfo(r.Context())
  1186. if err != nil {
  1187. fmt.Fprintf(w, "Using Prometheus at "+promServer+". Error: "+err.Error())
  1188. } else {
  1189. fmt.Fprintf(w, "Using Prometheus at "+promServer+". Version: "+result.Version)
  1190. }
  1191. }
// LogLevelRequestResponse is the JSON body for both reading (GetLogLevel)
// and setting (SetLogLevel) the service log level.
type LogLevelRequestResponse struct {
	Level string `json:"level"` // zerolog level name, e.g. "info" or "debug"
}
  1195. func (a *Accesses) GetLogLevel(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  1196. w.Header().Set("Content-Type", "application/json")
  1197. w.Header().Set("Access-Control-Allow-Origin", "*")
  1198. level := log.GetLogLevel()
  1199. llrr := LogLevelRequestResponse{
  1200. Level: level,
  1201. }
  1202. body, err := json.Marshal(llrr)
  1203. if err != nil {
  1204. http.Error(w, fmt.Sprintf("unable to retrive log level"), http.StatusInternalServerError)
  1205. return
  1206. }
  1207. _, err = w.Write(body)
  1208. if err != nil {
  1209. http.Error(w, fmt.Sprintf("unable to write response: %s", body), http.StatusInternalServerError)
  1210. return
  1211. }
  1212. }
  1213. func (a *Accesses) SetLogLevel(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  1214. params := LogLevelRequestResponse{}
  1215. err := json.NewDecoder(r.Body).Decode(&params)
  1216. if err != nil {
  1217. http.Error(w, fmt.Sprintf("unable to decode request body, error: %s", err), http.StatusBadRequest)
  1218. return
  1219. }
  1220. err = log.SetLogLevel(params.Level)
  1221. if err != nil {
  1222. http.Error(w, fmt.Sprintf("level must be a valid log level according to zerolog; level given: %s, error: %s", params.Level, err), http.StatusBadRequest)
  1223. return
  1224. }
  1225. w.WriteHeader(http.StatusOK)
  1226. }
  1227. // captures the panic event in sentry
  1228. func capturePanicEvent(err string, stack string) {
  1229. msg := fmt.Sprintf("Panic: %s\nStackTrace: %s\n", err, stack)
  1230. log.Infof(msg)
  1231. sentry.CurrentHub().CaptureEvent(&sentry.Event{
  1232. Level: sentry.LevelError,
  1233. Message: msg,
  1234. })
  1235. sentry.Flush(5 * time.Second)
  1236. }
  1237. // handle any panics reported by the errors package
  1238. func handlePanic(p errors.Panic) bool {
  1239. err := p.Error
  1240. if err != nil {
  1241. if err, ok := err.(error); ok {
  1242. capturePanicEvent(err.Error(), p.Stack)
  1243. }
  1244. if err, ok := err.(string); ok {
  1245. capturePanicEvent(err, p.Stack)
  1246. }
  1247. }
  1248. // Return true to recover iff the type is http, otherwise allow kubernetes
  1249. // to recover.
  1250. return p.Type == errors.PanicTypeHTTP
  1251. }
  1252. func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses {
  1253. configWatchers := watcher.NewConfigMapWatchers(additionalConfigWatchers...)
  1254. var err error
  1255. if errorReportingEnabled {
  1256. err = sentry.Init(sentry.ClientOptions{Release: version.FriendlyVersion()})
  1257. if err != nil {
  1258. log.Infof("Failed to initialize sentry for error reporting")
  1259. } else {
  1260. err = errors.SetPanicHandler(handlePanic)
  1261. if err != nil {
  1262. log.Infof("Failed to set panic handler: %s", err)
  1263. }
  1264. }
  1265. }
  1266. address := env.GetPrometheusServerEndpoint()
  1267. if address == "" {
  1268. log.Fatalf("No address for prometheus set in $%s. Aborting.", env.PrometheusServerEndpointEnvVar)
  1269. }
  1270. queryConcurrency := env.GetMaxQueryConcurrency()
  1271. log.Infof("Prometheus/Thanos Client Max Concurrency set to %d", queryConcurrency)
  1272. timeout := 120 * time.Second
  1273. keepAlive := 120 * time.Second
  1274. tlsHandshakeTimeout := 10 * time.Second
  1275. scrapeInterval := env.GetKubecostScrapeInterval()
  1276. var rateLimitRetryOpts *prom.RateLimitRetryOpts = nil
  1277. if env.IsPrometheusRetryOnRateLimitResponse() {
  1278. rateLimitRetryOpts = &prom.RateLimitRetryOpts{
  1279. MaxRetries: env.GetPrometheusRetryOnRateLimitMaxRetries(),
  1280. DefaultRetryWait: env.GetPrometheusRetryOnRateLimitDefaultWait(),
  1281. }
  1282. }
  1283. promCli, err := prom.NewPrometheusClient(address, &prom.PrometheusClientConfig{
  1284. Timeout: timeout,
  1285. KeepAlive: keepAlive,
  1286. TLSHandshakeTimeout: tlsHandshakeTimeout,
  1287. TLSInsecureSkipVerify: env.GetInsecureSkipVerify(),
  1288. RateLimitRetryOpts: rateLimitRetryOpts,
  1289. Auth: &prom.ClientAuth{
  1290. Username: env.GetDBBasicAuthUsername(),
  1291. Password: env.GetDBBasicAuthUserPassword(),
  1292. BearerToken: env.GetDBBearerToken(),
  1293. },
  1294. QueryConcurrency: queryConcurrency,
  1295. QueryLogFile: "",
  1296. HeaderXScopeOrgId: env.GetPrometheusHeaderXScopeOrgId(),
  1297. })
  1298. if err != nil {
  1299. log.Fatalf("Failed to create prometheus client, Error: %v", err)
  1300. }
  1301. m, err := prom.Validate(promCli)
  1302. if err != nil || !m.Running {
  1303. if err != nil {
  1304. log.Errorf("Failed to query prometheus at %s. Error: %s . Troubleshooting help available at: %s", address, err.Error(), prom.PrometheusTroubleshootingURL)
  1305. } else if !m.Running {
  1306. log.Errorf("Prometheus at %s is not running. Troubleshooting help available at: %s", address, prom.PrometheusTroubleshootingURL)
  1307. }
  1308. } else {
  1309. log.Infof("Success: retrieved the 'up' query against prometheus at: " + address)
  1310. }
  1311. api := prometheusAPI.NewAPI(promCli)
  1312. _, err = api.Buildinfo(context.Background())
  1313. if err != nil {
  1314. log.Infof("No valid prometheus config file at %s. Error: %s . Troubleshooting help available at: %s. Ignore if using cortex/mimir/thanos here.", address, err.Error(), prom.PrometheusTroubleshootingURL)
  1315. } else {
  1316. log.Infof("Retrieved a prometheus config file from: %s", address)
  1317. }
  1318. if scrapeInterval == 0 {
  1319. scrapeInterval = time.Minute
  1320. // Lookup scrape interval for kubecost job, update if found
  1321. si, err := prom.ScrapeIntervalFor(promCli, env.GetKubecostJobName())
  1322. if err == nil {
  1323. scrapeInterval = si
  1324. }
  1325. }
  1326. log.Infof("Using scrape interval of %f", scrapeInterval.Seconds())
  1327. // Kubernetes API setup
  1328. kubeClientset, err := kubeconfig.LoadKubeClient("")
  1329. if err != nil {
  1330. log.Fatalf("Failed to build Kubernetes client: %s", err.Error())
  1331. }
  1332. // Create ConfigFileManager for synchronization of shared configuration
  1333. confManager := config.NewConfigFileManager(&config.ConfigFileManagerOpts{
  1334. BucketStoreConfig: env.GetKubecostConfigBucket(),
  1335. LocalConfigPath: "/",
  1336. })
  1337. configPrefix := env.GetConfigPathWithDefault("/var/configs/")
  1338. // Create Kubernetes Cluster Cache + Watchers
  1339. var k8sCache clustercache.ClusterCache
  1340. if env.IsClusterCacheFileEnabled() {
  1341. importLocation := confManager.ConfigFileAt(path.Join(configPrefix, "cluster-cache.json"))
  1342. k8sCache = clustercache.NewClusterImporter(importLocation)
  1343. } else {
  1344. k8sCache = clustercache.NewKubernetesClusterCache(kubeClientset)
  1345. }
  1346. k8sCache.Run()
  1347. cloudProviderKey := env.GetCloudProviderAPIKey()
  1348. cloudProvider, err := provider.NewProvider(k8sCache, cloudProviderKey, confManager)
  1349. if err != nil {
  1350. panic(err.Error())
  1351. }
  1352. // Append the pricing config watcher
  1353. configWatchers.AddWatcher(provider.ConfigWatcherFor(cloudProvider))
  1354. configWatchers.AddWatcher(metrics.GetMetricsConfigWatcher())
  1355. watchConfigFunc := configWatchers.ToWatchFunc()
  1356. watchedConfigs := configWatchers.GetWatchedConfigs()
  1357. kubecostNamespace := env.GetKubecostNamespace()
  1358. // We need an initial invocation because the init of the cache has happened before we had access to the provider.
  1359. for _, cw := range watchedConfigs {
  1360. configs, err := kubeClientset.CoreV1().ConfigMaps(kubecostNamespace).Get(context.Background(), cw, metav1.GetOptions{})
  1361. if err != nil {
  1362. log.Infof("No %s configmap found at install time, using existing configs: %s", cw, err.Error())
  1363. } else {
  1364. log.Infof("Found configmap %s, watching...", configs.Name)
  1365. watchConfigFunc(configs)
  1366. }
  1367. }
  1368. k8sCache.SetConfigMapUpdateFunc(watchConfigFunc)
  1369. remoteEnabled := env.IsRemoteEnabled()
  1370. if remoteEnabled {
  1371. info, err := cloudProvider.ClusterInfo()
  1372. log.Infof("Saving cluster with id:'%s', and name:'%s' to durable storage", info["id"], info["name"])
  1373. if err != nil {
  1374. log.Infof("Error saving cluster id %s", err.Error())
  1375. }
  1376. _, _, err = utils.GetOrCreateClusterMeta(info["id"], info["name"])
  1377. if err != nil {
  1378. log.Infof("Unable to set cluster id '%s' for cluster '%s', %s", info["id"], info["name"], err.Error())
  1379. }
  1380. }
  1381. // Thanos Client
  1382. var thanosClient prometheus.Client
  1383. if thanos.IsEnabled() {
  1384. thanosAddress := thanos.QueryURL()
  1385. if thanosAddress != "" {
  1386. thanosCli, _ := thanos.NewThanosClient(thanosAddress, &prom.PrometheusClientConfig{
  1387. Timeout: timeout,
  1388. KeepAlive: keepAlive,
  1389. TLSHandshakeTimeout: tlsHandshakeTimeout,
  1390. TLSInsecureSkipVerify: env.GetInsecureSkipVerify(),
  1391. RateLimitRetryOpts: rateLimitRetryOpts,
  1392. Auth: &prom.ClientAuth{
  1393. Username: env.GetMultiClusterBasicAuthUsername(),
  1394. Password: env.GetMultiClusterBasicAuthPassword(),
  1395. BearerToken: env.GetMultiClusterBearerToken(),
  1396. },
  1397. QueryConcurrency: queryConcurrency,
  1398. QueryLogFile: env.GetQueryLoggingFile(),
  1399. })
  1400. _, err = prom.Validate(thanosCli)
  1401. if err != nil {
  1402. log.Warnf("Failed to query Thanos at %s. Error: %s.", thanosAddress, err.Error())
  1403. thanosClient = thanosCli
  1404. } else {
  1405. log.Infof("Success: retrieved the 'up' query against Thanos at: " + thanosAddress)
  1406. thanosClient = thanosCli
  1407. }
  1408. } else {
  1409. log.Infof("Error resolving environment variable: $%s", env.ThanosQueryUrlEnvVar)
  1410. }
  1411. }
  1412. // ClusterInfo Provider to provide the cluster map with local and remote cluster data
  1413. var clusterInfoProvider clusters.ClusterInfoProvider
  1414. if env.IsClusterInfoFileEnabled() {
  1415. clusterInfoFile := confManager.ConfigFileAt(path.Join(configPrefix, "cluster-info.json"))
  1416. clusterInfoProvider = NewConfiguredClusterInfoProvider(clusterInfoFile)
  1417. } else {
  1418. clusterInfoProvider = NewLocalClusterInfoProvider(kubeClientset, cloudProvider)
  1419. }
  1420. // Initialize ClusterMap for maintaining ClusterInfo by ClusterID
  1421. var clusterMap clusters.ClusterMap
  1422. if thanosClient != nil {
  1423. clusterMap = clustermap.NewClusterMap(thanosClient, clusterInfoProvider, 10*time.Minute)
  1424. } else {
  1425. clusterMap = clustermap.NewClusterMap(promCli, clusterInfoProvider, 5*time.Minute)
  1426. }
  1427. // cache responses from model and aggregation for a default of 10 minutes;
  1428. // clear expired responses every 20 minutes
  1429. aggregateCache := cache.New(time.Minute*10, time.Minute*20)
  1430. costDataCache := cache.New(time.Minute*10, time.Minute*20)
  1431. clusterCostsCache := cache.New(cache.NoExpiration, cache.NoExpiration)
  1432. outOfClusterCache := cache.New(time.Minute*5, time.Minute*10)
  1433. settingsCache := cache.New(cache.NoExpiration, cache.NoExpiration)
  1434. // query durations that should be cached longer should be registered here
  1435. // use relatively prime numbers to minimize likelihood of synchronized
  1436. // attempts at cache warming
  1437. day := 24 * time.Hour
  1438. cacheExpiration := map[time.Duration]time.Duration{
  1439. day: maxCacheMinutes1d * time.Minute,
  1440. 2 * day: maxCacheMinutes2d * time.Minute,
  1441. 7 * day: maxCacheMinutes7d * time.Minute,
  1442. 30 * day: maxCacheMinutes30d * time.Minute,
  1443. }
  1444. var pc prometheus.Client
  1445. if thanosClient != nil {
  1446. pc = thanosClient
  1447. } else {
  1448. pc = promCli
  1449. }
  1450. costModel := NewCostModel(pc, cloudProvider, k8sCache, clusterMap, scrapeInterval)
  1451. metricsEmitter := NewCostModelMetricsEmitter(promCli, k8sCache, cloudProvider, clusterInfoProvider, costModel)
  1452. a := &Accesses{
  1453. Router: httprouter.New(),
  1454. PrometheusClient: promCli,
  1455. ThanosClient: thanosClient,
  1456. KubeClientSet: kubeClientset,
  1457. ClusterCache: k8sCache,
  1458. ClusterMap: clusterMap,
  1459. CloudProvider: cloudProvider,
  1460. CloudConfigController: cloudconfig.NewController(cloudProvider),
  1461. ConfigFileManager: confManager,
  1462. ClusterInfoProvider: clusterInfoProvider,
  1463. Model: costModel,
  1464. MetricsEmitter: metricsEmitter,
  1465. AggregateCache: aggregateCache,
  1466. CostDataCache: costDataCache,
  1467. ClusterCostsCache: clusterCostsCache,
  1468. OutOfClusterCache: outOfClusterCache,
  1469. SettingsCache: settingsCache,
  1470. CacheExpiration: cacheExpiration,
  1471. httpServices: services.NewCostModelServices(),
  1472. }
  1473. // Use the Accesses instance, itself, as the CostModelAggregator. This is
  1474. // confusing and unconventional, but necessary so that we can swap it
  1475. // out for the ETL-adapted version elsewhere.
  1476. // TODO clean this up once ETL is open-sourced.
  1477. a.AggAPI = a
  1478. // Initialize mechanism for subscribing to settings changes
  1479. a.InitializeSettingsPubSub()
  1480. err = a.CloudProvider.DownloadPricingData()
  1481. if err != nil {
  1482. log.Infof("Failed to download pricing data: " + err.Error())
  1483. }
  1484. // Warm the aggregate cache unless explicitly set to false
  1485. if env.IsCacheWarmingEnabled() {
  1486. log.Infof("Init: AggregateCostModel cache warming enabled")
  1487. a.warmAggregateCostModelCache()
  1488. } else {
  1489. log.Infof("Init: AggregateCostModel cache warming disabled")
  1490. }
  1491. if !env.IsKubecostMetricsPodEnabled() {
  1492. a.MetricsEmitter.Start()
  1493. }
  1494. log.Infof("Custom Costs enabled: %t", env.IsCustomCostEnabled())
  1495. if env.IsCustomCostEnabled() {
  1496. hourlyRepo := customcost.NewMemoryRepository()
  1497. dailyRepo := customcost.NewMemoryRepository()
  1498. ingConfig := customcost.DefaultIngestorConfiguration()
  1499. var err error
  1500. a.CustomCostPipelineService, err = customcost.NewPipelineService(hourlyRepo, dailyRepo, ingConfig)
  1501. if err != nil {
  1502. log.Errorf("error instantiating custom cost pipeline service: %v", err)
  1503. return nil
  1504. }
  1505. customCostQuerier := customcost.NewRepositoryQuerier(hourlyRepo, dailyRepo, ingConfig.HourlyDuration, ingConfig.DailyDuration)
  1506. a.CustomCostQueryService = customcost.NewQueryService(customCostQuerier)
  1507. }
  1508. a.Router.GET("/costDataModel", a.CostDataModel)
  1509. a.Router.GET("/costDataModelRange", a.CostDataModelRange)
  1510. a.Router.GET("/aggregatedCostModel", a.AggregateCostModelHandler)
  1511. a.Router.GET("/allocation/compute", a.ComputeAllocationHandler)
  1512. a.Router.GET("/allocation/compute/summary", a.ComputeAllocationHandlerSummary)
  1513. a.Router.GET("/allNodePricing", a.GetAllNodePricing)
  1514. a.Router.POST("/refreshPricing", a.RefreshPricingData)
  1515. a.Router.GET("/clusterCostsOverTime", a.ClusterCostsOverTime)
  1516. a.Router.GET("/clusterCosts", a.ClusterCosts)
  1517. a.Router.GET("/clusterCostsFromCache", a.ClusterCostsFromCacheHandler)
  1518. a.Router.GET("/validatePrometheus", a.GetPrometheusMetadata)
  1519. a.Router.GET("/managementPlatform", a.ManagementPlatform)
  1520. a.Router.GET("/clusterInfo", a.ClusterInfo)
  1521. a.Router.GET("/clusterInfoMap", a.GetClusterInfoMap)
  1522. a.Router.GET("/serviceAccountStatus", a.GetServiceAccountStatus)
  1523. a.Router.GET("/pricingSourceStatus", a.GetPricingSourceStatus)
  1524. a.Router.GET("/pricingSourceSummary", a.GetPricingSourceSummary)
  1525. a.Router.GET("/pricingSourceCounts", a.GetPricingSourceCounts)
  1526. // endpoints migrated from server
  1527. a.Router.GET("/allPersistentVolumes", a.GetAllPersistentVolumes)
  1528. a.Router.GET("/allDeployments", a.GetAllDeployments)
  1529. a.Router.GET("/allStorageClasses", a.GetAllStorageClasses)
  1530. a.Router.GET("/allStatefulSets", a.GetAllStatefulSets)
  1531. a.Router.GET("/allNodes", a.GetAllNodes)
  1532. a.Router.GET("/allPods", a.GetAllPods)
  1533. a.Router.GET("/allNamespaces", a.GetAllNamespaces)
  1534. a.Router.GET("/allDaemonSets", a.GetAllDaemonSets)
  1535. a.Router.GET("/pod/:namespace/:name", a.GetPod)
  1536. a.Router.GET("/prometheusRecordingRules", a.PrometheusRecordingRules)
  1537. a.Router.GET("/prometheusConfig", a.PrometheusConfig)
  1538. a.Router.GET("/prometheusTargets", a.PrometheusTargets)
  1539. a.Router.GET("/orphanedPods", a.GetOrphanedPods)
  1540. a.Router.GET("/installNamespace", a.GetInstallNamespace)
  1541. a.Router.GET("/installInfo", a.GetInstallInfo)
  1542. a.Router.GET("/podLogs", a.GetPodLogs)
  1543. a.Router.POST("/serviceKey", a.AddServiceKey)
  1544. a.Router.GET("/helmValues", a.GetHelmValues)
  1545. a.Router.GET("/status", a.Status)
  1546. // prom query proxies
  1547. a.Router.GET("/prometheusQuery", a.PrometheusQuery)
  1548. a.Router.GET("/prometheusQueryRange", a.PrometheusQueryRange)
  1549. a.Router.GET("/thanosQuery", a.ThanosQuery)
  1550. a.Router.GET("/thanosQueryRange", a.ThanosQueryRange)
  1551. // diagnostics
  1552. a.Router.GET("/diagnostics/requestQueue", a.GetPrometheusQueueState)
  1553. a.Router.GET("/diagnostics/prometheusMetrics", a.GetPrometheusMetrics)
  1554. a.Router.GET("/logs/level", a.GetLogLevel)
  1555. a.Router.POST("/logs/level", a.SetLogLevel)
  1556. a.Router.GET("/cloud/config/export", a.CloudConfigController.GetExportConfigHandler())
  1557. a.Router.GET("/cloud/config/enable", a.CloudConfigController.GetEnableConfigHandler())
  1558. a.Router.GET("/cloud/config/disable", a.CloudConfigController.GetDisableConfigHandler())
  1559. a.Router.GET("/cloud/config/delete", a.CloudConfigController.GetDeleteConfigHandler())
  1560. if env.IsCustomCostEnabled() {
  1561. a.Router.GET("/customCost/total", a.CustomCostQueryService.GetCustomCostTotalHandler())
  1562. a.Router.GET("/customCost/timeseries", a.CustomCostQueryService.GetCustomCostTimeseriesHandler())
  1563. }
  1564. // this endpoint is intentionally left out of the "if env.IsCustomCostEnabled()" conditional; in the handler, it is
  1565. // valid for CustomCostPipelineService to be nil
  1566. a.Router.GET("/customCost/status", a.CustomCostPipelineService.GetCustomCostStatusHandler())
  1567. a.httpServices.RegisterAll(a.Router)
  1568. return a
  1569. }
  1570. func InitializeWithoutKubernetes() *Accesses {
  1571. var err error
  1572. if errorReportingEnabled {
  1573. err = sentry.Init(sentry.ClientOptions{Release: version.FriendlyVersion()})
  1574. if err != nil {
  1575. log.Infof("Failed to initialize sentry for error reporting")
  1576. } else {
  1577. err = errors.SetPanicHandler(handlePanic)
  1578. if err != nil {
  1579. log.Infof("Failed to set panic handler: %s", err)
  1580. }
  1581. }
  1582. }
  1583. a := &Accesses{
  1584. Router: httprouter.New(),
  1585. CloudConfigController: cloudconfig.NewController(nil),
  1586. httpServices: services.NewCostModelServices(),
  1587. }
  1588. a.Router.GET("/logs/level", a.GetLogLevel)
  1589. a.Router.POST("/logs/level", a.SetLogLevel)
  1590. a.httpServices.RegisterAll(a.Router)
  1591. return a
  1592. }
  1593. func writeErrorResponse(w http.ResponseWriter, code int, message string) {
  1594. out := map[string]string{
  1595. "message": message,
  1596. }
  1597. bytes, err := json.Marshal(out)
  1598. if err != nil {
  1599. w.Header().Set("Content-Type", "text/plain")
  1600. w.WriteHeader(500)
  1601. fmt.Fprint(w, "unable to marshall json for error")
  1602. log.Warnf("Failed to marshall JSON for error response: %s", err.Error())
  1603. return
  1604. }
  1605. w.WriteHeader(code)
  1606. fmt.Fprint(w, string(bytes))
  1607. }