router.go 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648
  1. package costmodel
  2. import (
  3. "context"
  4. "encoding/base64"
  5. "fmt"
  6. "io/ioutil"
  7. "net/http"
  8. "reflect"
  9. "strconv"
  10. "strings"
  11. "sync"
  12. "time"
  13. "github.com/kubecost/cost-model/pkg/config"
  14. "github.com/kubecost/cost-model/pkg/services"
  15. "github.com/kubecost/cost-model/pkg/util/httputil"
  16. "github.com/kubecost/cost-model/pkg/util/timeutil"
  17. "github.com/kubecost/cost-model/pkg/util/watcher"
  18. "github.com/microcosm-cc/bluemonday"
  19. v1 "k8s.io/api/core/v1"
  20. "k8s.io/klog"
  21. "github.com/julienschmidt/httprouter"
  22. sentry "github.com/getsentry/sentry-go"
  23. "github.com/kubecost/cost-model/pkg/cloud"
  24. "github.com/kubecost/cost-model/pkg/clustercache"
  25. "github.com/kubecost/cost-model/pkg/costmodel/clusters"
  26. "github.com/kubecost/cost-model/pkg/env"
  27. "github.com/kubecost/cost-model/pkg/errors"
  28. "github.com/kubecost/cost-model/pkg/kubecost"
  29. "github.com/kubecost/cost-model/pkg/log"
  30. "github.com/kubecost/cost-model/pkg/prom"
  31. "github.com/kubecost/cost-model/pkg/thanos"
  32. "github.com/kubecost/cost-model/pkg/util/json"
  33. prometheus "github.com/prometheus/client_golang/api"
  34. prometheusAPI "github.com/prometheus/client_golang/api/prometheus/v1"
  35. appsv1 "k8s.io/api/apps/v1"
  36. metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  37. "github.com/patrickmn/go-cache"
  38. "k8s.io/client-go/kubernetes"
  39. "k8s.io/client-go/rest"
  40. "k8s.io/client-go/tools/clientcmd"
  41. )
  // sanitizePolicy is a package-wide bluemonday policy built once from
  // bluemonday.UGCPolicy(). NOTE(review): its call sites are outside this view.
  42. var sanitizePolicy = bluemonday.UGCPolicy()
  43. const (
  // RFC3339Milli is a time layout with millisecond precision and a literal "Z" suffix.
  44. RFC3339Milli = "2006-01-02T15:04:05.000Z"
  // NOTE(review): the maxCacheMinutes* values look like cache lifetimes in minutes
  // for 1d/2d/7d/30d windows; their use is outside this view — confirm.
  45. maxCacheMinutes1d = 11
  46. maxCacheMinutes2d = 17
  47. maxCacheMinutes7d = 37
  48. maxCacheMinutes30d = 137
  // NOTE(review): presumably names of settings kept in the settings cache — confirm.
  49. CustomPricingSetting = "CustomPricing"
  50. DiscountSetting = "Discount"
  // epRules is built from apiPrefix, which is declared elsewhere in the package.
  51. epRules = apiPrefix + "/rules"
  // LogSeparator is a fixed separator line.
  52. LogSeparator = "+-------------------------------------------------------------------------------------"
  53. )
  54. var (
  55. // gitCommit is set by the build system
  56. gitCommit string
  57. )
  58. // Accesses defines a singleton application instance, providing access to
  59. // Prometheus, Kubernetes, the cloud provider, and caches.
  60. type Accesses struct {
  61. Router *httprouter.Router
  // PrometheusClient is the default query client (see GetPrometheusClient).
  62. PrometheusClient prometheus.Client
  // ThanosClient may be nil; GetPrometheusClient falls back to PrometheusClient in that case.
  63. ThanosClient prometheus.Client
  64. KubeClientSet kubernetes.Interface
  65. ClusterCache clustercache.ClusterCache
  66. ClusterMap clusters.ClusterMap
  67. CloudProvider cloud.Provider
  68. ConfigFileManager *config.ConfigFileManager
  69. ClusterInfoProvider clusters.ClusterInfoProvider
  70. Model *CostModel
  71. MetricsEmitter *CostModelMetricsEmitter
  72. OutOfClusterCache *cache.Cache
  73. AggregateCache *cache.Cache
  74. CostDataCache *cache.Cache
  // ClusterCostsCache is read by ClusterCostsFromCacheHandler using keys of the
  // form "<window>:<offset>" and values of type map[string]*ClusterCosts.
  75. ClusterCostsCache *cache.Cache
  // CacheExpiration holds custom expirations keyed by duration; consulted by
  // GetCacheExpiration, which falls back to cache.DefaultExpiration.
  76. CacheExpiration map[time.Duration]time.Duration
  77. AggAPI Aggregator
  78. // SettingsCache stores current state of app settings
  79. SettingsCache *cache.Cache
  80. // settingsSubscribers tracks channels through which changes to different
  81. // settings will be published in a pub/sub model
  82. settingsSubscribers map[string][]chan string
  // NOTE(review): presumably guards settingsSubscribers; its use is outside this view — confirm.
  83. settingsMutex sync.Mutex
  84. // registered http service instances
  85. httpServices services.HTTPServices
  86. }
  87. // GetPrometheusClient decides whether the default Prometheus client or the Thanos client
  88. // should be used.
  89. func (a *Accesses) GetPrometheusClient(remote bool) prometheus.Client {
  90. // Use Thanos Client if it exists (enabled) and remote flag set
  91. var pc prometheus.Client
  92. if remote && a.ThanosClient != nil {
  93. pc = a.ThanosClient
  94. } else {
  95. pc = a.PrometheusClient
  96. }
  97. return pc
  98. }
  99. // GetCacheExpiration looks up and returns custom cache expiration for the given duration.
  100. // If one does not exists, it returns the default cache expiration, which is defined by
  101. // the particular cache.
  102. func (a *Accesses) GetCacheExpiration(dur time.Duration) time.Duration {
  103. if expiration, ok := a.CacheExpiration[dur]; ok {
  104. return expiration
  105. }
  106. return cache.DefaultExpiration
  107. }
  108. // GetCacheRefresh determines how long to wait before refreshing the cache for the given duration,
  109. // which is done 1 minute before we expect the cache to expire, or 1 minute if expiration is
  110. // not found or is less than 2 minutes.
  111. func (a *Accesses) GetCacheRefresh(dur time.Duration) time.Duration {
  112. expiry := a.GetCacheExpiration(dur).Minutes()
  113. if expiry <= 2.0 {
  114. return time.Minute
  115. }
  116. mins := time.Duration(expiry/2.0) * time.Minute
  117. return mins
  118. }
  119. func (a *Accesses) ClusterCostsFromCacheHandler(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  120. w.Header().Set("Content-Type", "application/json")
  121. duration := 24 * time.Hour
  122. offset := time.Minute
  123. durationHrs := "24h"
  124. fmtOffset := "1m"
  125. pClient := a.GetPrometheusClient(true)
  126. key := fmt.Sprintf("%s:%s", durationHrs, fmtOffset)
  127. if data, valid := a.ClusterCostsCache.Get(key); valid {
  128. clusterCosts := data.(map[string]*ClusterCosts)
  129. w.Write(WrapDataWithMessage(clusterCosts, nil, "clusterCosts cache hit"))
  130. } else {
  131. data, err := a.ComputeClusterCosts(pClient, a.CloudProvider, duration, offset, true)
  132. w.Write(WrapDataWithMessage(data, err, fmt.Sprintf("clusterCosts cache miss: %s", key)))
  133. }
  134. }
  // Response is the JSON envelope marshaled by the WrapData* helpers in this file.
  135. type Response struct {
  // Code is set by the helpers to http.StatusOK or http.StatusInternalServerError.
  136. Code int `json:"code"`
  // Status is set by the helpers to "success" or "error".
  137. Status string `json:"status"`
  138. Data interface{} `json:"data"`
  // Message carries the error text on failure, or an optional caller message on success.
  139. Message string `json:"message,omitempty"`
  140. Warning string `json:"warning,omitempty"`
  141. }
  142. // FilterFunc is a predicate over a CostData, as consumed by FilterCostData.
  // Used as a filter, a false result drops the datum; used as a retain
  // function, a true result keeps it unconditionally. The string result is an
  // environment name: FilterCostData counts it per rejected datum when it is
  // non-empty and ignores it for retain functions.
  143. type FilterFunc func(*CostData) (bool, string)
  144. // FilterCostData allows through only CostData that matches all the given filter functions
  145. func FilterCostData(data map[string]*CostData, retains []FilterFunc, filters []FilterFunc) (map[string]*CostData, int, map[string]int) {
  146. result := make(map[string]*CostData)
  147. filteredEnvironments := make(map[string]int)
  148. filteredContainers := 0
  149. DataLoop:
  150. for key, datum := range data {
  151. for _, rf := range retains {
  152. if ok, _ := rf(datum); ok {
  153. result[key] = datum
  154. // if any retain function passes, the data is retained and move on
  155. continue DataLoop
  156. }
  157. }
  158. for _, ff := range filters {
  159. if ok, environment := ff(datum); !ok {
  160. if environment != "" {
  161. filteredEnvironments[environment]++
  162. }
  163. filteredContainers++
  164. // if any filter function check fails, move on to the next datum
  165. continue DataLoop
  166. }
  167. }
  168. result[key] = datum
  169. }
  170. return result, filteredContainers, filteredEnvironments
  171. }
  172. func filterFields(fields string, data map[string]*CostData) map[string]CostData {
  173. fs := strings.Split(fields, ",")
  174. fmap := make(map[string]bool)
  175. for _, f := range fs {
  176. fieldNameLower := strings.ToLower(f) // convert to go struct name by uppercasing first letter
  177. klog.V(1).Infof("to delete: %s", fieldNameLower)
  178. fmap[fieldNameLower] = true
  179. }
  180. filteredData := make(map[string]CostData)
  181. for cname, costdata := range data {
  182. s := reflect.TypeOf(*costdata)
  183. val := reflect.ValueOf(*costdata)
  184. costdata2 := CostData{}
  185. cd2 := reflect.New(reflect.Indirect(reflect.ValueOf(costdata2)).Type()).Elem()
  186. n := s.NumField()
  187. for i := 0; i < n; i++ {
  188. field := s.Field(i)
  189. value := val.Field(i)
  190. value2 := cd2.Field(i)
  191. if _, ok := fmap[strings.ToLower(field.Name)]; !ok {
  192. value2.Set(reflect.Value(value))
  193. }
  194. }
  195. filteredData[cname] = cd2.Interface().(CostData)
  196. }
  197. return filteredData
  198. }
  // normalizeTimeParam rewrites a duration expressed in days ("Nd") as the
  // equivalent number of hours ("<N*24>h"). Any other non-empty parameter is
  // returned unchanged.
  //
  // It returns an error when param is empty, when the day count is not a base-10
  // integer, or when converting the day count to hours would overflow int64
  // (previously the multiplication wrapped silently and produced a wrong value).
  func normalizeTimeParam(param string) (string, error) {
  	if param == "" {
  		return "", fmt.Errorf("invalid time param")
  	}

  	last := len(param) - 1
  	if param[last] != 'd' {
  		return param, nil
  	}

  	const (
  		hoursPerDay = 24
  		// Largest and smallest day counts whose hour value still fits in int64.
  		maxDays = (1<<63 - 1) / hoursPerDay
  		minDays = -(1 << 63) / hoursPerDay
  	)

  	days, err := strconv.ParseInt(param[:last], 10, 64)
  	if err != nil {
  		return "", err
  	}
  	if days > maxDays || days < minDays {
  		return "", fmt.Errorf("invalid time param: %s overflows when converted to hours", param)
  	}

  	return fmt.Sprintf("%dh", days*hoursPerDay), nil
  }
  // ParsePercentString converts a percentage such as "N%" into its fractional
  // value N/100. A single trailing "%" is optional, so "N" yields the same
  // result. The empty string is treated as "0%" and yields 0.0. Anything that
  // does not parse as a float after removing the "%" yields an error.
  func ParsePercentString(percentStr string) (float64, error) {
  	n := len(percentStr)
  	if n == 0 {
  		return 0.0, nil
  	}

  	number := percentStr
  	if percentStr[n-1] == '%' {
  		number = percentStr[:n-1]
  	}

  	percent, err := strconv.ParseFloat(number, 64)
  	if err != nil {
  		return 0.0, err
  	}
  	return percent * 0.01, nil
  }
  232. func WrapData(data interface{}, err error) []byte {
  233. var resp []byte
  234. if err != nil {
  235. klog.V(1).Infof("Error returned to client: %s", err.Error())
  236. resp, _ = json.Marshal(&Response{
  237. Code: http.StatusInternalServerError,
  238. Status: "error",
  239. Message: err.Error(),
  240. Data: data,
  241. })
  242. } else {
  243. resp, _ = json.Marshal(&Response{
  244. Code: http.StatusOK,
  245. Status: "success",
  246. Data: data,
  247. })
  248. }
  249. return resp
  250. }
  251. func WrapDataWithMessage(data interface{}, err error, message string) []byte {
  252. var resp []byte
  253. if err != nil {
  254. klog.V(1).Infof("Error returned to client: %s", err.Error())
  255. resp, _ = json.Marshal(&Response{
  256. Code: http.StatusInternalServerError,
  257. Status: "error",
  258. Message: err.Error(),
  259. Data: data,
  260. })
  261. } else {
  262. resp, _ = json.Marshal(&Response{
  263. Code: http.StatusOK,
  264. Status: "success",
  265. Data: data,
  266. Message: message,
  267. })
  268. }
  269. return resp
  270. }
  271. func WrapDataWithWarning(data interface{}, err error, warning string) []byte {
  272. var resp []byte
  273. if err != nil {
  274. klog.V(1).Infof("Error returned to client: %s", err.Error())
  275. resp, _ = json.Marshal(&Response{
  276. Code: http.StatusInternalServerError,
  277. Status: "error",
  278. Message: err.Error(),
  279. Warning: warning,
  280. Data: data,
  281. })
  282. } else {
  283. resp, _ = json.Marshal(&Response{
  284. Code: http.StatusOK,
  285. Status: "success",
  286. Data: data,
  287. Warning: warning,
  288. })
  289. }
  290. return resp
  291. }
  292. func WrapDataWithMessageAndWarning(data interface{}, err error, message, warning string) []byte {
  293. var resp []byte
  294. if err != nil {
  295. klog.V(1).Infof("Error returned to client: %s", err.Error())
  296. resp, _ = json.Marshal(&Response{
  297. Code: http.StatusInternalServerError,
  298. Status: "error",
  299. Message: err.Error(),
  300. Warning: warning,
  301. Data: data,
  302. })
  303. } else {
  304. resp, _ = json.Marshal(&Response{
  305. Code: http.StatusOK,
  306. Status: "success",
  307. Data: data,
  308. Message: message,
  309. Warning: warning,
  310. })
  311. }
  312. return resp
  313. }
  // wrapAsObjectItems places items under the single key "items" of a new map.
  // This lets the k8s proxy methods emulate the shape of a List() response from
  // the k8s API.
  func wrapAsObjectItems(items interface{}) map[string]interface{} {
  	wrapped := make(map[string]interface{}, 1)
  	wrapped["items"] = items
  	return wrapped
  }
  321. // RefreshPricingData needs to be called when a new node joins the fleet, since we cache the relevant subsets of pricing data to avoid storing the whole thing.
  322. func (a *Accesses) RefreshPricingData(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  323. w.Header().Set("Content-Type", "application/json")
  324. w.Header().Set("Access-Control-Allow-Origin", "*")
  325. err := a.CloudProvider.DownloadPricingData()
  326. if err != nil {
  327. klog.V(1).Infof("Error refreshing pricing data: %s", err.Error())
  328. }
  329. w.Write(WrapData(nil, err))
  330. }
  331. func (a *Accesses) CostDataModel(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  332. w.Header().Set("Content-Type", "application/json")
  333. w.Header().Set("Access-Control-Allow-Origin", "*")
  334. window := r.URL.Query().Get("timeWindow")
  335. offset := r.URL.Query().Get("offset")
  336. fields := r.URL.Query().Get("filterFields")
  337. namespace := r.URL.Query().Get("namespace")
  338. if offset != "" {
  339. offset = "offset " + offset
  340. }
  341. data, err := a.Model.ComputeCostData(a.PrometheusClient, a.CloudProvider, window, offset, namespace)
  342. if fields != "" {
  343. filteredData := filterFields(fields, data)
  344. w.Write(WrapData(filteredData, err))
  345. } else {
  346. w.Write(WrapData(data, err))
  347. }
  348. }
  349. func (a *Accesses) ClusterCosts(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  350. w.Header().Set("Content-Type", "application/json")
  351. w.Header().Set("Access-Control-Allow-Origin", "*")
  352. window := r.URL.Query().Get("window")
  353. offset := r.URL.Query().Get("offset")
  354. if window == "" {
  355. w.Write(WrapData(nil, fmt.Errorf("missing window arguement")))
  356. return
  357. }
  358. windowDur, err := timeutil.ParseDuration(window)
  359. if err != nil {
  360. w.Write(WrapData(nil, fmt.Errorf("error parsing window (%s): %s", window, err)))
  361. return
  362. }
  363. // offset is not a required parameter
  364. var offsetDur time.Duration
  365. if offset != "" {
  366. offsetDur, err = timeutil.ParseDuration(offset)
  367. if err != nil {
  368. w.Write(WrapData(nil, fmt.Errorf("error parsing offset (%s): %s", offset, err)))
  369. return
  370. }
  371. }
  372. useThanos, _ := strconv.ParseBool(r.URL.Query().Get("multi"))
  373. if useThanos && !thanos.IsEnabled() {
  374. w.Write(WrapData(nil, fmt.Errorf("Multi=true while Thanos is not enabled.")))
  375. return
  376. }
  377. var client prometheus.Client
  378. if useThanos {
  379. client = a.ThanosClient
  380. offsetDur = thanos.OffsetDuration()
  381. } else {
  382. client = a.PrometheusClient
  383. }
  384. data, err := a.ComputeClusterCosts(client, a.CloudProvider, windowDur, offsetDur, true)
  385. w.Write(WrapData(data, err))
  386. }
  387. func (a *Accesses) ClusterCostsOverTime(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  388. w.Header().Set("Content-Type", "application/json")
  389. w.Header().Set("Access-Control-Allow-Origin", "*")
  390. start := r.URL.Query().Get("start")
  391. end := r.URL.Query().Get("end")
  392. window := r.URL.Query().Get("window")
  393. offset := r.URL.Query().Get("offset")
  394. if window == "" {
  395. w.Write(WrapData(nil, fmt.Errorf("missing window arguement")))
  396. return
  397. }
  398. windowDur, err := timeutil.ParseDuration(window)
  399. if err != nil {
  400. w.Write(WrapData(nil, fmt.Errorf("error parsing window (%s): %s", window, err)))
  401. return
  402. }
  403. // offset is not a required parameter
  404. var offsetDur time.Duration
  405. if offset != "" {
  406. offsetDur, err = timeutil.ParseDuration(offset)
  407. if err != nil {
  408. w.Write(WrapData(nil, fmt.Errorf("error parsing offset (%s): %s", offset, err)))
  409. return
  410. }
  411. }
  412. data, err := ClusterCostsOverTime(a.PrometheusClient, a.CloudProvider, start, end, windowDur, offsetDur)
  413. w.Write(WrapData(data, err))
  414. }
  415. func (a *Accesses) CostDataModelRange(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  416. w.Header().Set("Content-Type", "application/json")
  417. w.Header().Set("Access-Control-Allow-Origin", "*")
  418. startStr := r.URL.Query().Get("start")
  419. endStr := r.URL.Query().Get("end")
  420. windowStr := r.URL.Query().Get("window")
  421. fields := r.URL.Query().Get("filterFields")
  422. namespace := r.URL.Query().Get("namespace")
  423. cluster := r.URL.Query().Get("cluster")
  424. remote := r.URL.Query().Get("remote")
  425. remoteEnabled := env.IsRemoteEnabled() && remote != "false"
  426. layout := "2006-01-02T15:04:05.000Z"
  427. start, err := time.Parse(layout, startStr)
  428. if err != nil {
  429. w.Write(WrapDataWithMessage(nil, fmt.Errorf("invalid start date: %s", startStr), fmt.Sprintf("invalid start date: %s", startStr)))
  430. return
  431. }
  432. end, err := time.Parse(layout, endStr)
  433. if err != nil {
  434. w.Write(WrapDataWithMessage(nil, fmt.Errorf("invalid end date: %s", endStr), fmt.Sprintf("invalid end date: %s", endStr)))
  435. return
  436. }
  437. window := kubecost.NewWindow(&start, &end)
  438. if window.IsOpen() || window.IsEmpty() || window.IsNegative() {
  439. w.Write(WrapDataWithMessage(nil, fmt.Errorf("invalid date range: %s", window), fmt.Sprintf("invalid date range: %s", window)))
  440. return
  441. }
  442. resolution := time.Hour
  443. if resDur, err := time.ParseDuration(windowStr); err == nil {
  444. resolution = resDur
  445. }
  446. // Use Thanos Client if it exists (enabled) and remote flag set
  447. var pClient prometheus.Client
  448. if remote != "false" && a.ThanosClient != nil {
  449. pClient = a.ThanosClient
  450. } else {
  451. pClient = a.PrometheusClient
  452. }
  453. data, err := a.Model.ComputeCostDataRange(pClient, a.CloudProvider, window, resolution, namespace, cluster, remoteEnabled)
  454. if err != nil {
  455. w.Write(WrapData(nil, err))
  456. }
  457. if fields != "" {
  458. filteredData := filterFields(fields, data)
  459. w.Write(WrapData(filteredData, err))
  460. } else {
  461. w.Write(WrapData(data, err))
  462. }
  463. }
  // parseAggregations resolves the aggregation key, the list of aggregation
  // fields, and the filter name, returned in that order.
  //
  // When customAggregation is non-empty it is used verbatim as the key, split on
  // commas for the field list, and filterType is returned unchanged. Otherwise
  // aggregator is split on commas, every element and filterType are prefixed
  // with "kubernetes_", and the key is the prefixed elements joined by commas.
  func parseAggregations(customAggregation, aggregator, filterType string) (string, []string, string) {
  	if customAggregation != "" {
  		return customAggregation, strings.Split(customAggregation, ","), filterType
  	}

  	const prefix = "kubernetes_"

  	parts := strings.Split(aggregator, ",")
  	prefixed := make([]string, 0, len(parts))
  	for _, part := range parts {
  		prefixed = append(prefixed, prefix+part)
  	}

  	return strings.Join(prefixed, ","), prefixed, prefix + filterType
  }
  483. func (a *Accesses) GetAllNodePricing(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  484. w.Header().Set("Content-Type", "application/json")
  485. w.Header().Set("Access-Control-Allow-Origin", "*")
  486. data, err := a.CloudProvider.AllNodePricing()
  487. w.Write(WrapData(data, err))
  488. }
  489. func (a *Accesses) GetConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  490. w.Header().Set("Content-Type", "application/json")
  491. w.Header().Set("Access-Control-Allow-Origin", "*")
  492. data, err := a.CloudProvider.GetConfig()
  493. w.Write(WrapData(data, err))
  494. }
  495. func (a *Accesses) UpdateSpotInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  496. w.Header().Set("Content-Type", "application/json")
  497. w.Header().Set("Access-Control-Allow-Origin", "*")
  498. data, err := a.CloudProvider.UpdateConfig(r.Body, cloud.SpotInfoUpdateType)
  499. if err != nil {
  500. w.Write(WrapData(data, err))
  501. return
  502. }
  503. w.Write(WrapData(data, err))
  504. err = a.CloudProvider.DownloadPricingData()
  505. if err != nil {
  506. klog.V(1).Infof("Error redownloading data on config update: %s", err.Error())
  507. }
  508. return
  509. }
  510. func (a *Accesses) UpdateAthenaInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  511. w.Header().Set("Content-Type", "application/json")
  512. w.Header().Set("Access-Control-Allow-Origin", "*")
  513. data, err := a.CloudProvider.UpdateConfig(r.Body, cloud.AthenaInfoUpdateType)
  514. if err != nil {
  515. w.Write(WrapData(data, err))
  516. return
  517. }
  518. w.Write(WrapData(data, err))
  519. return
  520. }
  521. func (a *Accesses) UpdateBigQueryInfoConfigs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  522. w.Header().Set("Content-Type", "application/json")
  523. w.Header().Set("Access-Control-Allow-Origin", "*")
  524. data, err := a.CloudProvider.UpdateConfig(r.Body, cloud.BigqueryUpdateType)
  525. if err != nil {
  526. w.Write(WrapData(data, err))
  527. return
  528. }
  529. w.Write(WrapData(data, err))
  530. return
  531. }
  532. func (a *Accesses) UpdateConfigByKey(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  533. w.Header().Set("Content-Type", "application/json")
  534. w.Header().Set("Access-Control-Allow-Origin", "*")
  535. data, err := a.CloudProvider.UpdateConfig(r.Body, "")
  536. if err != nil {
  537. w.Write(WrapData(data, err))
  538. return
  539. }
  540. w.Write(WrapData(data, err))
  541. return
  542. }
  543. func (a *Accesses) ManagementPlatform(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  544. w.Header().Set("Content-Type", "application/json")
  545. w.Header().Set("Access-Control-Allow-Origin", "*")
  546. data, err := a.CloudProvider.GetManagementPlatform()
  547. if err != nil {
  548. w.Write(WrapData(data, err))
  549. return
  550. }
  551. w.Write(WrapData(data, err))
  552. return
  553. }
  554. func (a *Accesses) ClusterInfo(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  555. w.Header().Set("Content-Type", "application/json")
  556. w.Header().Set("Access-Control-Allow-Origin", "*")
  557. data := a.ClusterInfoProvider.GetClusterInfo()
  558. w.Write(WrapData(data, nil))
  559. }
  560. func (a *Accesses) GetClusterInfoMap(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  561. w.Header().Set("Content-Type", "application/json")
  562. w.Header().Set("Access-Control-Allow-Origin", "*")
  563. data := a.ClusterMap.AsMap()
  564. w.Write(WrapData(data, nil))
  565. }
  566. func (a *Accesses) GetServiceAccountStatus(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  567. w.Header().Set("Content-Type", "application/json")
  568. w.Header().Set("Access-Control-Allow-Origin", "*")
  569. w.Write(WrapData(a.CloudProvider.ServiceAccountStatus(), nil))
  570. }
  571. func (a *Accesses) GetPricingSourceStatus(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  572. w.Header().Set("Content-Type", "application/json")
  573. w.Header().Set("Access-Control-Allow-Origin", "*")
  574. w.Write(WrapData(a.CloudProvider.PricingSourceStatus(), nil))
  575. }
  576. func (a *Accesses) GetPricingSourceCounts(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  577. w.Header().Set("Content-Type", "application/json")
  578. w.Header().Set("Access-Control-Allow-Origin", "*")
  579. w.Write(WrapData(a.Model.GetPricingSourceCounts()))
  580. }
  581. func (a *Accesses) GetPrometheusMetadata(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  582. w.Header().Set("Content-Type", "application/json")
  583. w.Header().Set("Access-Control-Allow-Origin", "*")
  584. w.Write(WrapData(prom.Validate(a.PrometheusClient)))
  585. }
  586. func (a *Accesses) PrometheusQuery(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  587. w.Header().Set("Content-Type", "application/json")
  588. w.Header().Set("Access-Control-Allow-Origin", "*")
  589. qp := httputil.NewQueryParams(r.URL.Query())
  590. query := qp.Get("query", "")
  591. if query == "" {
  592. w.Write(WrapData(nil, fmt.Errorf("Query Parameter 'query' is unset'")))
  593. return
  594. }
  595. ctx := prom.NewNamedContext(a.PrometheusClient, prom.FrontendContextName)
  596. body, err := ctx.RawQuery(query)
  597. if err != nil {
  598. w.Write(WrapData(nil, fmt.Errorf("Error running query %s. Error: %s", query, err)))
  599. return
  600. }
  601. w.Write(body)
  602. }
  603. func (a *Accesses) PrometheusQueryRange(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  604. w.Header().Set("Content-Type", "application/json")
  605. w.Header().Set("Access-Control-Allow-Origin", "*")
  606. qp := httputil.NewQueryParams(r.URL.Query())
  607. query := qp.Get("query", "")
  608. if query == "" {
  609. fmt.Fprintf(w, "Error parsing query from request parameters.")
  610. return
  611. }
  612. start, end, duration, err := toStartEndStep(qp)
  613. if err != nil {
  614. fmt.Fprintf(w, err.Error())
  615. return
  616. }
  617. ctx := prom.NewNamedContext(a.PrometheusClient, prom.FrontendContextName)
  618. body, err := ctx.RawQueryRange(query, start, end, duration)
  619. if err != nil {
  620. fmt.Fprintf(w, "Error running query %s. Error: %s", query, err)
  621. return
  622. }
  623. w.Write(body)
  624. }
  625. func (a *Accesses) ThanosQuery(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  626. w.Header().Set("Content-Type", "application/json")
  627. w.Header().Set("Access-Control-Allow-Origin", "*")
  628. if !thanos.IsEnabled() {
  629. w.Write(WrapData(nil, fmt.Errorf("ThanosDisabled")))
  630. return
  631. }
  632. qp := httputil.NewQueryParams(r.URL.Query())
  633. query := qp.Get("query", "")
  634. if query == "" {
  635. w.Write(WrapData(nil, fmt.Errorf("Query Parameter 'query' is unset'")))
  636. return
  637. }
  638. ctx := prom.NewNamedContext(a.ThanosClient, prom.FrontendContextName)
  639. body, err := ctx.RawQuery(query)
  640. if err != nil {
  641. w.Write(WrapData(nil, fmt.Errorf("Error running query %s. Error: %s", query, err)))
  642. return
  643. }
  644. w.Write(body)
  645. }
  646. func (a *Accesses) ThanosQueryRange(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  647. w.Header().Set("Content-Type", "application/json")
  648. w.Header().Set("Access-Control-Allow-Origin", "*")
  649. if !thanos.IsEnabled() {
  650. w.Write(WrapData(nil, fmt.Errorf("ThanosDisabled")))
  651. return
  652. }
  653. qp := httputil.NewQueryParams(r.URL.Query())
  654. query := qp.Get("query", "")
  655. if query == "" {
  656. fmt.Fprintf(w, "Error parsing query from request parameters.")
  657. return
  658. }
  659. start, end, duration, err := toStartEndStep(qp)
  660. if err != nil {
  661. fmt.Fprintf(w, err.Error())
  662. return
  663. }
  664. ctx := prom.NewNamedContext(a.ThanosClient, prom.FrontendContextName)
  665. body, err := ctx.RawQueryRange(query, start, end, duration)
  666. if err != nil {
  667. fmt.Fprintf(w, "Error running query %s. Error: %s", query, err)
  668. return
  669. }
  670. w.Write(body)
  671. }
  672. // helper for query range proxy requests
  673. func toStartEndStep(qp httputil.QueryParams) (start, end time.Time, step time.Duration, err error) {
  674. var e error
  675. ss := qp.Get("start", "")
  676. es := qp.Get("end", "")
  677. ds := qp.Get("duration", "")
  678. layout := "2006-01-02T15:04:05.000Z"
  679. start, e = time.Parse(layout, ss)
  680. if e != nil {
  681. err = fmt.Errorf("Error parsing time %s. Error: %s", ss, err)
  682. return
  683. }
  684. end, e = time.Parse(layout, es)
  685. if e != nil {
  686. err = fmt.Errorf("Error parsing time %s. Error: %s", es, err)
  687. return
  688. }
  689. step, e = time.ParseDuration(ds)
  690. if e != nil {
  691. err = fmt.Errorf("Error parsing duration %s. Error: %s", ds, err)
  692. return
  693. }
  694. err = nil
  695. return
  696. }
  697. func (a *Accesses) GetPrometheusQueueState(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  698. w.Header().Set("Content-Type", "application/json")
  699. w.Header().Set("Access-Control-Allow-Origin", "*")
  700. promQueueState, err := prom.GetPrometheusQueueState(a.PrometheusClient)
  701. if err != nil {
  702. w.Write(WrapData(nil, err))
  703. return
  704. }
  705. result := map[string]*prom.PrometheusQueueState{
  706. "prometheus": promQueueState,
  707. }
  708. if thanos.IsEnabled() {
  709. thanosQueueState, err := prom.GetPrometheusQueueState(a.ThanosClient)
  710. if err != nil {
  711. log.Warningf("Error getting Thanos queue state: %s", err)
  712. } else {
  713. result["thanos"] = thanosQueueState
  714. }
  715. }
  716. w.Write(WrapData(result, nil))
  717. }
  718. // GetPrometheusMetrics retrieves availability of Prometheus and Thanos metrics
  719. func (a *Accesses) GetPrometheusMetrics(w http.ResponseWriter, _ *http.Request, _ httprouter.Params) {
  720. w.Header().Set("Content-Type", "application/json")
  721. w.Header().Set("Access-Control-Allow-Origin", "*")
  722. promMetrics, err := prom.GetPrometheusMetrics(a.PrometheusClient, "")
  723. if err != nil {
  724. w.Write(WrapData(nil, err))
  725. return
  726. }
  727. result := map[string][]*prom.PrometheusDiagnostic{
  728. "prometheus": promMetrics,
  729. }
  730. if thanos.IsEnabled() {
  731. thanosMetrics, err := prom.GetPrometheusMetrics(a.ThanosClient, thanos.QueryOffset())
  732. if err != nil {
  733. log.Warningf("Error getting Thanos queue state: %s", err)
  734. } else {
  735. result["thanos"] = thanosMetrics
  736. }
  737. }
  738. w.Write(WrapData(result, nil))
  739. }
  740. func (a *Accesses) GetAllPersistentVolumes(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  741. w.Header().Set("Content-Type", "application/json")
  742. w.Header().Set("Access-Control-Allow-Origin", "*")
  743. pvList := a.ClusterCache.GetAllPersistentVolumes()
  744. body, err := json.Marshal(wrapAsObjectItems(pvList))
  745. if err != nil {
  746. fmt.Fprintf(w, "Error decoding persistent volumes: "+err.Error())
  747. } else {
  748. w.Write(body)
  749. }
  750. }
  751. func (a *Accesses) GetAllDeployments(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  752. w.Header().Set("Content-Type", "application/json")
  753. w.Header().Set("Access-Control-Allow-Origin", "*")
  754. qp := httputil.NewQueryParams(r.URL.Query())
  755. namespace := qp.Get("namespace", "")
  756. deploymentsList := a.ClusterCache.GetAllDeployments()
  757. // filter for provided namespace
  758. var deployments []*appsv1.Deployment
  759. if namespace == "" {
  760. deployments = deploymentsList
  761. } else {
  762. deployments = []*appsv1.Deployment{}
  763. for _, d := range deploymentsList {
  764. if d.Namespace == namespace {
  765. deployments = append(deployments, d)
  766. }
  767. }
  768. }
  769. body, err := json.Marshal(wrapAsObjectItems(deployments))
  770. if err != nil {
  771. fmt.Fprintf(w, "Error decoding deployment: "+err.Error())
  772. } else {
  773. w.Write(body)
  774. }
  775. }
  776. func (a *Accesses) GetAllStorageClasses(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  777. w.Header().Set("Content-Type", "application/json")
  778. w.Header().Set("Access-Control-Allow-Origin", "*")
  779. scList := a.ClusterCache.GetAllStorageClasses()
  780. body, err := json.Marshal(wrapAsObjectItems(scList))
  781. if err != nil {
  782. fmt.Fprintf(w, "Error decoding storageclasses: "+err.Error())
  783. } else {
  784. w.Write(body)
  785. }
  786. }
  787. func (a *Accesses) GetAllStatefulSets(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  788. w.Header().Set("Content-Type", "application/json")
  789. w.Header().Set("Access-Control-Allow-Origin", "*")
  790. qp := httputil.NewQueryParams(r.URL.Query())
  791. namespace := qp.Get("namespace", "")
  792. statefulSetsList := a.ClusterCache.GetAllStatefulSets()
  793. // filter for provided namespace
  794. var statefulSets []*appsv1.StatefulSet
  795. if namespace == "" {
  796. statefulSets = statefulSetsList
  797. } else {
  798. statefulSets = []*appsv1.StatefulSet{}
  799. for _, ss := range statefulSetsList {
  800. if ss.Namespace == namespace {
  801. statefulSets = append(statefulSets, ss)
  802. }
  803. }
  804. }
  805. body, err := json.Marshal(wrapAsObjectItems(statefulSets))
  806. if err != nil {
  807. fmt.Fprintf(w, "Error decoding deployment: "+err.Error())
  808. } else {
  809. w.Write(body)
  810. }
  811. }
  812. func (a *Accesses) GetAllNodes(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  813. w.Header().Set("Content-Type", "application/json")
  814. w.Header().Set("Access-Control-Allow-Origin", "*")
  815. nodeList := a.ClusterCache.GetAllNodes()
  816. body, err := json.Marshal(wrapAsObjectItems(nodeList))
  817. if err != nil {
  818. fmt.Fprintf(w, "Error decoding nodes: "+err.Error())
  819. } else {
  820. w.Write(body)
  821. }
  822. }
  823. func (a *Accesses) GetAllPods(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  824. w.Header().Set("Content-Type", "application/json")
  825. w.Header().Set("Access-Control-Allow-Origin", "*")
  826. podlist := a.ClusterCache.GetAllPods()
  827. body, err := json.Marshal(wrapAsObjectItems(podlist))
  828. if err != nil {
  829. fmt.Fprintf(w, "Error decoding pods: "+err.Error())
  830. } else {
  831. w.Write(body)
  832. }
  833. }
  834. func (a *Accesses) GetAllNamespaces(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  835. w.Header().Set("Content-Type", "application/json")
  836. w.Header().Set("Access-Control-Allow-Origin", "*")
  837. namespaces := a.ClusterCache.GetAllNamespaces()
  838. body, err := json.Marshal(wrapAsObjectItems(namespaces))
  839. if err != nil {
  840. fmt.Fprintf(w, "Error decoding deployment: "+err.Error())
  841. } else {
  842. w.Write(body)
  843. }
  844. }
  845. func (a *Accesses) GetAllDaemonSets(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  846. w.Header().Set("Content-Type", "application/json")
  847. w.Header().Set("Access-Control-Allow-Origin", "*")
  848. daemonSets := a.ClusterCache.GetAllDaemonSets()
  849. body, err := json.Marshal(wrapAsObjectItems(daemonSets))
  850. if err != nil {
  851. fmt.Fprintf(w, "Error decoding daemon set: "+err.Error())
  852. } else {
  853. w.Write(body)
  854. }
  855. }
  856. func (a *Accesses) GetPod(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  857. w.Header().Set("Content-Type", "application/json")
  858. w.Header().Set("Access-Control-Allow-Origin", "*")
  859. podName := ps.ByName("name")
  860. podNamespace := ps.ByName("namespace")
  861. // TODO: ClusterCache API could probably afford to have some better filtering
  862. allPods := a.ClusterCache.GetAllPods()
  863. for _, pod := range allPods {
  864. for _, container := range pod.Spec.Containers {
  865. container.Env = make([]v1.EnvVar, 0)
  866. }
  867. if pod.Namespace == podNamespace && pod.Name == podName {
  868. body, err := json.Marshal(pod)
  869. if err != nil {
  870. fmt.Fprintf(w, "Error decoding pod: "+err.Error())
  871. } else {
  872. w.Write(body)
  873. }
  874. return
  875. }
  876. }
  877. fmt.Fprintf(w, "Pod not found\n")
  878. }
  879. func (a *Accesses) PrometheusRecordingRules(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  880. w.Header().Set("Content-Type", "application/json")
  881. w.Header().Set("Access-Control-Allow-Origin", "*")
  882. u := a.PrometheusClient.URL(epRules, nil)
  883. req, err := http.NewRequest(http.MethodGet, u.String(), nil)
  884. if err != nil {
  885. fmt.Fprintf(w, "Error creating Prometheus rule request: "+err.Error())
  886. }
  887. _, body, _, err := a.PrometheusClient.Do(r.Context(), req)
  888. if err != nil {
  889. fmt.Fprintf(w, "Error making Prometheus rule request: "+err.Error())
  890. } else {
  891. w.Write(body)
  892. }
  893. }
  894. func (a *Accesses) PrometheusConfig(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  895. w.Header().Set("Content-Type", "application/json")
  896. w.Header().Set("Access-Control-Allow-Origin", "*")
  897. pConfig := map[string]string{
  898. "address": env.GetPrometheusServerEndpoint(),
  899. }
  900. body, err := json.Marshal(pConfig)
  901. if err != nil {
  902. fmt.Fprintf(w, "Error marshalling prometheus config")
  903. } else {
  904. w.Write(body)
  905. }
  906. }
  907. func (a *Accesses) PrometheusTargets(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  908. w.Header().Set("Content-Type", "application/json")
  909. w.Header().Set("Access-Control-Allow-Origin", "*")
  910. u := a.PrometheusClient.URL(epTargets, nil)
  911. req, err := http.NewRequest(http.MethodGet, u.String(), nil)
  912. if err != nil {
  913. fmt.Fprintf(w, "Error creating Prometheus rule request: "+err.Error())
  914. }
  915. _, body, _, err := a.PrometheusClient.Do(r.Context(), req)
  916. if err != nil {
  917. fmt.Fprintf(w, "Error making Prometheus rule request: "+err.Error())
  918. } else {
  919. w.Write(body)
  920. }
  921. }
  922. func (a *Accesses) GetOrphanedPods(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  923. w.Header().Set("Content-Type", "application/json")
  924. w.Header().Set("Access-Control-Allow-Origin", "*")
  925. podlist := a.ClusterCache.GetAllPods()
  926. var lonePods []*v1.Pod
  927. for _, pod := range podlist {
  928. if len(pod.OwnerReferences) == 0 {
  929. lonePods = append(lonePods, pod)
  930. }
  931. }
  932. body, err := json.Marshal(lonePods)
  933. if err != nil {
  934. fmt.Fprintf(w, "Error decoding pod: "+err.Error())
  935. } else {
  936. w.Write(body)
  937. }
  938. }
  939. func (a *Accesses) GetInstallNamespace(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  940. w.Header().Set("Content-Type", "application/json")
  941. w.Header().Set("Access-Control-Allow-Origin", "*")
  942. ns := env.GetKubecostNamespace()
  943. w.Write([]byte(ns))
  944. }
  945. // logsFor pulls the logs for a specific pod, namespace, and container
  946. func logsFor(c kubernetes.Interface, namespace string, pod string, container string, dur time.Duration, ctx context.Context) (string, error) {
  947. since := time.Now().UTC().Add(-dur)
  948. logOpts := v1.PodLogOptions{
  949. SinceTime: &metav1.Time{Time: since},
  950. }
  951. if container != "" {
  952. logOpts.Container = container
  953. }
  954. req := c.CoreV1().Pods(namespace).GetLogs(pod, &logOpts)
  955. reader, err := req.Stream(ctx)
  956. if err != nil {
  957. return "", err
  958. }
  959. podLogs, err := ioutil.ReadAll(reader)
  960. if err != nil {
  961. return "", err
  962. }
  963. return string(podLogs), nil
  964. }
  965. func (a *Accesses) GetPodLogs(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  966. w.Header().Set("Content-Type", "application/json")
  967. w.Header().Set("Access-Control-Allow-Origin", "*")
  968. qp := httputil.NewQueryParams(r.URL.Query())
  969. ns := qp.Get("namespace", env.GetKubecostNamespace())
  970. pod := qp.Get("pod", "")
  971. selector := qp.Get("selector", "")
  972. container := qp.Get("container", "")
  973. since := qp.Get("since", "24h")
  974. sinceDuration, err := time.ParseDuration(since)
  975. if err != nil {
  976. fmt.Fprintf(w, "Invalid Duration String: "+err.Error())
  977. return
  978. }
  979. var logResult string
  980. appendLog := func(ns string, pod string, container string, l string) {
  981. if l == "" {
  982. return
  983. }
  984. logResult += fmt.Sprintf("%s\n| %s:%s:%s\n%s\n%s\n\n", LogSeparator, ns, pod, container, LogSeparator, l)
  985. }
  986. if pod != "" {
  987. pd, err := a.KubeClientSet.CoreV1().Pods(ns).Get(r.Context(), pod, metav1.GetOptions{})
  988. if err != nil {
  989. fmt.Fprintf(w, "Error Finding Pod: "+err.Error())
  990. return
  991. }
  992. if container != "" {
  993. var foundContainer bool
  994. for _, cont := range pd.Spec.Containers {
  995. if strings.EqualFold(cont.Name, container) {
  996. foundContainer = true
  997. break
  998. }
  999. }
  1000. if !foundContainer {
  1001. fmt.Fprintf(w, "Could not find container: "+container)
  1002. return
  1003. }
  1004. }
  1005. logs, err := logsFor(a.KubeClientSet, ns, pod, container, sinceDuration, r.Context())
  1006. if err != nil {
  1007. fmt.Fprintf(w, "Error Getting Logs: "+err.Error())
  1008. return
  1009. }
  1010. appendLog(ns, pod, container, logs)
  1011. w.Write([]byte(logResult))
  1012. return
  1013. }
  1014. if selector != "" {
  1015. pods, err := a.KubeClientSet.CoreV1().Pods(ns).List(r.Context(), metav1.ListOptions{LabelSelector: selector})
  1016. if err != nil {
  1017. fmt.Fprintf(w, "Error Finding Pod: "+err.Error())
  1018. return
  1019. }
  1020. for _, pd := range pods.Items {
  1021. for _, cont := range pd.Spec.Containers {
  1022. logs, err := logsFor(a.KubeClientSet, ns, pd.Name, cont.Name, sinceDuration, r.Context())
  1023. if err != nil {
  1024. continue
  1025. }
  1026. appendLog(ns, pd.Name, cont.Name, logs)
  1027. }
  1028. }
  1029. }
  1030. w.Write([]byte(logResult))
  1031. }
  1032. func (a *Accesses) AddServiceKey(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  1033. w.Header().Set("Content-Type", "application/json")
  1034. w.Header().Set("Access-Control-Allow-Origin", "*")
  1035. r.ParseForm()
  1036. key := r.PostForm.Get("key")
  1037. k := []byte(key)
  1038. err := ioutil.WriteFile("/var/configs/key.json", k, 0644)
  1039. if err != nil {
  1040. fmt.Fprintf(w, "Error writing service key: "+err.Error())
  1041. }
  1042. w.WriteHeader(http.StatusOK)
  1043. }
  1044. func (a *Accesses) GetHelmValues(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
  1045. w.Header().Set("Content-Type", "application/json")
  1046. w.Header().Set("Access-Control-Allow-Origin", "*")
  1047. encodedValues := env.Get("HELM_VALUES", "")
  1048. if encodedValues == "" {
  1049. fmt.Fprintf(w, "Values reporting disabled")
  1050. return
  1051. }
  1052. result, err := base64.StdEncoding.DecodeString(encodedValues)
  1053. if err != nil {
  1054. fmt.Fprintf(w, "Failed to decode encoded values: %s", err)
  1055. return
  1056. }
  1057. w.Write(result)
  1058. }
  1059. func (a *Accesses) Status(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
  1060. w.Header().Set("Content-Type", "application/json")
  1061. w.Header().Set("Access-Control-Allow-Origin", "*")
  1062. promServer := env.GetPrometheusServerEndpoint()
  1063. api := prometheusAPI.NewAPI(a.PrometheusClient)
  1064. result, err := api.Config(r.Context())
  1065. if err != nil {
  1066. fmt.Fprintf(w, "Using Prometheus at "+promServer+". Error: "+err.Error())
  1067. } else {
  1068. fmt.Fprintf(w, "Using Prometheus at "+promServer+". PrometheusConfig: "+result.YAML)
  1069. }
  1070. }
  1071. // captures the panic event in sentry
  1072. func capturePanicEvent(err string, stack string) {
  1073. msg := fmt.Sprintf("Panic: %s\nStackTrace: %s\n", err, stack)
  1074. klog.V(1).Infoln(msg)
  1075. sentry.CurrentHub().CaptureEvent(&sentry.Event{
  1076. Level: sentry.LevelError,
  1077. Message: msg,
  1078. })
  1079. sentry.Flush(5 * time.Second)
  1080. }
  1081. // handle any panics reported by the errors package
  1082. func handlePanic(p errors.Panic) bool {
  1083. err := p.Error
  1084. if err != nil {
  1085. if err, ok := err.(error); ok {
  1086. capturePanicEvent(err.Error(), p.Stack)
  1087. }
  1088. if err, ok := err.(string); ok {
  1089. capturePanicEvent(err, p.Stack)
  1090. }
  1091. }
  1092. // Return true to recover iff the type is http, otherwise allow kubernetes
  1093. // to recover.
  1094. return p.Type == errors.PanicTypeHTTP
  1095. }
// Initialize builds the Accesses instance that backs the cost-model HTTP API
// and returns it with all routes registered.
//
// In order, it: optionally sets up sentry error reporting, creates and probes
// the Prometheus client, builds the Kubernetes client and cluster cache,
// creates the cloud provider and config map watchers, optionally creates the
// Thanos client, builds the cluster map, caches, cost model and metrics
// emitter, downloads pricing data, optionally warms the aggregate cache and
// starts the metrics emitter, and finally registers the HTTP routes.
//
// additionalConfigWatchers are added to the set of config map watchers that
// are invoked once at start-up and on every config map update.
//
// The function aborts the process (klog.Fatalf) when no Prometheus address is
// configured or the Prometheus client cannot be created, and panics when the
// Kubernetes configuration, Kubernetes client or cloud provider cannot be
// created.
func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses {
	klog.V(1).Infof("Starting cost-model (git commit \"%s\")", env.GetAppVersion())

	configWatchers := watcher.NewConfigMapWatchers(additionalConfigWatchers...)

	// Error reporting is best effort: failures here are logged and start-up
	// continues.
	var err error
	if errorReportingEnabled {
		err = sentry.Init(sentry.ClientOptions{Release: env.GetAppVersion()})
		if err != nil {
			klog.Infof("Failed to initialize sentry for error reporting")
		} else {
			err = errors.SetPanicHandler(handlePanic)
			if err != nil {
				klog.Infof("Failed to set panic handler: %s", err)
			}
		}
	}

	address := env.GetPrometheusServerEndpoint()
	if address == "" {
		klog.Fatalf("No address for prometheus set in $%s. Aborting.", env.PrometheusServerEndpointEnvVar)
	}

	queryConcurrency := env.GetMaxQueryConcurrency()
	klog.Infof("Prometheus/Thanos Client Max Concurrency set to %d", queryConcurrency)

	// Connection settings shared by the Prometheus and Thanos clients.
	timeout := 120 * time.Second
	keepAlive := 120 * time.Second
	tlsHandshakeTimeout := 10 * time.Second
	// Default scrape interval; replaced below if one can be looked up for the
	// kubecost job.
	scrapeInterval := time.Minute

	// Retry options stay nil unless retry-on-rate-limit is enabled.
	var rateLimitRetryOpts *prom.RateLimitRetryOpts = nil
	if env.IsPrometheusRetryOnRateLimitResponse() {
		rateLimitRetryOpts = &prom.RateLimitRetryOpts{
			MaxRetries:       env.GetPrometheusRetryOnRateLimitMaxRetries(),
			DefaultRetryWait: env.GetPrometheusRetryOnRateLimitDefaultWait(),
		}
	}

	promCli, err := prom.NewPrometheusClient(address, &prom.PrometheusClientConfig{
		Timeout:               timeout,
		KeepAlive:             keepAlive,
		TLSHandshakeTimeout:   tlsHandshakeTimeout,
		TLSInsecureSkipVerify: env.GetInsecureSkipVerify(),
		RateLimitRetryOpts:    rateLimitRetryOpts,
		Auth: &prom.ClientAuth{
			Username:    env.GetDBBasicAuthUsername(),
			Password:    env.GetDBBasicAuthUserPassword(),
			BearerToken: env.GetDBBearerToken(),
		},
		QueryConcurrency: queryConcurrency,
		QueryLogFile:     "",
	})
	if err != nil {
		klog.Fatalf("Failed to create prometheus client, Error: %v", err)
	}

	// A failed validation is only logged; start-up continues regardless.
	m, err := prom.Validate(promCli)
	if err != nil || !m.Running {
		if err != nil {
			klog.Errorf("Failed to query prometheus at %s. Error: %s . Troubleshooting help available at: %s", address, err.Error(), prom.PrometheusTroubleshootingURL)
		} else if !m.Running {
			klog.Errorf("Prometheus at %s is not running. Troubleshooting help available at: %s", address, prom.PrometheusTroubleshootingURL)
		}
	} else {
		klog.V(1).Info("Success: retrieved the 'up' query against prometheus at: " + address)
	}

	// Probe the config endpoint; the result is discarded and only the outcome
	// is logged.
	api := prometheusAPI.NewAPI(promCli)
	_, err = api.Config(context.Background())
	if err != nil {
		klog.Infof("No valid prometheus config file at %s. Error: %s . Troubleshooting help available at: %s. Ignore if using cortex/thanos here.", address, err.Error(), prom.PrometheusTroubleshootingURL)
	} else {
		klog.Infof("Retrieved a prometheus config file from: %s", address)
	}

	// Lookup scrape interval for kubecost job, update if found
	si, err := prom.ScrapeIntervalFor(promCli, env.GetKubecostJobName())
	if err == nil {
		scrapeInterval = si
	}

	klog.Infof("Using scrape interval of %f", scrapeInterval.Seconds())

	// Kubernetes API setup: use the kubeconfig file when a path is configured,
	// otherwise fall back to the in-cluster configuration.
	var kc *rest.Config
	if kubeconfig := env.GetKubeConfigPath(); kubeconfig != "" {
		kc, err = clientcmd.BuildConfigFromFlags("", kubeconfig)
	} else {
		kc, err = rest.InClusterConfig()
	}
	if err != nil {
		panic(err.Error())
	}

	kubeClientset, err := kubernetes.NewForConfig(kc)
	if err != nil {
		panic(err.Error())
	}

	// Create ConfigFileManager for synchronization of shared configuration
	confManager := config.NewConfigFileManager(&config.ConfigFileManagerOpts{
		BucketStoreConfig: env.GetKubecostConfigBucket(),
		LocalConfigPath:   "/",
	})

	// Create Kubernetes Cluster Cache + Watchers
	var k8sCache clustercache.ClusterCache
	if env.IsClusterCacheFileEnabled() {
		importLocation := confManager.ConfigFileAt("/var/configs/cluster-cache.json")
		k8sCache = clustercache.NewClusterImporter(importLocation)
	} else {
		k8sCache = clustercache.NewKubernetesClusterCache(kubeClientset)
	}
	k8sCache.Run()

	cloudProviderKey := env.GetCloudProviderAPIKey()
	cloudProvider, err := cloud.NewProvider(k8sCache, cloudProviderKey, confManager)
	if err != nil {
		panic(err.Error())
	}

	// Append the pricing config watcher
	configWatchers.AddWatcher(cloud.ConfigWatcherFor(cloudProvider))
	watchConfigFunc := configWatchers.ToWatchFunc()
	watchedConfigs := configWatchers.GetWatchedConfigs()

	kubecostNamespace := env.GetKubecostNamespace()
	// We need an initial invocation because the init of the cache has happened before we had access to the provider.
	for _, cw := range watchedConfigs {
		configs, err := kubeClientset.CoreV1().ConfigMaps(kubecostNamespace).Get(context.Background(), cw, metav1.GetOptions{})
		if err != nil {
			klog.Infof("No %s configmap found at install time, using existing configs: %s", cw, err.Error())
		} else {
			klog.Infof("Found configmap %s, watching...", configs.Name)
			watchConfigFunc(configs)
		}
	}

	k8sCache.SetConfigMapUpdateFunc(watchConfigFunc)

	remoteEnabled := env.IsRemoteEnabled()
	if remoteEnabled {
		// NOTE(review): info is logged and passed to GetOrCreateClusterMeta
		// even when ClusterInfo returned an error — confirm this is intended.
		info, err := cloudProvider.ClusterInfo()
		klog.Infof("Saving cluster with id:'%s', and name:'%s' to durable storage", info["id"], info["name"])
		if err != nil {
			klog.Infof("Error saving cluster id %s", err.Error())
		}
		_, _, err = cloud.GetOrCreateClusterMeta(info["id"], info["name"])
		if err != nil {
			klog.Infof("Unable to set cluster id '%s' for cluster '%s', %s", info["id"], info["name"], err.Error())
		}
	}

	// Thanos Client
	var thanosClient prometheus.Client
	if thanos.IsEnabled() {
		thanosAddress := thanos.QueryURL()

		if thanosAddress != "" {
			// NOTE(review): the error from NewThanosClient is discarded, and
			// this config sets RetryOnRateLimitResponse where the Prometheus
			// config above sets RateLimitRetryOpts — verify both against
			// prom.PrometheusClientConfig.
			thanosCli, _ := thanos.NewThanosClient(thanosAddress, &prom.PrometheusClientConfig{
				Timeout:                  timeout,
				KeepAlive:                keepAlive,
				TLSHandshakeTimeout:      tlsHandshakeTimeout,
				TLSInsecureSkipVerify:    env.GetInsecureSkipVerify(),
				RetryOnRateLimitResponse: rateLimitRetryOpts,
				Auth: &prom.ClientAuth{
					Username:    env.GetMultiClusterBasicAuthUsername(),
					Password:    env.GetMultiClusterBasicAuthPassword(),
					BearerToken: env.GetMultiClusterBearerToken(),
				},
				QueryConcurrency: queryConcurrency,
				QueryLogFile:     env.GetQueryLoggingFile(),
			})

			// The client is kept whether or not validation succeeds; only the
			// log message differs.
			_, err = prom.Validate(thanosCli)
			if err != nil {
				klog.V(1).Infof("[Warning] Failed to query Thanos at %s. Error: %s.", thanosAddress, err.Error())
				thanosClient = thanosCli
			} else {
				klog.V(1).Info("Success: retrieved the 'up' query against Thanos at: " + thanosAddress)

				thanosClient = thanosCli
			}

		} else {
			klog.Infof("Error resolving environment variable: $%s", env.ThanosQueryUrlEnvVar)
		}
	}

	// ClusterInfo Provider to provide the cluster map with local and remote cluster data
	var clusterInfoProvider clusters.ClusterInfoProvider
	if env.IsClusterInfoFileEnabled() {
		clusterInfoFile := confManager.ConfigFileAt("/var/configs/cluster-info.json")
		clusterInfoProvider = NewConfiguredClusterInfoProvider(clusterInfoFile)
	} else {
		clusterInfoProvider = NewLocalClusterInfoProvider(kubeClientset, cloudProvider)
	}

	// Initialize ClusterMap for maintaining ClusterInfo by ClusterID
	var clusterMap clusters.ClusterMap
	if thanosClient != nil {
		clusterMap = clusters.NewClusterMap(thanosClient, clusterInfoProvider, 10*time.Minute)
	} else {
		clusterMap = clusters.NewClusterMap(promCli, clusterInfoProvider, 5*time.Minute)
	}

	// cache responses from model and aggregation for a default of 10 minutes;
	// clear expired responses every 20 minutes
	aggregateCache := cache.New(time.Minute*10, time.Minute*20)
	costDataCache := cache.New(time.Minute*10, time.Minute*20)
	clusterCostsCache := cache.New(cache.NoExpiration, cache.NoExpiration)
	outOfClusterCache := cache.New(time.Minute*5, time.Minute*10)
	settingsCache := cache.New(cache.NoExpiration, cache.NoExpiration)

	// query durations that should be cached longer should be registered here
	// use relatively prime numbers to minimize likelihood of synchronized
	// attempts at cache warming
	day := 24 * time.Hour
	cacheExpiration := map[time.Duration]time.Duration{
		day:      maxCacheMinutes1d * time.Minute,
		2 * day:  maxCacheMinutes2d * time.Minute,
		7 * day:  maxCacheMinutes7d * time.Minute,
		30 * day: maxCacheMinutes30d * time.Minute,
	}

	// The cost model queries Thanos when a Thanos client exists and Prometheus
	// otherwise; the metrics emitter is always given the Prometheus client.
	var pc prometheus.Client
	if thanosClient != nil {
		pc = thanosClient
	} else {
		pc = promCli
	}
	costModel := NewCostModel(pc, cloudProvider, k8sCache, clusterMap, scrapeInterval)
	metricsEmitter := NewCostModelMetricsEmitter(promCli, k8sCache, cloudProvider, clusterInfoProvider, costModel)

	a := &Accesses{
		Router:              httprouter.New(),
		PrometheusClient:    promCli,
		ThanosClient:        thanosClient,
		KubeClientSet:       kubeClientset,
		ClusterCache:        k8sCache,
		ClusterMap:          clusterMap,
		CloudProvider:       cloudProvider,
		ConfigFileManager:   confManager,
		ClusterInfoProvider: clusterInfoProvider,
		Model:               costModel,
		MetricsEmitter:      metricsEmitter,
		AggregateCache:      aggregateCache,
		CostDataCache:       costDataCache,
		ClusterCostsCache:   clusterCostsCache,
		OutOfClusterCache:   outOfClusterCache,
		SettingsCache:       settingsCache,
		CacheExpiration:     cacheExpiration,
		httpServices:        services.NewCostModelServices(),
	}
	// Use the Accesses instance, itself, as the CostModelAggregator. This is
	// confusing and unconventional, but necessary so that we can swap it
	// out for the ETL-adapted version elsewhere.
	// TODO clean this up once ETL is open-sourced.
	a.AggAPI = a

	// Initialize mechanism for subscribing to settings changes
	a.InitializeSettingsPubSub()
	// A pricing download failure is logged and does not stop start-up.
	err = a.CloudProvider.DownloadPricingData()
	if err != nil {
		klog.V(1).Info("Failed to download pricing data: " + err.Error())
	}

	// Warm the aggregate cache unless explicitly set to false
	if env.IsCacheWarmingEnabled() {
		log.Infof("Init: AggregateCostModel cache warming enabled")
		a.warmAggregateCostModelCache()
	} else {
		log.Infof("Init: AggregateCostModel cache warming disabled")
	}

	// The emitter is only started here when the separate metrics pod is not
	// enabled.
	if !env.IsKubecostMetricsPodEnabled() {
		a.MetricsEmitter.Start()
	}

	a.Router.GET("/costDataModel", a.CostDataModel)
	a.Router.GET("/costDataModelRange", a.CostDataModelRange)
	a.Router.GET("/aggregatedCostModel", a.AggregateCostModelHandler)
	a.Router.GET("/allocation/compute", a.ComputeAllocationHandler)
	a.Router.GET("/allNodePricing", a.GetAllNodePricing)
	a.Router.POST("/refreshPricing", a.RefreshPricingData)
	a.Router.GET("/clusterCostsOverTime", a.ClusterCostsOverTime)
	a.Router.GET("/clusterCosts", a.ClusterCosts)
	a.Router.GET("/clusterCostsFromCache", a.ClusterCostsFromCacheHandler)
	a.Router.GET("/validatePrometheus", a.GetPrometheusMetadata)
	a.Router.GET("/managementPlatform", a.ManagementPlatform)
	a.Router.GET("/clusterInfo", a.ClusterInfo)
	a.Router.GET("/clusterInfoMap", a.GetClusterInfoMap)
	a.Router.GET("/serviceAccountStatus", a.GetServiceAccountStatus)
	a.Router.GET("/pricingSourceStatus", a.GetPricingSourceStatus)
	a.Router.GET("/pricingSourceCounts", a.GetPricingSourceCounts)

	// endpoints migrated from server
	a.Router.GET("/allPersistentVolumes", a.GetAllPersistentVolumes)
	a.Router.GET("/allDeployments", a.GetAllDeployments)
	a.Router.GET("/allStorageClasses", a.GetAllStorageClasses)
	a.Router.GET("/allStatefulSets", a.GetAllStatefulSets)
	a.Router.GET("/allNodes", a.GetAllNodes)
	a.Router.GET("/allPods", a.GetAllPods)
	a.Router.GET("/allNamespaces", a.GetAllNamespaces)
	a.Router.GET("/allDaemonSets", a.GetAllDaemonSets)
	a.Router.GET("/pod/:namespace/:name", a.GetPod)
	a.Router.GET("/prometheusRecordingRules", a.PrometheusRecordingRules)
	a.Router.GET("/prometheusConfig", a.PrometheusConfig)
	a.Router.GET("/prometheusTargets", a.PrometheusTargets)
	a.Router.GET("/orphanedPods", a.GetOrphanedPods)
	a.Router.GET("/installNamespace", a.GetInstallNamespace)
	a.Router.GET("/podLogs", a.GetPodLogs)
	a.Router.POST("/serviceKey", a.AddServiceKey)
	a.Router.GET("/helmValues", a.GetHelmValues)
	a.Router.GET("/status", a.Status)

	// prom query proxies
	a.Router.GET("/prometheusQuery", a.PrometheusQuery)
	a.Router.GET("/prometheusQueryRange", a.PrometheusQueryRange)
	a.Router.GET("/thanosQuery", a.ThanosQuery)
	a.Router.GET("/thanosQueryRange", a.ThanosQueryRange)

	// diagnostics
	a.Router.GET("/diagnostics/requestQueue", a.GetPrometheusQueueState)
	a.Router.GET("/diagnostics/prometheusMetrics", a.GetPrometheusMetrics)

	a.httpServices.RegisterAll(a.Router)

	return a
}