provider.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. package cloud
  2. import (
  3. "database/sql"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "regexp"
  8. "strings"
  9. "time"
  10. "k8s.io/klog"
  11. "cloud.google.com/go/compute/metadata"
  12. "github.com/kubecost/cost-model/pkg/clustercache"
  13. "github.com/kubecost/cost-model/pkg/env"
  14. v1 "k8s.io/api/core/v1"
  15. )
  16. const authSecretPath = "/var/secrets/service-key.json"
  17. const storageConfigSecretPath = "/var/azure-storage-config/azure-storage-config.json"
  18. const defaultShareTenancyCost = "true"
  19. var createTableStatements = []string{
  20. `CREATE TABLE IF NOT EXISTS names (
  21. cluster_id VARCHAR(255) NOT NULL,
  22. cluster_name VARCHAR(255) NULL,
  23. PRIMARY KEY (cluster_id)
  24. );`,
  25. }
  26. // ReservedInstanceData keeps record of resources on a node should be
  27. // priced at reserved rates
  28. type ReservedInstanceData struct {
  29. ReservedCPU int64 `json:"reservedCPU"`
  30. ReservedRAM int64 `json:"reservedRAM"`
  31. CPUCost float64 `json:"CPUHourlyCost"`
  32. RAMCost float64 `json:"RAMHourlyCost"`
  33. }
  34. // Node is the interface by which the provider and cost model communicate Node prices.
  35. // The provider will best-effort try to fill out this struct.
  36. type Node struct {
  37. Cost string `json:"hourlyCost"`
  38. VCPU string `json:"CPU"`
  39. VCPUCost string `json:"CPUHourlyCost"`
  40. RAM string `json:"RAM"`
  41. RAMBytes string `json:"RAMBytes"`
  42. RAMCost string `json:"RAMGBHourlyCost"`
  43. Storage string `json:"storage"`
  44. StorageCost string `json:"storageHourlyCost"`
  45. UsesBaseCPUPrice bool `json:"usesDefaultPrice"`
  46. BaseCPUPrice string `json:"baseCPUPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
  47. BaseRAMPrice string `json:"baseRAMPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
  48. BaseGPUPrice string `json:"baseGPUPrice"`
  49. UsageType string `json:"usageType"`
  50. GPU string `json:"gpu"` // GPU represents the number of GPU on the instance
  51. GPUName string `json:"gpuName"`
  52. GPUCost string `json:"gpuCost"`
  53. InstanceType string `json:"instanceType,omitempty"`
  54. Region string `json:"region,omitempty"`
  55. Reserved *ReservedInstanceData `json:"reserved,omitempty"`
  56. ProviderID string `json:"providerID,omitempty"`
  57. PricingType PricingType `json:"pricingType,omitempty"`
  58. }
  59. // IsSpot determines whether or not a Node uses spot by usage type
  60. func (n *Node) IsSpot() bool {
  61. if n != nil {
  62. return strings.Contains(n.UsageType, "spot") || strings.Contains(n.UsageType, "emptible")
  63. } else {
  64. return false
  65. }
  66. }
  67. // LoadBalancer is the interface by which the provider and cost model communicate LoadBalancer prices.
  68. // The provider will best-effort try to fill out this struct.
  69. type LoadBalancer struct {
  70. IngressIPAddresses []string `json:"IngressIPAddresses"`
  71. Cost float64 `json:"hourlyCost"`
  72. }
  73. // TODO: used for dynamic cloud provider price fetching.
  74. // determine what identifies a load balancer in the json returned from the cloud provider pricing API call
  75. // type LBKey interface {
  76. // }
  77. // Network is the interface by which the provider and cost model communicate network egress prices.
  78. // The provider will best-effort try to fill out this struct.
  79. type Network struct {
  80. ZoneNetworkEgressCost float64
  81. RegionNetworkEgressCost float64
  82. InternetNetworkEgressCost float64
  83. }
  84. // PV is the interface by which the provider and cost model communicate PV prices.
  85. // The provider will best-effort try to fill out this struct.
  86. type PV struct {
  87. Cost string `json:"hourlyCost"`
  88. CostPerIO string `json:"costPerIOOperation"`
  89. Class string `json:"storageClass"`
  90. Size string `json:"size"`
  91. Region string `json:"region"`
  92. ProviderID string `json:"providerID,omitempty"`
  93. Parameters map[string]string `json:"parameters"`
  94. }
  95. // Key represents a way for nodes to match between the k8s API and a pricing API
  96. type Key interface {
  97. ID() string // ID represents an exact match
  98. Features() string // Features are a comma separated string of node metadata that could match pricing
  99. GPUType() string // GPUType returns "" if no GPU exists, but the name of the GPU otherwise
  100. }
  101. type PVKey interface {
  102. Features() string
  103. GetStorageClass() string
  104. ID() string
  105. }
  106. // OutOfClusterAllocation represents a cloud provider cost not associated with kubernetes
  107. type OutOfClusterAllocation struct {
  108. Aggregator string `json:"aggregator"`
  109. Environment string `json:"environment"`
  110. Service string `json:"service"`
  111. Cost float64 `json:"cost"`
  112. Cluster string `json:"cluster"`
  113. }
  114. type CustomPricing struct {
  115. Provider string `json:"provider"`
  116. Description string `json:"description"`
  117. CPU string `json:"CPU"`
  118. SpotCPU string `json:"spotCPU"`
  119. RAM string `json:"RAM"`
  120. SpotRAM string `json:"spotRAM"`
  121. GPU string `json:"GPU"`
  122. SpotGPU string `json:"spotGPU"`
  123. Storage string `json:"storage"`
  124. ZoneNetworkEgress string `json:"zoneNetworkEgress"`
  125. RegionNetworkEgress string `json:"regionNetworkEgress"`
  126. InternetNetworkEgress string `json:"internetNetworkEgress"`
  127. FirstFiveForwardingRulesCost string `json:"firstFiveForwardingRulesCost"`
  128. AdditionalForwardingRuleCost string `json:"additionalForwardingRuleCost"`
  129. LBIngressDataCost string `json:"LBIngressDataCost"`
  130. SpotLabel string `json:"spotLabel,omitempty"`
  131. SpotLabelValue string `json:"spotLabelValue,omitempty"`
  132. GpuLabel string `json:"gpuLabel,omitempty"`
  133. GpuLabelValue string `json:"gpuLabelValue,omitempty"`
  134. ServiceKeyName string `json:"awsServiceKeyName,omitempty"`
  135. ServiceKeySecret string `json:"awsServiceKeySecret,omitempty"`
  136. SpotDataRegion string `json:"awsSpotDataRegion,omitempty"`
  137. SpotDataBucket string `json:"awsSpotDataBucket,omitempty"`
  138. SpotDataPrefix string `json:"awsSpotDataPrefix,omitempty"`
  139. ProjectID string `json:"projectID,omitempty"`
  140. AthenaProjectID string `json:"athenaProjectID,omitempty"`
  141. AthenaBucketName string `json:"athenaBucketName"`
  142. AthenaRegion string `json:"athenaRegion"`
  143. AthenaDatabase string `json:"athenaDatabase"`
  144. AthenaTable string `json:"athenaTable"`
  145. MasterPayerARN string `json:"masterPayerARN"`
  146. BillingDataDataset string `json:"billingDataDataset,omitempty"`
  147. CustomPricesEnabled string `json:"customPricesEnabled"`
  148. DefaultIdle string `json:"defaultIdle"`
  149. AzureSubscriptionID string `json:"azureSubscriptionID"`
  150. AzureClientID string `json:"azureClientID"`
  151. AzureClientSecret string `json:"azureClientSecret"`
  152. AzureTenantID string `json:"azureTenantID"`
  153. AzureBillingRegion string `json:"azureBillingRegion"`
  154. CurrencyCode string `json:"currencyCode"`
  155. Discount string `json:"discount"`
  156. NegotiatedDiscount string `json:"negotiatedDiscount"`
  157. SharedOverhead string `json:"sharedOverhead"`
  158. ClusterName string `json:"clusterName"`
  159. SharedNamespaces string `json:"sharedNamespaces"`
  160. SharedLabelNames string `json:"sharedLabelNames"`
  161. SharedLabelValues string `json:"sharedLabelValues"`
  162. ShareTenancyCosts string `json:"shareTenancyCosts"` // TODO clean up configuration so we can use a type other that string (this should be a bool, but the app panics if it's not a string)
  163. ReadOnly string `json:"readOnly"`
  164. KubecostToken string `json:"kubecostToken"`
  165. }
  166. type ServiceAccountStatus struct {
  167. Checks []*ServiceAccountCheck `json:"checks"`
  168. }
  169. type ServiceAccountCheck struct {
  170. Message string `json:"message"`
  171. Status bool `json:"status"`
  172. AdditionalInfo string `json:"additionalInfo"`
  173. }
  174. type PricingSources struct {
  175. PricingSources map[string]*PricingSource
  176. }
  177. type PricingSource struct {
  178. Name string `json:"name"`
  179. Available bool `json:"available"`
  180. Error string `json:"error"`
  181. }
  182. type PricingType string
  183. const (
  184. Api PricingType = "api"
  185. Spot PricingType = "spot"
  186. Reserved PricingType = "reserved"
  187. SavingsPlan PricingType = "savingsPlan"
  188. CsvExact PricingType = "csvExact"
  189. CsvClass PricingType = "csvClass"
  190. DefaultPrices PricingType = "defaultPrices"
  191. )
  192. type PricingMatchMetadata struct {
  193. TotalNodes int `json:"TotalNodes"`
  194. PricingTypeCounts map[PricingType]int `json:"PricingType"`
  195. }
  196. // Provider represents a k8s provider.
  197. type Provider interface {
  198. ClusterInfo() (map[string]string, error)
  199. GetAddresses() ([]byte, error)
  200. GetDisks() ([]byte, error)
  201. NodePricing(Key) (*Node, error)
  202. PVPricing(PVKey) (*PV, error)
  203. NetworkPricing() (*Network, error) // TODO: add key interface arg for dynamic price fetching
  204. LoadBalancerPricing() (*LoadBalancer, error) // TODO: add key interface arg for dynamic price fetching
  205. AllNodePricing() (interface{}, error)
  206. DownloadPricingData() error
  207. GetKey(map[string]string, *v1.Node) Key
  208. GetPVKey(*v1.PersistentVolume, map[string]string, string) PVKey
  209. UpdateConfig(r io.Reader, updateType string) (*CustomPricing, error)
  210. UpdateConfigFromConfigMap(map[string]string) (*CustomPricing, error)
  211. GetConfig() (*CustomPricing, error)
  212. GetManagementPlatform() (string, error)
  213. GetLocalStorageQuery(time.Duration, time.Duration, bool, bool) string
  214. ExternalAllocations(string, string, []string, string, string, bool) ([]*OutOfClusterAllocation, error)
  215. ApplyReservedInstancePricing(map[string]*Node)
  216. ServiceAccountStatus() *ServiceAccountStatus
  217. PricingSourceStatus() map[string]*PricingSource
  218. ClusterManagementPricing() (string, float64, error)
  219. CombinedDiscountForNode(string, bool, float64, float64) float64
  220. }
  221. // ClusterName returns the name defined in cluster info, defaulting to the
  222. // CLUSTER_ID environment variable
  223. func ClusterName(p Provider) string {
  224. info, err := p.ClusterInfo()
  225. if err != nil {
  226. return env.GetClusterID()
  227. }
  228. name, ok := info["name"]
  229. if !ok {
  230. return env.GetClusterID()
  231. }
  232. return name
  233. }
  234. // CustomPricesEnabled returns the boolean equivalent of the cloup provider's custom prices flag,
  235. // indicating whether or not the cluster is using custom pricing.
  236. func CustomPricesEnabled(p Provider) bool {
  237. config, err := p.GetConfig()
  238. if err != nil {
  239. return false
  240. }
  241. // TODO:CLEANUP what is going on with this?
  242. if config.NegotiatedDiscount == "" {
  243. config.NegotiatedDiscount = "0%"
  244. }
  245. return config.CustomPricesEnabled == "true"
  246. }
  247. // AllocateIdleByDefault returns true if the application settings specify to allocate idle by default
  248. func AllocateIdleByDefault(p Provider) bool {
  249. config, err := p.GetConfig()
  250. if err != nil {
  251. return false
  252. }
  253. return config.DefaultIdle == "true"
  254. }
  255. // SharedNamespace returns a list of names of shared namespaces, as defined in the application settings
  256. func SharedNamespaces(p Provider) []string {
  257. namespaces := []string{}
  258. config, err := p.GetConfig()
  259. if err != nil {
  260. return namespaces
  261. }
  262. if config.SharedNamespaces == "" {
  263. return namespaces
  264. }
  265. // trim spaces so that "kube-system, kubecost" is equivalent to "kube-system,kubecost"
  266. for _, ns := range strings.Split(config.SharedNamespaces, ",") {
  267. namespaces = append(namespaces, strings.Trim(ns, " "))
  268. }
  269. return namespaces
  270. }
  271. // SharedLabel returns the configured set of shared labels as a parallel tuple of keys to values; e.g.
  272. // for app:kubecost,type:staging this returns (["app", "type"], ["kubecost", "staging"]) in order to
  273. // match the signature of the NewSharedResourceInfo
  274. func SharedLabels(p Provider) ([]string, []string) {
  275. names := []string{}
  276. values := []string{}
  277. config, err := p.GetConfig()
  278. if err != nil {
  279. return names, values
  280. }
  281. if config.SharedLabelNames == "" || config.SharedLabelValues == "" {
  282. return names, values
  283. }
  284. ks := strings.Split(config.SharedLabelNames, ",")
  285. vs := strings.Split(config.SharedLabelValues, ",")
  286. if len(ks) != len(vs) {
  287. klog.V(2).Infof("[Warning] shared labels have mis-matched lengths: %d names, %d values", len(ks), len(vs))
  288. return names, values
  289. }
  290. for i := range ks {
  291. names = append(names, strings.Trim(ks[i], " "))
  292. values = append(values, strings.Trim(vs[i], " "))
  293. }
  294. return names, values
  295. }
  296. // ShareTenancyCosts returns true if the application settings specify to share
  297. // tenancy costs by default.
  298. func ShareTenancyCosts(p Provider) bool {
  299. config, err := p.GetConfig()
  300. if err != nil {
  301. return false
  302. }
  303. return config.ShareTenancyCosts == "true"
  304. }
  305. func NewCrossClusterProvider(ctype string, overrideConfigPath string, cache clustercache.ClusterCache) (Provider, error) {
  306. if ctype == "aws" {
  307. return &AWS{
  308. Clientset: cache,
  309. Config: NewProviderConfig(overrideConfigPath),
  310. }, nil
  311. } else if ctype == "gcp" {
  312. return &GCP{
  313. Clientset: cache,
  314. Config: NewProviderConfig(overrideConfigPath),
  315. }, nil
  316. } else if ctype == "azure" {
  317. return &Azure{
  318. Clientset: cache,
  319. Config: NewProviderConfig(overrideConfigPath),
  320. }, nil
  321. }
  322. return &CustomProvider{
  323. Clientset: cache,
  324. Config: NewProviderConfig(overrideConfigPath),
  325. }, nil
  326. }
  327. // NewProvider looks at the nodespec or provider metadata server to decide which provider to instantiate.
  328. func NewProvider(cache clustercache.ClusterCache, apiKey string) (Provider, error) {
  329. nodes := cache.GetAllNodes()
  330. if len(nodes) == 0 {
  331. return nil, fmt.Errorf("Could not locate any nodes for cluster.")
  332. }
  333. provider := strings.ToLower(nodes[0].Spec.ProviderID)
  334. if env.IsUseCSVProvider() {
  335. klog.Infof("Using CSV Provider with CSV at %s", env.GetCSVPath())
  336. configFileName := ""
  337. if metadata.OnGCE() {
  338. configFileName = "gcp.json"
  339. } else if strings.HasPrefix(provider, "aws") {
  340. configFileName = "aws.json"
  341. } else if strings.HasPrefix(provider, "azure") {
  342. configFileName = "azure.json"
  343. } else {
  344. configFileName = "default.json"
  345. }
  346. return &CSVProvider{
  347. CSVLocation: env.GetCSVPath(),
  348. CustomProvider: &CustomProvider{
  349. Clientset: cache,
  350. Config: NewProviderConfig(configFileName),
  351. },
  352. }, nil
  353. }
  354. if metadata.OnGCE() {
  355. klog.V(3).Info("metadata reports we are in GCE")
  356. if apiKey == "" {
  357. return nil, errors.New("Supply a GCP Key to start getting data")
  358. }
  359. return &GCP{
  360. Clientset: cache,
  361. APIKey: apiKey,
  362. Config: NewProviderConfig("gcp.json"),
  363. }, nil
  364. }
  365. if strings.HasPrefix(provider, "aws") {
  366. klog.V(2).Info("Found ProviderID starting with \"aws\", using AWS Provider")
  367. return &AWS{
  368. Clientset: cache,
  369. Config: NewProviderConfig("aws.json"),
  370. }, nil
  371. } else if strings.HasPrefix(provider, "azure") {
  372. klog.V(2).Info("Found ProviderID starting with \"azure\", using Azure Provider")
  373. return &Azure{
  374. Clientset: cache,
  375. Config: NewProviderConfig("azure.json"),
  376. }, nil
  377. } else {
  378. klog.V(2).Info("Unsupported provider, falling back to default")
  379. return &CustomProvider{
  380. Clientset: cache,
  381. Config: NewProviderConfig("default.json"),
  382. }, nil
  383. }
  384. }
  385. func UpdateClusterMeta(cluster_id, cluster_name string) error {
  386. pw := env.GetRemotePW()
  387. address := env.GetSQLAddress()
  388. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  389. db, err := sql.Open("postgres", connStr)
  390. if err != nil {
  391. return err
  392. }
  393. defer db.Close()
  394. updateStmt := `UPDATE names SET cluster_name = $1 WHERE cluster_id = $2;`
  395. _, err = db.Exec(updateStmt, cluster_name, cluster_id)
  396. if err != nil {
  397. return err
  398. }
  399. return nil
  400. }
  401. func CreateClusterMeta(cluster_id, cluster_name string) error {
  402. pw := env.GetRemotePW()
  403. address := env.GetSQLAddress()
  404. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  405. db, err := sql.Open("postgres", connStr)
  406. if err != nil {
  407. return err
  408. }
  409. defer db.Close()
  410. for _, stmt := range createTableStatements {
  411. _, err := db.Exec(stmt)
  412. if err != nil {
  413. return err
  414. }
  415. }
  416. insertStmt := `INSERT INTO names (cluster_id, cluster_name) VALUES ($1, $2);`
  417. _, err = db.Exec(insertStmt, cluster_id, cluster_name)
  418. if err != nil {
  419. return err
  420. }
  421. return nil
  422. }
  423. func GetClusterMeta(cluster_id string) (string, string, error) {
  424. pw := env.GetRemotePW()
  425. address := env.GetSQLAddress()
  426. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  427. db, err := sql.Open("postgres", connStr)
  428. defer db.Close()
  429. query := `SELECT cluster_id, cluster_name
  430. FROM names
  431. WHERE cluster_id = ?`
  432. rows, err := db.Query(query, cluster_id)
  433. if err != nil {
  434. return "", "", err
  435. }
  436. defer rows.Close()
  437. var (
  438. sql_cluster_id string
  439. cluster_name string
  440. )
  441. for rows.Next() {
  442. if err := rows.Scan(&sql_cluster_id, &cluster_name); err != nil {
  443. return "", "", err
  444. }
  445. }
  446. return sql_cluster_id, cluster_name, nil
  447. }
  448. func GetOrCreateClusterMeta(cluster_id, cluster_name string) (string, string, error) {
  449. id, name, err := GetClusterMeta(cluster_id)
  450. if err != nil {
  451. err := CreateClusterMeta(cluster_id, cluster_name)
  452. if err != nil {
  453. return "", "", err
  454. }
  455. }
  456. if id == "" {
  457. err := CreateClusterMeta(cluster_id, cluster_name)
  458. if err != nil {
  459. return "", "", err
  460. }
  461. }
  462. return id, name, nil
  463. }
  464. // ParseID attempts to parse a ProviderId from a string based on formats from the various providers and
  465. // returns the string as is if it cannot find a match
  466. func ParseID(id string) string {
  467. // It's of the form aws:///us-east-2a/i-0fea4fd46592d050b and we want i-0fea4fd46592d050b, if it exists
  468. rx := regexp.MustCompile("aws://[^/]*/[^/]*/([^/]+)")
  469. match := rx.FindStringSubmatch(id)
  470. if len(match) >= 2 {
  471. return match[1]
  472. }
  473. // gce://guestbook-227502/us-central1-a/gke-niko-n1-standard-2-wljla-8df8e58a-hfy7
  474. // => gke-niko-n1-standard-2-wljla-8df8e58a-hfy7
  475. rx = regexp.MustCompile("gce://[^/]*/[^/]*/([^/]+)")
  476. match = rx.FindStringSubmatch(id)
  477. if len(match) >= 2 {
  478. return match[1]
  479. }
  480. // Return id for Azure Provider, CSV Provider and Custom Provider
  481. return id
  482. }
  483. // ParsePVID attempts to parse a PV ProviderId from a string based on formats from the various providers and
  484. // returns the string as is if it cannot find a match
  485. func ParsePVID(id string) string {
  486. // Capture "vol-0fc54c5e83b8d2b76" from "aws://us-east-2a/vol-0fc54c5e83b8d2b76"
  487. rx := regexp.MustCompile("aws:/[^/]*/[^/]*/([^/]+)")
  488. match := rx.FindStringSubmatch(id)
  489. if len(match) >= 2 {
  490. return match[1]
  491. }
  492. // Return id for GCP Provider, Azure Provider, CSV Provider and Custom Provider
  493. return id
  494. }
  495. // ParseLBID attempts to parse a LB ProviderId from a string based on formats from the various providers and
  496. // returns the string as is if it cannot find a match
  497. func ParseLBID(id string) string {
  498. rx := regexp.MustCompile("^([^-]+)-.+amazonaws\\.com$") // Capture "ad9d88195b52a47c89b5055120f28c58" from "ad9d88195b52a47c89b5055120f28c58-1037804914.us-east-2.elb.amazonaws.com"
  499. match := rx.FindStringSubmatch(id)
  500. if len(match) >= 2 {
  501. return match[1]
  502. }
  503. // Return id for GCP Provider, Azure Provider, CSV Provider and Custom Provider
  504. return id
  505. }