provider.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. package cloud
  2. import (
  3. "database/sql"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "regexp"
  8. "strings"
  9. "time"
  10. "k8s.io/klog"
  11. "cloud.google.com/go/compute/metadata"
  12. "github.com/kubecost/cost-model/pkg/clustercache"
  13. "github.com/kubecost/cost-model/pkg/env"
  14. v1 "k8s.io/api/core/v1"
  15. )
  16. const authSecretPath = "/var/secrets/service-key.json"
  17. const storageConfigSecretPath = "/var/azure-storage-config/azure-storage-config.json"
// createTableStatements lists the DDL statements that CreateClusterMeta
// executes, in order, before inserting a row. The single statement uses
// IF NOT EXISTS, so running it repeatedly is harmless.
var createTableStatements = []string{
	`CREATE TABLE IF NOT EXISTS names (
		cluster_id VARCHAR(255) NOT NULL,
		cluster_name VARCHAR(255) NULL,
		PRIMARY KEY (cluster_id)
	);`,
}
// ReservedInstanceData keeps a record of the resources on a node that should
// be priced at reserved rates. It is attached to a Node through the
// Node.Reserved field.
//
// NOTE(review): the units of ReservedCPU and ReservedRAM are not visible in
// this file; the JSON tags indicate that CPUCost and RAMCost are hourly costs.
type ReservedInstanceData struct {
	ReservedCPU int64   `json:"reservedCPU"`
	ReservedRAM int64   `json:"reservedRAM"`
	CPUCost     float64 `json:"CPUHourlyCost"`
	RAMCost     float64 `json:"RAMHourlyCost"`
}
// Node is the structure through which the provider and cost model communicate
// Node prices. The provider will best-effort try to fill out this struct, so
// any field may be left at its zero value.
//
// Most values, including costs and quantities, are carried as strings.
// UsageType is the field inspected by IsSpot. Note that UsesBaseCPUPrice is
// serialized under the JSON name "usesDefaultPrice" and RAMCost under
// "RAMGBHourlyCost".
type Node struct {
	Cost             string                `json:"hourlyCost"`
	VCPU             string                `json:"CPU"`
	VCPUCost         string                `json:"CPUHourlyCost"`
	RAM              string                `json:"RAM"`
	RAMBytes         string                `json:"RAMBytes"`
	RAMCost          string                `json:"RAMGBHourlyCost"`
	Storage          string                `json:"storage"`
	StorageCost      string                `json:"storageHourlyCost"`
	UsesBaseCPUPrice bool                  `json:"usesDefaultPrice"`
	BaseCPUPrice     string                `json:"baseCPUPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
	BaseRAMPrice     string                `json:"baseRAMPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
	BaseGPUPrice     string                `json:"baseGPUPrice"`
	UsageType        string                `json:"usageType"`
	GPU              string                `json:"gpu"` // GPU represents the number of GPU on the instance
	GPUName          string                `json:"gpuName"`
	GPUCost          string                `json:"gpuCost"`
	InstanceType     string                `json:"instanceType,omitempty"`
	Region           string                `json:"region,omitempty"`
	Reserved         *ReservedInstanceData `json:"reserved,omitempty"`
	ProviderID       string                `json:"providerID,omitempty"`
	PricingType      PricingType           `json:"pricingType,omitempty"`
}
  58. // IsSpot determines whether or not a Node uses spot by usage type
  59. func (n *Node) IsSpot() bool {
  60. if n != nil {
  61. return strings.Contains(n.UsageType, "spot") || strings.Contains(n.UsageType, "emptible")
  62. } else {
  63. return false
  64. }
  65. }
// LoadBalancer is the structure through which the provider and cost model
// communicate LoadBalancer prices. The provider will best-effort try to fill
// out this struct. Cost is serialized as "hourlyCost"; it is returned by
// Provider.LoadBalancerPricing.
type LoadBalancer struct {
	IngressIPAddresses []string `json:"IngressIPAddresses"`
	Cost               float64  `json:"hourlyCost"`
}
  72. // TODO: used for dynamic cloud provider price fetching.
  73. // determine what identifies a load balancer in the json returned from the cloud provider pricing API call
  74. // type LBKey interface {
  75. // }
// Network is the structure through which the provider and cost model
// communicate network egress prices. The provider will best-effort try to
// fill out this struct. It is returned by Provider.NetworkPricing.
//
// The fields carry no JSON tags, so encoding/json serializes them under their
// Go field names.
type Network struct {
	ZoneNetworkEgressCost     float64
	RegionNetworkEgressCost   float64
	InternetNetworkEgressCost float64
}
// PV is the structure through which the provider and cost model communicate
// PV (persistent volume) prices. The provider will best-effort try to fill
// out this struct. It is returned by Provider.PVPricing.
//
// Cost is serialized as "hourlyCost" and Class as "storageClass"; cost and
// size values are carried as strings.
type PV struct {
	Cost       string            `json:"hourlyCost"`
	CostPerIO  string            `json:"costPerIOOperation"`
	Class      string            `json:"storageClass"`
	Size       string            `json:"size"`
	Region     string            `json:"region"`
	ProviderID string            `json:"providerID,omitempty"`
	Parameters map[string]string `json:"parameters"`
}
// Key represents a way for nodes to match between the k8s API and a pricing
// API. Implementations are produced by Provider.GetKey and consumed by
// Provider.NodePricing.
type Key interface {
	ID() string       // ID represents an exact match
	Features() string // Features are a comma separated string of node metadata that could match pricing
	GPUType() string  // GPUType returns "" if no GPU exists, but the name of the GPU otherwise
}
// PVKey is the persistent-volume counterpart of Key. Implementations are
// produced by Provider.GetPVKey and consumed by Provider.PVPricing.
//
// NOTE(review): the exact matching semantics of each method are defined by
// the provider implementations, which are not visible in this file.
type PVKey interface {
	Features() string
	GetStorageClass() string
	ID() string
}
// OutOfClusterAllocation represents a cloud provider cost not associated with
// kubernetes. Slices of these are returned by Provider.ExternalAllocations.
type OutOfClusterAllocation struct {
	Aggregator  string  `json:"aggregator"`
	Environment string  `json:"environment"`
	Service     string  `json:"service"`
	Cost        float64 `json:"cost"`
	Cluster     string  `json:"cluster"`
}
// CustomPricing is the provider configuration returned by Provider.GetConfig,
// Provider.UpdateConfig and Provider.UpdateConfigFromConfigMap.
//
// Every setting except SharedCosts is stored as a string, including flags:
// helpers in this file treat CustomPricesEnabled and DefaultIdle as enabled
// only when they equal the literal "true". SharedNamespaces, SharedLabelNames
// and SharedLabelValues are comma-separated lists (see SharedNamespaces and
// SharedLabels).
//
// Note that ServiceKeyName, ServiceKeySecret and the SpotData* fields are
// serialized under "aws"-prefixed JSON names, and SharedCosts under
// "sharedCost".
type CustomPricing struct {
	Provider                     string            `json:"provider"`
	Description                  string            `json:"description"`
	CPU                          string            `json:"CPU"`
	SpotCPU                      string            `json:"spotCPU"`
	RAM                          string            `json:"RAM"`
	SpotRAM                      string            `json:"spotRAM"`
	GPU                          string            `json:"GPU"`
	SpotGPU                      string            `json:"spotGPU"`
	Storage                      string            `json:"storage"`
	ZoneNetworkEgress            string            `json:"zoneNetworkEgress"`
	RegionNetworkEgress          string            `json:"regionNetworkEgress"`
	InternetNetworkEgress        string            `json:"internetNetworkEgress"`
	FirstFiveForwardingRulesCost string            `json:"firstFiveForwardingRulesCost"`
	AdditionalForwardingRuleCost string            `json:"additionalForwardingRuleCost"`
	LBIngressDataCost            string            `json:"LBIngressDataCost"`
	SpotLabel                    string            `json:"spotLabel,omitempty"`
	SpotLabelValue               string            `json:"spotLabelValue,omitempty"`
	GpuLabel                     string            `json:"gpuLabel,omitempty"`
	GpuLabelValue                string            `json:"gpuLabelValue,omitempty"`
	ServiceKeyName               string            `json:"awsServiceKeyName,omitempty"`
	ServiceKeySecret             string            `json:"awsServiceKeySecret,omitempty"`
	SpotDataRegion               string            `json:"awsSpotDataRegion,omitempty"`
	SpotDataBucket               string            `json:"awsSpotDataBucket,omitempty"`
	SpotDataPrefix               string            `json:"awsSpotDataPrefix,omitempty"`
	ProjectID                    string            `json:"projectID,omitempty"`
	AthenaProjectID              string            `json:"athenaProjectID,omitempty"`
	AthenaBucketName             string            `json:"athenaBucketName"`
	AthenaRegion                 string            `json:"athenaRegion"`
	AthenaDatabase               string            `json:"athenaDatabase"`
	AthenaTable                  string            `json:"athenaTable"`
	MasterPayerARN               string            `json:"masterPayerARN"`
	BillingDataDataset           string            `json:"billingDataDataset,omitempty"`
	CustomPricesEnabled          string            `json:"customPricesEnabled"`
	DefaultIdle                  string            `json:"defaultIdle"`
	AzureSubscriptionID          string            `json:"azureSubscriptionID"`
	AzureClientID                string            `json:"azureClientID"`
	AzureClientSecret            string            `json:"azureClientSecret"`
	AzureTenantID                string            `json:"azureTenantID"`
	AzureBillingRegion           string            `json:"azureBillingRegion"`
	CurrencyCode                 string            `json:"currencyCode"`
	Discount                     string            `json:"discount"`
	NegotiatedDiscount           string            `json:"negotiatedDiscount"`
	SharedCosts                  map[string]string `json:"sharedCost"`
	ClusterName                  string            `json:"clusterName"`
	SharedNamespaces             string            `json:"sharedNamespaces"`
	SharedLabelNames             string            `json:"sharedLabelNames"`
	SharedLabelValues            string            `json:"sharedLabelValues"`
	ReadOnly                     string            `json:"readOnly"`
	KubecostToken                string            `json:"kubecostToken"`
}
// ServiceAccountStatus is the collection of service account checks returned
// by Provider.ServiceAccountStatus.
type ServiceAccountStatus struct {
	Checks []*ServiceAccountCheck `json:"checks"`
}
// ServiceAccountCheck is a single entry of a ServiceAccountStatus: a message,
// a boolean status and free-form additional information.
//
// NOTE(review): presumably Status is true when the check passed — confirm
// against the provider implementations.
type ServiceAccountCheck struct {
	Message        string `json:"message"`
	Status         bool   `json:"status"`
	AdditionalInfo string `json:"additionalInfo"`
}
// PricingSources wraps a map of PricingSource entries keyed by string.
//
// NOTE(review): presumably the key is the source name — confirm against the
// code that populates it. The field has no JSON tag, so it serializes under
// its Go name.
type PricingSources struct {
	PricingSources map[string]*PricingSource
}
// PricingSource describes one source of pricing data: its name, whether it is
// available, and an error string. Provider.PricingSourceStatus returns a map
// of these.
type PricingSource struct {
	Name      string `json:"name"`
	Available bool   `json:"available"`
	Error     string `json:"error"`
}
// PricingType labels how a price was determined. It is attached to a Node via
// Node.PricingType and tallied per value in PricingMatchMetadata.
type PricingType string

// Recognized PricingType values. The string on the right is the form that
// appears in serialized output.
const (
	Api           PricingType = "api"
	Spot          PricingType = "spot"
	Reserved      PricingType = "reserved"
	SavingsPlan   PricingType = "savingsPlan"
	CsvExact      PricingType = "csvExact"
	CsvClass      PricingType = "csvClass"
	DefaultPrices PricingType = "defaultPrices"
)
// PricingMatchMetadata summarizes pricing matches: a total node count and a
// count per PricingType. PricingTypeCounts is serialized under the JSON name
// "PricingType".
type PricingMatchMetadata struct {
	TotalNodes        int                 `json:"TotalNodes"`
	PricingTypeCounts map[PricingType]int `json:"PricingType"`
}
// Provider represents a k8s provider. It is the abstraction the cost model
// uses to obtain cluster information, prices and configuration; this file
// constructs AWS, GCP, Azure, CSVProvider and CustomProvider values as
// implementations (see NewProvider and NewCrossClusterProvider).
//
// NOTE(review): the meaning of the unnamed parameters (for example the
// arguments of ExternalAllocations, GetLocalStorageQuery and
// CombinedDiscountForNode) is defined by the implementations, which are not
// visible in this file.
type Provider interface {
	ClusterInfo() (map[string]string, error)
	GetAddresses() ([]byte, error)
	GetDisks() ([]byte, error)
	NodePricing(Key) (*Node, error)
	PVPricing(PVKey) (*PV, error)
	NetworkPricing() (*Network, error)           // TODO: add key interface arg for dynamic price fetching
	LoadBalancerPricing() (*LoadBalancer, error) // TODO: add key interface arg for dynamic price fetching
	AllNodePricing() (interface{}, error)
	DownloadPricingData() error
	GetKey(map[string]string, *v1.Node) Key
	GetPVKey(*v1.PersistentVolume, map[string]string, string) PVKey
	UpdateConfig(r io.Reader, updateType string) (*CustomPricing, error)
	UpdateConfigFromConfigMap(map[string]string) (*CustomPricing, error)
	GetConfig() (*CustomPricing, error)
	GetManagementPlatform() (string, error)
	GetLocalStorageQuery(time.Duration, time.Duration, bool, bool) string
	ExternalAllocations(string, string, []string, string, string, bool) ([]*OutOfClusterAllocation, error)
	ApplyReservedInstancePricing(map[string]*Node)
	ServiceAccountStatus() *ServiceAccountStatus
	PricingSourceStatus() map[string]*PricingSource
	ClusterManagementPricing() (string, float64, error)
	CombinedDiscountForNode(string, bool, float64, float64) float64
}
  219. // ClusterName returns the name defined in cluster info, defaulting to the
  220. // CLUSTER_ID environment variable
  221. func ClusterName(p Provider) string {
  222. info, err := p.ClusterInfo()
  223. if err != nil {
  224. return env.GetClusterID()
  225. }
  226. name, ok := info["name"]
  227. if !ok {
  228. return env.GetClusterID()
  229. }
  230. return name
  231. }
  232. // CustomPricesEnabled returns the boolean equivalent of the cloup provider's custom prices flag,
  233. // indicating whether or not the cluster is using custom pricing.
  234. func CustomPricesEnabled(p Provider) bool {
  235. config, err := p.GetConfig()
  236. if err != nil {
  237. return false
  238. }
  239. // TODO:CLEANUP what is going on with this?
  240. if config.NegotiatedDiscount == "" {
  241. config.NegotiatedDiscount = "0%"
  242. }
  243. return config.CustomPricesEnabled == "true"
  244. }
  245. // AllocateIdleByDefault returns true if the application settings specify to allocate idle by default
  246. func AllocateIdleByDefault(p Provider) bool {
  247. config, err := p.GetConfig()
  248. if err != nil {
  249. return false
  250. }
  251. return config.DefaultIdle == "true"
  252. }
  253. // SharedNamespace returns a list of names of shared namespaces, as defined in the application settings
  254. func SharedNamespaces(p Provider) []string {
  255. namespaces := []string{}
  256. config, err := p.GetConfig()
  257. if err != nil {
  258. return namespaces
  259. }
  260. if config.SharedNamespaces == "" {
  261. return namespaces
  262. }
  263. // trim spaces so that "kube-system, kubecost" is equivalent to "kube-system,kubecost"
  264. for _, ns := range strings.Split(config.SharedNamespaces, ",") {
  265. namespaces = append(namespaces, strings.Trim(ns, " "))
  266. }
  267. return namespaces
  268. }
  269. // SharedLabel returns the configured set of shared labels as a parallel tuple of keys to values; e.g.
  270. // for app:kubecost,type:staging this returns (["app", "type"], ["kubecost", "staging"]) in order to
  271. // match the signature of the NewSharedResourceInfo
  272. func SharedLabels(p Provider) ([]string, []string) {
  273. names := []string{}
  274. values := []string{}
  275. config, err := p.GetConfig()
  276. if err != nil {
  277. return names, values
  278. }
  279. if config.SharedLabelNames == "" || config.SharedLabelValues == "" {
  280. return names, values
  281. }
  282. ks := strings.Split(config.SharedLabelNames, ",")
  283. vs := strings.Split(config.SharedLabelValues, ",")
  284. if len(ks) != len(vs) {
  285. klog.V(2).Infof("[Warning] shared labels have mis-matched lengths: %d names, %d values", len(ks), len(vs))
  286. return names, values
  287. }
  288. for i := range ks {
  289. names = append(names, strings.Trim(ks[i], " "))
  290. values = append(values, strings.Trim(vs[i], " "))
  291. }
  292. return names, values
  293. }
  294. func NewCrossClusterProvider(ctype string, overrideConfigPath string, cache clustercache.ClusterCache) (Provider, error) {
  295. if ctype == "aws" {
  296. return &AWS{
  297. Clientset: cache,
  298. Config: NewProviderConfig(overrideConfigPath),
  299. }, nil
  300. } else if ctype == "gcp" {
  301. return &GCP{
  302. Clientset: cache,
  303. Config: NewProviderConfig(overrideConfigPath),
  304. }, nil
  305. } else if ctype == "azure" {
  306. return &Azure{
  307. Clientset: cache,
  308. Config: NewProviderConfig(overrideConfigPath),
  309. }, nil
  310. }
  311. return &CustomProvider{
  312. Clientset: cache,
  313. Config: NewProviderConfig(overrideConfigPath),
  314. }, nil
  315. }
  316. // NewProvider looks at the nodespec or provider metadata server to decide which provider to instantiate.
  317. func NewProvider(cache clustercache.ClusterCache, apiKey string) (Provider, error) {
  318. nodes := cache.GetAllNodes()
  319. if len(nodes) == 0 {
  320. return nil, fmt.Errorf("Could not locate any nodes for cluster.")
  321. }
  322. provider := strings.ToLower(nodes[0].Spec.ProviderID)
  323. if env.IsUseCSVProvider() {
  324. klog.Infof("Using CSV Provider with CSV at %s", env.GetCSVPath())
  325. configFileName := ""
  326. if metadata.OnGCE() {
  327. configFileName = "gcp.json"
  328. } else if strings.HasPrefix(provider, "aws") {
  329. configFileName = "aws.json"
  330. } else if strings.HasPrefix(provider, "azure") {
  331. configFileName = "azure.json"
  332. } else {
  333. configFileName = "default.json"
  334. }
  335. return &CSVProvider{
  336. CSVLocation: env.GetCSVPath(),
  337. CustomProvider: &CustomProvider{
  338. Clientset: cache,
  339. Config: NewProviderConfig(configFileName),
  340. },
  341. }, nil
  342. }
  343. if metadata.OnGCE() {
  344. klog.V(3).Info("metadata reports we are in GCE")
  345. if apiKey == "" {
  346. return nil, errors.New("Supply a GCP Key to start getting data")
  347. }
  348. return &GCP{
  349. Clientset: cache,
  350. APIKey: apiKey,
  351. Config: NewProviderConfig("gcp.json"),
  352. }, nil
  353. }
  354. if strings.HasPrefix(provider, "aws") {
  355. klog.V(2).Info("Found ProviderID starting with \"aws\", using AWS Provider")
  356. return &AWS{
  357. Clientset: cache,
  358. Config: NewProviderConfig("aws.json"),
  359. }, nil
  360. } else if strings.HasPrefix(provider, "azure") {
  361. klog.V(2).Info("Found ProviderID starting with \"azure\", using Azure Provider")
  362. return &Azure{
  363. Clientset: cache,
  364. Config: NewProviderConfig("azure.json"),
  365. }, nil
  366. } else {
  367. klog.V(2).Info("Unsupported provider, falling back to default")
  368. return &CustomProvider{
  369. Clientset: cache,
  370. Config: NewProviderConfig("default.json"),
  371. }, nil
  372. }
  373. }
  374. func UpdateClusterMeta(cluster_id, cluster_name string) error {
  375. pw := env.GetRemotePW()
  376. address := env.GetSQLAddress()
  377. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  378. db, err := sql.Open("postgres", connStr)
  379. if err != nil {
  380. return err
  381. }
  382. defer db.Close()
  383. updateStmt := `UPDATE names SET cluster_name = $1 WHERE cluster_id = $2;`
  384. _, err = db.Exec(updateStmt, cluster_name, cluster_id)
  385. if err != nil {
  386. return err
  387. }
  388. return nil
  389. }
  390. func CreateClusterMeta(cluster_id, cluster_name string) error {
  391. pw := env.GetRemotePW()
  392. address := env.GetSQLAddress()
  393. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  394. db, err := sql.Open("postgres", connStr)
  395. if err != nil {
  396. return err
  397. }
  398. defer db.Close()
  399. for _, stmt := range createTableStatements {
  400. _, err := db.Exec(stmt)
  401. if err != nil {
  402. return err
  403. }
  404. }
  405. insertStmt := `INSERT INTO names (cluster_id, cluster_name) VALUES ($1, $2);`
  406. _, err = db.Exec(insertStmt, cluster_id, cluster_name)
  407. if err != nil {
  408. return err
  409. }
  410. return nil
  411. }
  412. func GetClusterMeta(cluster_id string) (string, string, error) {
  413. pw := env.GetRemotePW()
  414. address := env.GetSQLAddress()
  415. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  416. db, err := sql.Open("postgres", connStr)
  417. defer db.Close()
  418. query := `SELECT cluster_id, cluster_name
  419. FROM names
  420. WHERE cluster_id = ?`
  421. rows, err := db.Query(query, cluster_id)
  422. if err != nil {
  423. return "", "", err
  424. }
  425. defer rows.Close()
  426. var (
  427. sql_cluster_id string
  428. cluster_name string
  429. )
  430. for rows.Next() {
  431. if err := rows.Scan(&sql_cluster_id, &cluster_name); err != nil {
  432. return "", "", err
  433. }
  434. }
  435. return sql_cluster_id, cluster_name, nil
  436. }
  437. func GetOrCreateClusterMeta(cluster_id, cluster_name string) (string, string, error) {
  438. id, name, err := GetClusterMeta(cluster_id)
  439. if err != nil {
  440. err := CreateClusterMeta(cluster_id, cluster_name)
  441. if err != nil {
  442. return "", "", err
  443. }
  444. }
  445. if id == "" {
  446. err := CreateClusterMeta(cluster_id, cluster_name)
  447. if err != nil {
  448. return "", "", err
  449. }
  450. }
  451. return id, name, nil
  452. }
  453. // ParseID attempts to parse a ProviderId from a string based on formats from the various providers and
  454. // returns the string as is if it cannot find a match
  455. func ParseID(id string) string {
  456. // It's of the form aws:///us-east-2a/i-0fea4fd46592d050b and we want i-0fea4fd46592d050b, if it exists
  457. rx := regexp.MustCompile("aws://[^/]*/[^/]*/([^/]+)")
  458. match := rx.FindStringSubmatch(id)
  459. if len(match) >= 2 {
  460. return match[1]
  461. }
  462. // gce://guestbook-227502/us-central1-a/gke-niko-n1-standard-2-wljla-8df8e58a-hfy7
  463. // => gke-niko-n1-standard-2-wljla-8df8e58a-hfy7
  464. rx = regexp.MustCompile("gce://[^/]*/[^/]*/([^/]+)")
  465. match = rx.FindStringSubmatch(id)
  466. if len(match) >= 2 {
  467. return match[1]
  468. }
  469. // Return id for Azure Provider, CSV Provider and Custom Provider
  470. return id
  471. }
  472. // ParsePVID attempts to parse a PV ProviderId from a string based on formats from the various providers and
  473. // returns the string as is if it cannot find a match
  474. func ParsePVID(id string) string {
  475. // Capture "vol-0fc54c5e83b8d2b76" from "aws://us-east-2a/vol-0fc54c5e83b8d2b76"
  476. rx := regexp.MustCompile("aws:/[^/]*/[^/]*/([^/]+)")
  477. match := rx.FindStringSubmatch(id)
  478. if len(match) >= 2 {
  479. return match[1]
  480. }
  481. // Return id for GCP Provider, Azure Provider, CSV Provider and Custom Provider
  482. return id
  483. }
  484. // ParseLBID attempts to parse a LB ProviderId from a string based on formats from the various providers and
  485. // returns the string as is if it cannot find a match
  486. func ParseLBID(id string) string {
  487. rx := regexp.MustCompile("^([^-]+)-.+amazonaws\\.com$") // Capture "ad9d88195b52a47c89b5055120f28c58" from "ad9d88195b52a47c89b5055120f28c58-1037804914.us-east-2.elb.amazonaws.com"
  488. match := rx.FindStringSubmatch(id)
  489. if len(match) >= 2 {
  490. return match[1]
  491. }
  492. // Return id for GCP Provider, Azure Provider, CSV Provider and Custom Provider
  493. return id
  494. }