provider.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. package cloud
  2. import (
  3. "database/sql"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "time"
  9. "k8s.io/klog"
  10. "cloud.google.com/go/compute/metadata"
  11. "github.com/kubecost/cost-model/pkg/clustercache"
  12. "github.com/kubecost/cost-model/pkg/env"
  13. v1 "k8s.io/api/core/v1"
  14. )
  // File paths of the secret files referenced by this package.
  const (
  	// authSecretPath is the location of the service key JSON file.
  	authSecretPath = "/var/secrets/service-key.json"

  	// storageConfigSecretPath is the location of the Azure storage
  	// configuration JSON file.
  	storageConfigSecretPath = "/var/azure-storage-config/azure-storage-config.json"
  )
  // createTableStatements holds the DDL statements that CreateClusterMeta
  // executes, in order, before inserting a row. The single statement creates
  // the "names" table, which maps a cluster ID (primary key) to an optional
  // cluster name.
  var createTableStatements = []string{
  	`CREATE TABLE IF NOT EXISTS names (
  		cluster_id VARCHAR(255) NOT NULL,
  		cluster_name VARCHAR(255) NULL,
  		PRIMARY KEY (cluster_id)
  	);`,
  }
  // ReservedInstanceData records the resources on a node that should be priced
  // at reserved rates, together with the hourly reserved prices.
  type ReservedInstanceData struct {
  	// Amount of CPU and RAM covered by the reservation.
  	ReservedCPU int64 `json:"reservedCPU"`
  	ReservedRAM int64 `json:"reservedRAM"`

  	// Hourly prices applied to the reserved CPU and RAM.
  	CPUCost float64 `json:"CPUHourlyCost"`
  	RAMCost float64 `json:"RAMHourlyCost"`
  }
  32. // Node is the interface by which the provider and cost model communicate Node prices.
  33. // The provider will best-effort try to fill out this struct.
  34. type Node struct {
  35. Cost string `json:"hourlyCost"`
  36. VCPU string `json:"CPU"`
  37. VCPUCost string `json:"CPUHourlyCost"`
  38. RAM string `json:"RAM"`
  39. RAMBytes string `json:"RAMBytes"`
  40. RAMCost string `json:"RAMGBHourlyCost"`
  41. Storage string `json:"storage"`
  42. StorageCost string `json:"storageHourlyCost"`
  43. UsesBaseCPUPrice bool `json:"usesDefaultPrice"`
  44. BaseCPUPrice string `json:"baseCPUPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
  45. BaseRAMPrice string `json:"baseRAMPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
  46. BaseGPUPrice string `json:"baseGPUPrice"`
  47. UsageType string `json:"usageType"`
  48. GPU string `json:"gpu"` // GPU represents the number of GPU on the instance
  49. GPUName string `json:"gpuName"`
  50. GPUCost string `json:"gpuCost"`
  51. InstanceType string `json:"instanceType,omitempty"`
  52. Region string `json:"region,omitempty"`
  53. Reserved *ReservedInstanceData `json:"reserved,omitempty"`
  54. ProviderID string `json:"providerID,omitempty"`
  55. PricingType PricingType `json:"pricingType,omitempty"`
  56. }
  57. // IsSpot determines whether or not a Node uses spot by usage type
  58. func (n *Node) IsSpot() bool {
  59. if n != nil {
  60. return strings.Contains(n.UsageType, "spot") || strings.Contains(n.UsageType, "emptible")
  61. } else {
  62. return false
  63. }
  64. }
  // LoadBalancer is the structure through which a provider and the cost model
  // exchange load balancer prices. Providers fill it out on a best-effort basis.
  type LoadBalancer struct {
  	// IngressIPAddresses lists the ingress IP addresses of the load balancer.
  	IngressIPAddresses []string `json:"IngressIPAddresses"`
  	// Cost is serialized as "hourlyCost".
  	Cost float64 `json:"hourlyCost"`
  }
  71. // TODO: used for dynamic cloud provider price fetching.
  72. // determine what identifies a load balancer in the json returned from the cloud provider pricing API call
  73. // type LBKey interface {
  74. // }
  // Network is the structure through which a provider and the cost model
  // exchange network egress prices. Providers fill it out on a best-effort
  // basis. The fields carry no JSON tags, so they serialize under their Go names.
  type Network struct {
  	ZoneNetworkEgressCost     float64
  	RegionNetworkEgressCost   float64
  	InternetNetworkEgressCost float64
  }
  // PV is the structure through which a provider and the cost model exchange
  // persistent volume prices. Providers fill it out on a best-effort basis.
  type PV struct {
  	Cost      string `json:"hourlyCost"`
  	CostPerIO string `json:"costPerIOOperation"`

  	Class      string `json:"storageClass"`
  	Size       string `json:"size"`
  	Region     string `json:"region"`
  	ProviderID string `json:"providerID,omitempty"`

  	Parameters map[string]string `json:"parameters"`
  }
  // Key represents a way for nodes to be matched between the k8s API and a
  // pricing API.
  type Key interface {
  	// ID represents an exact match.
  	ID() string

  	// Features returns a comma separated string of node metadata that could
  	// match pricing.
  	Features() string

  	// GPUType returns the name of the GPU, or "" if no GPU exists.
  	GPUType() string
  }
  // PVKey is the persistent volume counterpart of Key: it exposes the
  // identifiers passed to Provider.PVPricing to look up a volume's price.
  type PVKey interface {
  	// Features returns a string describing the volume's features.
  	Features() string

  	// GetStorageClass returns the volume's storage class.
  	GetStorageClass() string

  	// ID returns the volume's identifier.
  	ID() string
  }
  // OutOfClusterAllocation represents a cloud provider cost that is not
  // associated with kubernetes.
  type OutOfClusterAllocation struct {
  	Aggregator  string `json:"aggregator"`
  	Environment string `json:"environment"`
  	Service     string `json:"service"`
  	Cluster     string `json:"cluster"`

  	Cost float64 `json:"cost"`
  }
  // CustomPricing is the JSON-serializable pricing and settings configuration
  // returned by Provider.GetConfig. Every scalar setting, including prices and
  // flags, is carried as a string.
  type CustomPricing struct {
  	Provider    string `json:"provider"`
  	Description string `json:"description"`

  	// Resource prices, each with a spot counterpart where one exists.
  	CPU     string `json:"CPU"`
  	SpotCPU string `json:"spotCPU"`
  	RAM     string `json:"RAM"`
  	SpotRAM string `json:"spotRAM"`
  	GPU     string `json:"GPU"`
  	SpotGPU string `json:"spotGPU"`
  	Storage string `json:"storage"`

  	// Network egress prices.
  	ZoneNetworkEgress     string `json:"zoneNetworkEgress"`
  	RegionNetworkEgress   string `json:"regionNetworkEgress"`
  	InternetNetworkEgress string `json:"internetNetworkEgress"`

  	// Load balancer prices.
  	FirstFiveForwardingRulesCost string `json:"firstFiveForwardingRulesCost"`
  	AdditionalForwardingRuleCost string `json:"additionalForwardingRuleCost"`
  	LBIngressDataCost            string `json:"LBIngressDataCost"`

  	// Label names and values for spot and GPU settings.
  	SpotLabel      string `json:"spotLabel,omitempty"`
  	SpotLabelValue string `json:"spotLabelValue,omitempty"`
  	GpuLabel       string `json:"gpuLabel,omitempty"`
  	GpuLabelValue  string `json:"gpuLabelValue,omitempty"`

  	// Settings serialized under "aws"-prefixed keys.
  	ServiceKeyName   string `json:"awsServiceKeyName,omitempty"`
  	ServiceKeySecret string `json:"awsServiceKeySecret,omitempty"`
  	SpotDataRegion   string `json:"awsSpotDataRegion,omitempty"`
  	SpotDataBucket   string `json:"awsSpotDataBucket,omitempty"`
  	SpotDataPrefix   string `json:"awsSpotDataPrefix,omitempty"`

  	// Project, Athena and billing data settings.
  	ProjectID          string `json:"projectID,omitempty"`
  	AthenaProjectID    string `json:"athenaProjectID,omitempty"`
  	AthenaBucketName   string `json:"athenaBucketName"`
  	AthenaRegion       string `json:"athenaRegion"`
  	AthenaDatabase     string `json:"athenaDatabase"`
  	AthenaTable        string `json:"athenaTable"`
  	MasterPayerARN     string `json:"masterPayerARN"`
  	BillingDataDataset string `json:"billingDataDataset,omitempty"`

  	// CustomPricesEnabled and DefaultIdle are flags; the package-level
  	// helpers treat them as set only when the value is exactly "true".
  	CustomPricesEnabled string `json:"customPricesEnabled"`
  	DefaultIdle         string `json:"defaultIdle"`

  	// Azure settings.
  	AzureSubscriptionID string `json:"azureSubscriptionID"`
  	AzureClientID       string `json:"azureClientID"`
  	AzureClientSecret   string `json:"azureClientSecret"`
  	AzureTenantID       string `json:"azureTenantID"`
  	AzureBillingRegion  string `json:"azureBillingRegion"`

  	CurrencyCode       string `json:"currencyCode"`
  	Discount           string `json:"discount"`
  	NegotiatedDiscount string `json:"negotiatedDiscount"`

  	// SharedCosts is serialized under the singular key "sharedCost".
  	SharedCosts map[string]string `json:"sharedCost"`
  	ClusterName string            `json:"clusterName"`

  	// SharedNamespaces is a comma separated list of namespace names.
  	// SharedLabelNames and SharedLabelValues are comma separated lists that
  	// are paired up by position.
  	SharedNamespaces  string `json:"sharedNamespaces"`
  	SharedLabelNames  string `json:"sharedLabelNames"`
  	SharedLabelValues string `json:"sharedLabelValues"`

  	ReadOnly      string `json:"readOnly"`
  	KubecostToken string `json:"kubecostToken"`
  }
  163. type ServiceAccountStatus struct {
  164. Checks []*ServiceAccountCheck `json:"checks"`
  165. }
  // ServiceAccountCheck is a single entry of a ServiceAccountStatus: a message,
  // a boolean status and free-form additional information.
  type ServiceAccountCheck struct {
  	Message        string `json:"message"`
  	Status         bool   `json:"status"`
  	AdditionalInfo string `json:"additionalInfo"`
  }
  171. type PricingSources struct {
  172. PricingSources map[string]*PricingSource
  173. }
  // PricingSource describes one source of pricing data: its name, whether it
  // is available, and an error string.
  type PricingSource struct {
  	Name      string `json:"name"`
  	Available bool   `json:"available"`
  	Error     string `json:"error"`
  }
  // PricingType is a string label naming the kind of pricing recorded for a
  // node (see Node.PricingType and PricingMatchMetadata.PricingTypeCounts).
  type PricingType string

  // The PricingType values defined by this package.
  const (
  	Api           PricingType = "api"
  	Spot          PricingType = "spot"
  	Reserved      PricingType = "reserved"
  	SavingsPlan   PricingType = "savingsPlan"
  	CsvExact      PricingType = "csvExact"
  	CsvClass      PricingType = "csvClass"
  	DefaultPrices PricingType = "defaultPrices"
  )
  189. type PricingMatchMetadata struct {
  190. TotalNodes int `json:"TotalNodes"`
  191. PricingTypeCounts map[PricingType]int `json:"PricingType"`
  192. }
  193. // Provider represents a k8s provider.
  194. type Provider interface {
  195. ClusterInfo() (map[string]string, error)
  196. GetAddresses() ([]byte, error)
  197. GetDisks() ([]byte, error)
  198. NodePricing(Key) (*Node, error)
  199. PVPricing(PVKey) (*PV, error)
  200. NetworkPricing() (*Network, error) // TODO: add key interface arg for dynamic price fetching
  201. LoadBalancerPricing() (*LoadBalancer, error) // TODO: add key interface arg for dynamic price fetching
  202. AllNodePricing() (interface{}, error)
  203. DownloadPricingData() error
  204. GetKey(map[string]string, *v1.Node) Key
  205. GetPVKey(*v1.PersistentVolume, map[string]string, string) PVKey
  206. UpdateConfig(r io.Reader, updateType string) (*CustomPricing, error)
  207. UpdateConfigFromConfigMap(map[string]string) (*CustomPricing, error)
  208. GetConfig() (*CustomPricing, error)
  209. GetManagementPlatform() (string, error)
  210. GetLocalStorageQuery(time.Duration, time.Duration, bool, bool) string
  211. ExternalAllocations(string, string, []string, string, string, bool) ([]*OutOfClusterAllocation, error)
  212. ApplyReservedInstancePricing(map[string]*Node)
  213. ServiceAccountStatus() *ServiceAccountStatus
  214. PricingSourceStatus() map[string]*PricingSource
  215. ClusterManagementPricing() (string, float64, error)
  216. CombinedDiscountForNode(string, bool, float64, float64) float64
  217. ParseID(string) string
  218. ParsePVID(string) string
  219. ParseLBID(string) string
  220. }
  221. // ClusterName returns the name defined in cluster info, defaulting to the
  222. // CLUSTER_ID environment variable
  223. func ClusterName(p Provider) string {
  224. info, err := p.ClusterInfo()
  225. if err != nil {
  226. return env.GetClusterID()
  227. }
  228. name, ok := info["name"]
  229. if !ok {
  230. return env.GetClusterID()
  231. }
  232. return name
  233. }
  234. // CustomPricesEnabled returns the boolean equivalent of the cloup provider's custom prices flag,
  235. // indicating whether or not the cluster is using custom pricing.
  236. func CustomPricesEnabled(p Provider) bool {
  237. config, err := p.GetConfig()
  238. if err != nil {
  239. return false
  240. }
  241. // TODO:CLEANUP what is going on with this?
  242. if config.NegotiatedDiscount == "" {
  243. config.NegotiatedDiscount = "0%"
  244. }
  245. return config.CustomPricesEnabled == "true"
  246. }
  247. // AllocateIdleByDefault returns true if the application settings specify to allocate idle by default
  248. func AllocateIdleByDefault(p Provider) bool {
  249. config, err := p.GetConfig()
  250. if err != nil {
  251. return false
  252. }
  253. return config.DefaultIdle == "true"
  254. }
  255. // SharedNamespace returns a list of names of shared namespaces, as defined in the application settings
  256. func SharedNamespaces(p Provider) []string {
  257. namespaces := []string{}
  258. config, err := p.GetConfig()
  259. if err != nil {
  260. return namespaces
  261. }
  262. if config.SharedNamespaces == "" {
  263. return namespaces
  264. }
  265. // trim spaces so that "kube-system, kubecost" is equivalent to "kube-system,kubecost"
  266. for _, ns := range strings.Split(config.SharedNamespaces, ",") {
  267. namespaces = append(namespaces, strings.Trim(ns, " "))
  268. }
  269. return namespaces
  270. }
  271. // SharedLabel returns the configured set of shared labels as a parallel tuple of keys to values; e.g.
  272. // for app:kubecost,type:staging this returns (["app", "type"], ["kubecost", "staging"]) in order to
  273. // match the signature of the NewSharedResourceInfo
  274. func SharedLabels(p Provider) ([]string, []string) {
  275. names := []string{}
  276. values := []string{}
  277. config, err := p.GetConfig()
  278. if err != nil {
  279. return names, values
  280. }
  281. if config.SharedLabelNames == "" || config.SharedLabelValues == "" {
  282. return names, values
  283. }
  284. ks := strings.Split(config.SharedLabelNames, ",")
  285. vs := strings.Split(config.SharedLabelValues, ",")
  286. if len(ks) != len(vs) {
  287. klog.V(2).Infof("[Warning] shared labels have mis-matched lengths: %d names, %d values", len(ks), len(vs))
  288. return names, values
  289. }
  290. for i := range ks {
  291. names = append(names, strings.Trim(ks[i], " "))
  292. values = append(values, strings.Trim(vs[i], " "))
  293. }
  294. return names, values
  295. }
  296. func NewCrossClusterProvider(ctype string, overrideConfigPath string, cache clustercache.ClusterCache) (Provider, error) {
  297. if ctype == "aws" {
  298. return &AWS{
  299. Clientset: cache,
  300. Config: NewProviderConfig(overrideConfigPath),
  301. }, nil
  302. } else if ctype == "gcp" {
  303. return &GCP{
  304. Clientset: cache,
  305. Config: NewProviderConfig(overrideConfigPath),
  306. }, nil
  307. }
  308. return &CustomProvider{
  309. Clientset: cache,
  310. Config: NewProviderConfig(overrideConfigPath),
  311. }, nil
  312. }
  313. // NewProvider looks at the nodespec or provider metadata server to decide which provider to instantiate.
  314. func NewProvider(cache clustercache.ClusterCache, apiKey string) (Provider, error) {
  315. nodes := cache.GetAllNodes()
  316. if len(nodes) == 0 {
  317. return nil, fmt.Errorf("Could not locate any nodes for cluster.")
  318. }
  319. provider := strings.ToLower(nodes[0].Spec.ProviderID)
  320. if env.IsUseCSVProvider() {
  321. klog.Infof("Using CSV Provider with CSV at %s", env.GetCSVPath())
  322. configFileName := ""
  323. if metadata.OnGCE() {
  324. configFileName = "gcp.json"
  325. } else if strings.HasPrefix(provider, "aws") {
  326. configFileName = "aws.json"
  327. } else if strings.HasPrefix(provider, "azure") {
  328. configFileName = "azure.json"
  329. } else {
  330. configFileName = "default.json"
  331. }
  332. return &CSVProvider{
  333. CSVLocation: env.GetCSVPath(),
  334. CustomProvider: &CustomProvider{
  335. Clientset: cache,
  336. Config: NewProviderConfig(configFileName),
  337. },
  338. }, nil
  339. }
  340. if metadata.OnGCE() {
  341. klog.V(3).Info("metadata reports we are in GCE")
  342. if apiKey == "" {
  343. return nil, errors.New("Supply a GCP Key to start getting data")
  344. }
  345. return &GCP{
  346. Clientset: cache,
  347. APIKey: apiKey,
  348. Config: NewProviderConfig("gcp.json"),
  349. }, nil
  350. }
  351. if strings.HasPrefix(provider, "aws") {
  352. klog.V(2).Info("Found ProviderID starting with \"aws\", using AWS Provider")
  353. return &AWS{
  354. Clientset: cache,
  355. Config: NewProviderConfig("aws.json"),
  356. }, nil
  357. } else if strings.HasPrefix(provider, "azure") {
  358. klog.V(2).Info("Found ProviderID starting with \"azure\", using Azure Provider")
  359. return &Azure{
  360. Clientset: cache,
  361. Config: NewProviderConfig("azure.json"),
  362. }, nil
  363. } else {
  364. klog.V(2).Info("Unsupported provider, falling back to default")
  365. return &CustomProvider{
  366. Clientset: cache,
  367. Config: NewProviderConfig("default.json"),
  368. }, nil
  369. }
  370. }
  371. func UpdateClusterMeta(cluster_id, cluster_name string) error {
  372. pw := env.GetRemotePW()
  373. address := env.GetSQLAddress()
  374. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  375. db, err := sql.Open("postgres", connStr)
  376. if err != nil {
  377. return err
  378. }
  379. defer db.Close()
  380. updateStmt := `UPDATE names SET cluster_name = $1 WHERE cluster_id = $2;`
  381. _, err = db.Exec(updateStmt, cluster_name, cluster_id)
  382. if err != nil {
  383. return err
  384. }
  385. return nil
  386. }
  387. func CreateClusterMeta(cluster_id, cluster_name string) error {
  388. pw := env.GetRemotePW()
  389. address := env.GetSQLAddress()
  390. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  391. db, err := sql.Open("postgres", connStr)
  392. if err != nil {
  393. return err
  394. }
  395. defer db.Close()
  396. for _, stmt := range createTableStatements {
  397. _, err := db.Exec(stmt)
  398. if err != nil {
  399. return err
  400. }
  401. }
  402. insertStmt := `INSERT INTO names (cluster_id, cluster_name) VALUES ($1, $2);`
  403. _, err = db.Exec(insertStmt, cluster_id, cluster_name)
  404. if err != nil {
  405. return err
  406. }
  407. return nil
  408. }
  409. func GetClusterMeta(cluster_id string) (string, string, error) {
  410. pw := env.GetRemotePW()
  411. address := env.GetSQLAddress()
  412. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  413. db, err := sql.Open("postgres", connStr)
  414. defer db.Close()
  415. query := `SELECT cluster_id, cluster_name
  416. FROM names
  417. WHERE cluster_id = ?`
  418. rows, err := db.Query(query, cluster_id)
  419. if err != nil {
  420. return "", "", err
  421. }
  422. defer rows.Close()
  423. var (
  424. sql_cluster_id string
  425. cluster_name string
  426. )
  427. for rows.Next() {
  428. if err := rows.Scan(&sql_cluster_id, &cluster_name); err != nil {
  429. return "", "", err
  430. }
  431. }
  432. return sql_cluster_id, cluster_name, nil
  433. }
  434. func GetOrCreateClusterMeta(cluster_id, cluster_name string) (string, string, error) {
  435. id, name, err := GetClusterMeta(cluster_id)
  436. if err != nil {
  437. err := CreateClusterMeta(cluster_id, cluster_name)
  438. if err != nil {
  439. return "", "", err
  440. }
  441. }
  442. if id == "" {
  443. err := CreateClusterMeta(cluster_id, cluster_name)
  444. if err != nil {
  445. return "", "", err
  446. }
  447. }
  448. return id, name, nil
  449. }