provider.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503
  1. package cloud
  2. import (
  3. "database/sql"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "k8s.io/klog"
  9. "cloud.google.com/go/compute/metadata"
  10. "github.com/kubecost/cost-model/pkg/clustercache"
  11. "github.com/kubecost/cost-model/pkg/env"
  12. v1 "k8s.io/api/core/v1"
  13. )
  14. const authSecretPath = "/var/secrets/service-key.json"
  15. const storageConfigSecretPath = "/var/azure-storage-config/azure-storage-config.json"
  16. var createTableStatements = []string{
  17. `CREATE TABLE IF NOT EXISTS names (
  18. cluster_id VARCHAR(255) NOT NULL,
  19. cluster_name VARCHAR(255) NULL,
  20. PRIMARY KEY (cluster_id)
  21. );`,
  22. }
  23. // ReservedInstanceData keeps record of resources on a node should be
  24. // priced at reserved rates
  25. type ReservedInstanceData struct {
  26. ReservedCPU int64 `json:"reservedCPU"`
  27. ReservedRAM int64 `json:"reservedRAM"`
  28. CPUCost float64 `json:"CPUHourlyCost"`
  29. RAMCost float64 `json:"RAMHourlyCost"`
  30. }
  31. // Node is the interface by which the provider and cost model communicate Node prices.
  32. // The provider will best-effort try to fill out this struct.
  33. type Node struct {
  34. Cost string `json:"hourlyCost"`
  35. VCPU string `json:"CPU"`
  36. VCPUCost string `json:"CPUHourlyCost"`
  37. RAM string `json:"RAM"`
  38. RAMBytes string `json:"RAMBytes"`
  39. RAMCost string `json:"RAMGBHourlyCost"`
  40. Storage string `json:"storage"`
  41. StorageCost string `json:"storageHourlyCost"`
  42. UsesBaseCPUPrice bool `json:"usesDefaultPrice"`
  43. BaseCPUPrice string `json:"baseCPUPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
  44. BaseRAMPrice string `json:"baseRAMPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
  45. BaseGPUPrice string `json:"baseGPUPrice"`
  46. UsageType string `json:"usageType"`
  47. GPU string `json:"gpu"` // GPU represents the number of GPU on the instance
  48. GPUName string `json:"gpuName"`
  49. GPUCost string `json:"gpuCost"`
  50. InstanceType string `json:"instanceType,omitempty"`
  51. Region string `json:"region,omitempty"`
  52. Reserved *ReservedInstanceData `json:"reserved,omitempty"`
  53. ProviderID string `json:"providerID,omitempty"`
  54. PricingType PricingType `json:"pricingType,omitempty"`
  55. }
  56. // IsSpot determines whether or not a Node uses spot by usage type
  57. func (n *Node) IsSpot() bool {
  58. if n != nil {
  59. return strings.Contains(n.UsageType, "spot") || strings.Contains(n.UsageType, "emptible")
  60. } else {
  61. return false
  62. }
  63. }
  64. // LoadBalancer is the interface by which the provider and cost model communicate LoadBalancer prices.
  65. // The provider will best-effort try to fill out this struct.
  66. type LoadBalancer struct {
  67. IngressIPAddresses []string `json:"IngressIPAddresses"`
  68. Cost float64 `json:"hourlyCost"`
  69. }
// TODO: LBKey is intended for dynamic cloud provider price fetching.
// Determine what identifies a load balancer in the JSON returned from the
// cloud provider pricing API call.
// type LBKey interface {
// }
  74. // Network is the interface by which the provider and cost model communicate network egress prices.
  75. // The provider will best-effort try to fill out this struct.
  76. type Network struct {
  77. ZoneNetworkEgressCost float64
  78. RegionNetworkEgressCost float64
  79. InternetNetworkEgressCost float64
  80. }
  81. // PV is the interface by which the provider and cost model communicate PV prices.
  82. // The provider will best-effort try to fill out this struct.
  83. type PV struct {
  84. Cost string `json:"hourlyCost"`
  85. CostPerIO string `json:"costPerIOOperation"`
  86. Class string `json:"storageClass"`
  87. Size string `json:"size"`
  88. Region string `json:"region"`
  89. ProviderID string `json:"providerID,omitempty"`
  90. Parameters map[string]string `json:"parameters"`
  91. }
  92. // Key represents a way for nodes to match between the k8s API and a pricing API
  93. type Key interface {
  94. ID() string // ID represents an exact match
  95. Features() string // Features are a comma separated string of node metadata that could match pricing
  96. GPUType() string // GPUType returns "" if no GPU exists, but the name of the GPU otherwise
  97. }
  98. type PVKey interface {
  99. Features() string
  100. GetStorageClass() string
  101. ID() string
  102. }
  103. // OutOfClusterAllocation represents a cloud provider cost not associated with kubernetes
  104. type OutOfClusterAllocation struct {
  105. Aggregator string `json:"aggregator"`
  106. Environment string `json:"environment"`
  107. Service string `json:"service"`
  108. Cost float64 `json:"cost"`
  109. Cluster string `json:"cluster"`
  110. }
  111. type CustomPricing struct {
  112. Provider string `json:"provider"`
  113. Description string `json:"description"`
  114. CPU string `json:"CPU"`
  115. SpotCPU string `json:"spotCPU"`
  116. RAM string `json:"RAM"`
  117. SpotRAM string `json:"spotRAM"`
  118. GPU string `json:"GPU"`
  119. SpotGPU string `json:"spotGPU"`
  120. Storage string `json:"storage"`
  121. ZoneNetworkEgress string `json:"zoneNetworkEgress"`
  122. RegionNetworkEgress string `json:"regionNetworkEgress"`
  123. InternetNetworkEgress string `json:"internetNetworkEgress"`
  124. FirstFiveForwardingRulesCost string `json:"firstFiveForwardingRulesCost"`
  125. AdditionalForwardingRuleCost string `json:"additionalForwardingRuleCost"`
  126. LBIngressDataCost string `json:"LBIngressDataCost"`
  127. SpotLabel string `json:"spotLabel,omitempty"`
  128. SpotLabelValue string `json:"spotLabelValue,omitempty"`
  129. GpuLabel string `json:"gpuLabel,omitempty"`
  130. GpuLabelValue string `json:"gpuLabelValue,omitempty"`
  131. ServiceKeyName string `json:"awsServiceKeyName,omitempty"`
  132. ServiceKeySecret string `json:"awsServiceKeySecret,omitempty"`
  133. SpotDataRegion string `json:"awsSpotDataRegion,omitempty"`
  134. SpotDataBucket string `json:"awsSpotDataBucket,omitempty"`
  135. SpotDataPrefix string `json:"awsSpotDataPrefix,omitempty"`
  136. ProjectID string `json:"projectID,omitempty"`
  137. AthenaProjectID string `json:"athenaProjectID,omitempty"`
  138. AthenaBucketName string `json:"athenaBucketName"`
  139. AthenaRegion string `json:"athenaRegion"`
  140. AthenaDatabase string `json:"athenaDatabase"`
  141. AthenaTable string `json:"athenaTable"`
  142. MasterPayerARN string `json:"masterPayerARN"`
  143. BillingDataDataset string `json:"billingDataDataset,omitempty"`
  144. CustomPricesEnabled string `json:"customPricesEnabled"`
  145. DefaultIdle string `json:"defaultIdle"`
  146. AzureSubscriptionID string `json:"azureSubscriptionID"`
  147. AzureClientID string `json:"azureClientID"`
  148. AzureClientSecret string `json:"azureClientSecret"`
  149. AzureTenantID string `json:"azureTenantID"`
  150. AzureBillingRegion string `json:"azureBillingRegion"`
  151. CurrencyCode string `json:"currencyCode"`
  152. Discount string `json:"discount"`
  153. NegotiatedDiscount string `json:"negotiatedDiscount"`
  154. SharedCosts map[string]string `json:"sharedCost"`
  155. ClusterName string `json:"clusterName"`
  156. SharedNamespaces string `json:"sharedNamespaces"`
  157. SharedLabelNames string `json:"sharedLabelNames"`
  158. SharedLabelValues string `json:"sharedLabelValues"`
  159. ReadOnly string `json:"readOnly"`
  160. KubecostToken string `json:"kubecostToken"`
  161. }
  162. type ServiceAccountStatus struct {
  163. Checks []*ServiceAccountCheck `json:"checks"`
  164. }
  165. type ServiceAccountCheck struct {
  166. Message string `json:"message"`
  167. Status bool `json:"status"`
  168. AdditionalInfo string `json:additionalInfo`
  169. }
  170. type PricingSources struct {
  171. PricingSources map[string]*PricingSource
  172. }
  173. type PricingSource struct {
  174. Name string `json:"name"`
  175. Available bool `json:"available"`
  176. Error string `json:"error"`
  177. }
  178. type PricingType string
  179. const (
  180. Api PricingType = "api"
  181. Spot PricingType = "spot"
  182. Reserved PricingType = "reserved"
  183. SavingsPlan PricingType = "savingsPlan"
  184. CsvExact PricingType = "csvExact"
  185. CsvClass PricingType = "csvClass"
  186. DefaultPrices PricingType = "defaultPrices"
  187. )
  188. type PricingMatchMetadata struct {
  189. TotalNodes int `json:"TotalNodes"`
  190. PricingTypeCounts map[PricingType]int `json:"PricingType"`
  191. }
  192. // Provider represents a k8s provider.
  193. type Provider interface {
  194. ClusterInfo() (map[string]string, error)
  195. GetAddresses() ([]byte, error)
  196. GetDisks() ([]byte, error)
  197. NodePricing(Key) (*Node, error)
  198. PVPricing(PVKey) (*PV, error)
  199. NetworkPricing() (*Network, error) // TODO: add key interface arg for dynamic price fetching
  200. LoadBalancerPricing() (*LoadBalancer, error) // TODO: add key interface arg for dynamic price fetching
  201. AllNodePricing() (interface{}, error)
  202. DownloadPricingData() error
  203. GetKey(map[string]string, *v1.Node) Key
  204. GetPVKey(*v1.PersistentVolume, map[string]string, string) PVKey
  205. UpdateConfig(r io.Reader, updateType string) (*CustomPricing, error)
  206. UpdateConfigFromConfigMap(map[string]string) (*CustomPricing, error)
  207. GetConfig() (*CustomPricing, error)
  208. GetManagementPlatform() (string, error)
  209. GetLocalStorageQuery(string, string, bool, bool) string
  210. ExternalAllocations(string, string, []string, string, string, bool) ([]*OutOfClusterAllocation, error)
  211. ApplyReservedInstancePricing(map[string]*Node)
  212. ServiceAccountStatus() *ServiceAccountStatus
  213. PricingSourceStatus() map[string]*PricingSource
  214. ClusterManagementPricing() (string, float64, error)
  215. CombinedDiscountForNode(string, bool, float64, float64) float64
  216. ParseID(string) string
  217. ParsePVID(string) string
  218. ParseLBID(string) string
  219. }
  220. // ClusterName returns the name defined in cluster info, defaulting to the
  221. // CLUSTER_ID environment variable
  222. func ClusterName(p Provider) string {
  223. info, err := p.ClusterInfo()
  224. if err != nil {
  225. return env.GetClusterID()
  226. }
  227. name, ok := info["name"]
  228. if !ok {
  229. return env.GetClusterID()
  230. }
  231. return name
  232. }
  233. // CustomPricesEnabled returns the boolean equivalent of the cloup provider's custom prices flag,
  234. // indicating whether or not the cluster is using custom pricing.
  235. func CustomPricesEnabled(p Provider) bool {
  236. config, err := p.GetConfig()
  237. if err != nil {
  238. return false
  239. }
  240. // TODO:CLEANUP what is going on with this?
  241. if config.NegotiatedDiscount == "" {
  242. config.NegotiatedDiscount = "0%"
  243. }
  244. return config.CustomPricesEnabled == "true"
  245. }
  246. // AllocateIdleByDefault returns true if the application settings specify to allocate idle by default
  247. func AllocateIdleByDefault(p Provider) bool {
  248. config, err := p.GetConfig()
  249. if err != nil {
  250. return false
  251. }
  252. return config.DefaultIdle == "true"
  253. }
  254. // SharedNamespace returns a list of names of shared namespaces, as defined in the application settings
  255. func SharedNamespaces(p Provider) []string {
  256. namespaces := []string{}
  257. config, err := p.GetConfig()
  258. if err != nil {
  259. return namespaces
  260. }
  261. if config.SharedNamespaces == "" {
  262. return namespaces
  263. }
  264. // trim spaces so that "kube-system, kubecost" is equivalent to "kube-system,kubecost"
  265. for _, ns := range strings.Split(config.SharedNamespaces, ",") {
  266. namespaces = append(namespaces, strings.Trim(ns, " "))
  267. }
  268. return namespaces
  269. }
  270. // SharedLabel returns the configured set of shared labels as a parallel tuple of keys to values; e.g.
  271. // for app:kubecost,type:staging this returns (["app", "type"], ["kubecost", "staging"]) in order to
  272. // match the signature of the NewSharedResourceInfo
  273. func SharedLabels(p Provider) ([]string, []string) {
  274. names := []string{}
  275. values := []string{}
  276. config, err := p.GetConfig()
  277. if err != nil {
  278. return names, values
  279. }
  280. if config.SharedLabelNames == "" || config.SharedLabelValues == "" {
  281. return names, values
  282. }
  283. ks := strings.Split(config.SharedLabelNames, ",")
  284. vs := strings.Split(config.SharedLabelValues, ",")
  285. if len(ks) != len(vs) {
  286. klog.V(2).Infof("[Warning] shared labels have mis-matched lengths: %d names, %d values", len(ks), len(vs))
  287. return names, values
  288. }
  289. for i := range ks {
  290. names = append(names, strings.Trim(ks[i], " "))
  291. values = append(values, strings.Trim(vs[i], " "))
  292. }
  293. return names, values
  294. }
  295. func NewCrossClusterProvider(ctype string, overrideConfigPath string, cache clustercache.ClusterCache) (Provider, error) {
  296. if ctype == "aws" {
  297. return &AWS{
  298. Clientset: cache,
  299. Config: NewProviderConfig(overrideConfigPath),
  300. }, nil
  301. } else if ctype == "gcp" {
  302. return &GCP{
  303. Clientset: cache,
  304. Config: NewProviderConfig(overrideConfigPath),
  305. }, nil
  306. }
  307. return &CustomProvider{
  308. Clientset: cache,
  309. Config: NewProviderConfig(overrideConfigPath),
  310. }, nil
  311. }
  312. // NewProvider looks at the nodespec or provider metadata server to decide which provider to instantiate.
  313. func NewProvider(cache clustercache.ClusterCache, apiKey string) (Provider, error) {
  314. nodes := cache.GetAllNodes()
  315. if len(nodes) == 0 {
  316. return nil, fmt.Errorf("Could not locate any nodes for cluster.")
  317. }
  318. provider := strings.ToLower(nodes[0].Spec.ProviderID)
  319. if env.IsUseCSVProvider() {
  320. klog.Infof("Using CSV Provider with CSV at %s", env.GetCSVPath())
  321. configFileName := ""
  322. if metadata.OnGCE() {
  323. configFileName = "gcp.json"
  324. } else if strings.HasPrefix(provider, "aws") {
  325. configFileName = "aws.json"
  326. } else if strings.HasPrefix(provider, "azure") {
  327. configFileName = "azure.json"
  328. } else {
  329. configFileName = "default.json"
  330. }
  331. return &CSVProvider{
  332. CSVLocation: env.GetCSVPath(),
  333. CustomProvider: &CustomProvider{
  334. Clientset: cache,
  335. Config: NewProviderConfig(configFileName),
  336. },
  337. }, nil
  338. }
  339. if metadata.OnGCE() {
  340. klog.V(3).Info("metadata reports we are in GCE")
  341. if apiKey == "" {
  342. return nil, errors.New("Supply a GCP Key to start getting data")
  343. }
  344. return &GCP{
  345. Clientset: cache,
  346. APIKey: apiKey,
  347. Config: NewProviderConfig("gcp.json"),
  348. }, nil
  349. }
  350. if strings.HasPrefix(provider, "aws") {
  351. klog.V(2).Info("Found ProviderID starting with \"aws\", using AWS Provider")
  352. return &AWS{
  353. Clientset: cache,
  354. Config: NewProviderConfig("aws.json"),
  355. }, nil
  356. } else if strings.HasPrefix(provider, "azure") {
  357. klog.V(2).Info("Found ProviderID starting with \"azure\", using Azure Provider")
  358. return &Azure{
  359. Clientset: cache,
  360. Config: NewProviderConfig("azure.json"),
  361. }, nil
  362. } else {
  363. klog.V(2).Info("Unsupported provider, falling back to default")
  364. return &CustomProvider{
  365. Clientset: cache,
  366. Config: NewProviderConfig("default.json"),
  367. }, nil
  368. }
  369. }
  370. func UpdateClusterMeta(cluster_id, cluster_name string) error {
  371. pw := env.GetRemotePW()
  372. address := env.GetSQLAddress()
  373. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  374. db, err := sql.Open("postgres", connStr)
  375. if err != nil {
  376. return err
  377. }
  378. defer db.Close()
  379. updateStmt := `UPDATE names SET cluster_name = $1 WHERE cluster_id = $2;`
  380. _, err = db.Exec(updateStmt, cluster_name, cluster_id)
  381. if err != nil {
  382. return err
  383. }
  384. return nil
  385. }
  386. func CreateClusterMeta(cluster_id, cluster_name string) error {
  387. pw := env.GetRemotePW()
  388. address := env.GetSQLAddress()
  389. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  390. db, err := sql.Open("postgres", connStr)
  391. if err != nil {
  392. return err
  393. }
  394. defer db.Close()
  395. for _, stmt := range createTableStatements {
  396. _, err := db.Exec(stmt)
  397. if err != nil {
  398. return err
  399. }
  400. }
  401. insertStmt := `INSERT INTO names (cluster_id, cluster_name) VALUES ($1, $2);`
  402. _, err = db.Exec(insertStmt, cluster_id, cluster_name)
  403. if err != nil {
  404. return err
  405. }
  406. return nil
  407. }
  408. func GetClusterMeta(cluster_id string) (string, string, error) {
  409. pw := env.GetRemotePW()
  410. address := env.GetSQLAddress()
  411. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  412. db, err := sql.Open("postgres", connStr)
  413. defer db.Close()
  414. query := `SELECT cluster_id, cluster_name
  415. FROM names
  416. WHERE cluster_id = ?`
  417. rows, err := db.Query(query, cluster_id)
  418. if err != nil {
  419. return "", "", err
  420. }
  421. defer rows.Close()
  422. var (
  423. sql_cluster_id string
  424. cluster_name string
  425. )
  426. for rows.Next() {
  427. if err := rows.Scan(&sql_cluster_id, &cluster_name); err != nil {
  428. return "", "", err
  429. }
  430. }
  431. return sql_cluster_id, cluster_name, nil
  432. }
  433. func GetOrCreateClusterMeta(cluster_id, cluster_name string) (string, string, error) {
  434. id, name, err := GetClusterMeta(cluster_id)
  435. if err != nil {
  436. err := CreateClusterMeta(cluster_id, cluster_name)
  437. if err != nil {
  438. return "", "", err
  439. }
  440. }
  441. if id == "" {
  442. err := CreateClusterMeta(cluster_id, cluster_name)
  443. if err != nil {
  444. return "", "", err
  445. }
  446. }
  447. return id, name, nil
  448. }