provider.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. package cloud
  2. import (
  3. "database/sql"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "strings"
  8. "k8s.io/klog"
  9. "cloud.google.com/go/compute/metadata"
  10. "github.com/kubecost/cost-model/pkg/clustercache"
  11. "github.com/kubecost/cost-model/pkg/env"
  12. v1 "k8s.io/api/core/v1"
  13. )
  14. const authSecretPath = "/var/secrets/service-key.json"
  15. const storageConfigSecretPath = "/var/azure-storage-config/azure-storage-config.json"
  16. var createTableStatements = []string{
  17. `CREATE TABLE IF NOT EXISTS names (
  18. cluster_id VARCHAR(255) NOT NULL,
  19. cluster_name VARCHAR(255) NULL,
  20. PRIMARY KEY (cluster_id)
  21. );`,
  22. }
  23. // ReservedInstanceData keeps record of resources on a node should be
  24. // priced at reserved rates
  25. type ReservedInstanceData struct {
  26. ReservedCPU int64 `json:"reservedCPU"`
  27. ReservedRAM int64 `json:"reservedRAM"`
  28. CPUCost float64 `json:"CPUHourlyCost"`
  29. RAMCost float64 `json:"RAMHourlyCost"`
  30. }
  31. // Node is the interface by which the provider and cost model communicate Node prices.
  32. // The provider will best-effort try to fill out this struct.
  33. type Node struct {
  34. Cost string `json:"hourlyCost"`
  35. VCPU string `json:"CPU"`
  36. VCPUCost string `json:"CPUHourlyCost"`
  37. RAM string `json:"RAM"`
  38. RAMBytes string `json:"RAMBytes"`
  39. RAMCost string `json:"RAMGBHourlyCost"`
  40. Storage string `json:"storage"`
  41. StorageCost string `json:"storageHourlyCost"`
  42. UsesBaseCPUPrice bool `json:"usesDefaultPrice"`
  43. BaseCPUPrice string `json:"baseCPUPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
  44. BaseRAMPrice string `json:"baseRAMPrice"` // Used to compute an implicit RAM GB/Hr price when RAM pricing is not provided.
  45. BaseGPUPrice string `json:"baseGPUPrice"`
  46. UsageType string `json:"usageType"`
  47. GPU string `json:"gpu"` // GPU represents the number of GPU on the instance
  48. GPUName string `json:"gpuName"`
  49. GPUCost string `json:"gpuCost"`
  50. InstanceType string `json:"instanceType,omitempty"`
  51. Region string `json:"region,omitempty"`
  52. Reserved *ReservedInstanceData `json:"reserved,omitempty"`
  53. ProviderID string `json:"providerID,omitempty"`
  54. PricingType PricingType `json:"pricingType,omitempty"`
  55. }
  56. // IsSpot determines whether or not a Node uses spot by usage type
  57. func (n *Node) IsSpot() bool {
  58. if n != nil {
  59. return strings.Contains(n.UsageType, "spot") || strings.Contains(n.UsageType, "emptible")
  60. } else {
  61. return false
  62. }
  63. }
  64. // LoadBalancer is the interface by which the provider and cost model communicate LoadBalancer prices.
  65. // The provider will best-effort try to fill out this struct.
  66. type LoadBalancer struct {
  67. IngressIPAddresses []string `json:"IngressIPAddresses"`
  68. Cost float64 `json:"hourlyCost"`
  69. }
  70. // TODO: used for dynamic cloud provider price fetching.
  71. // determine what identifies a load balancer in the json returned from the cloud provider pricing API call
  72. // type LBKey interface {
  73. // }
  74. // Network is the interface by which the provider and cost model communicate network egress prices.
  75. // The provider will best-effort try to fill out this struct.
  76. type Network struct {
  77. ZoneNetworkEgressCost float64
  78. RegionNetworkEgressCost float64
  79. InternetNetworkEgressCost float64
  80. }
  81. // PV is the interface by which the provider and cost model communicate PV prices.
  82. // The provider will best-effort try to fill out this struct.
  83. type PV struct {
  84. Cost string `json:"hourlyCost"`
  85. CostPerIO string `json:"costPerIOOperation"`
  86. Class string `json:"storageClass"`
  87. Size string `json:"size"`
  88. Region string `json:"region"`
  89. ProviderID string `json:"providerID,omitempty"`
  90. Parameters map[string]string `json:"parameters"`
  91. }
  92. // Key represents a way for nodes to match between the k8s API and a pricing API
  93. type Key interface {
  94. ID() string // ID represents an exact match
  95. Features() string // Features are a comma separated string of node metadata that could match pricing
  96. GPUType() string // GPUType returns "" if no GPU exists, but the name of the GPU otherwise
  97. }
  98. type PVKey interface {
  99. Features() string
  100. GetStorageClass() string
  101. ID() string
  102. }
  103. // OutOfClusterAllocation represents a cloud provider cost not associated with kubernetes
  104. type OutOfClusterAllocation struct {
  105. Aggregator string `json:"aggregator"`
  106. Environment string `json:"environment"`
  107. Service string `json:"service"`
  108. Cost float64 `json:"cost"`
  109. Cluster string `json:"cluster"`
  110. }
  111. type CustomPricing struct {
  112. Provider string `json:"provider"`
  113. Description string `json:"description"`
  114. CPU string `json:"CPU"`
  115. SpotCPU string `json:"spotCPU"`
  116. RAM string `json:"RAM"`
  117. SpotRAM string `json:"spotRAM"`
  118. GPU string `json:"GPU"`
  119. SpotGPU string `json:"spotGPU"`
  120. Storage string `json:"storage"`
  121. ZoneNetworkEgress string `json:"zoneNetworkEgress"`
  122. RegionNetworkEgress string `json:"regionNetworkEgress"`
  123. InternetNetworkEgress string `json:"internetNetworkEgress"`
  124. FirstFiveForwardingRulesCost string `json:"firstFiveForwardingRulesCost"`
  125. AdditionalForwardingRuleCost string `json:"additionalForwardingRuleCost"`
  126. LBIngressDataCost string `json:"LBIngressDataCost"`
  127. SpotLabel string `json:"spotLabel,omitempty"`
  128. SpotLabelValue string `json:"spotLabelValue,omitempty"`
  129. GpuLabel string `json:"gpuLabel,omitempty"`
  130. GpuLabelValue string `json:"gpuLabelValue,omitempty"`
  131. ServiceKeyName string `json:"awsServiceKeyName,omitempty"`
  132. ServiceKeySecret string `json:"awsServiceKeySecret,omitempty"`
  133. SpotDataRegion string `json:"awsSpotDataRegion,omitempty"`
  134. SpotDataBucket string `json:"awsSpotDataBucket,omitempty"`
  135. SpotDataPrefix string `json:"awsSpotDataPrefix,omitempty"`
  136. ProjectID string `json:"projectID,omitempty"`
  137. AthenaProjectID string `json:"athenaProjectID,omitempty"`
  138. AthenaBucketName string `json:"athenaBucketName"`
  139. AthenaRegion string `json:"athenaRegion"`
  140. AthenaDatabase string `json:"athenaDatabase"`
  141. AthenaTable string `json:"athenaTable"`
  142. MasterPayerARN string `json:"masterPayerARN"`
  143. BillingDataDataset string `json:"billingDataDataset,omitempty"`
  144. CustomPricesEnabled string `json:"customPricesEnabled"`
  145. DefaultIdle string `json:"defaultIdle"`
  146. AzureSubscriptionID string `json:"azureSubscriptionID"`
  147. AzureClientID string `json:"azureClientID"`
  148. AzureClientSecret string `json:"azureClientSecret"`
  149. AzureTenantID string `json:"azureTenantID"`
  150. AzureBillingRegion string `json:"azureBillingRegion"`
  151. CurrencyCode string `json:"currencyCode"`
  152. Discount string `json:"discount"`
  153. NegotiatedDiscount string `json:"negotiatedDiscount"`
  154. SharedCosts map[string]string `json:"sharedCost"`
  155. ClusterName string `json:"clusterName"`
  156. SharedNamespaces string `json:"sharedNamespaces"`
  157. SharedLabelNames string `json:"sharedLabelNames"`
  158. SharedLabelValues string `json:"sharedLabelValues"`
  159. ReadOnly string `json:"readOnly"`
  160. }
  161. type ServiceAccountStatus struct {
  162. Checks []*ServiceAccountCheck `json:"checks"`
  163. }
  164. type ServiceAccountCheck struct {
  165. Message string `json:"message"`
  166. Status bool `json:"status"`
  167. AdditionalInfo string `json:additionalInfo`
  168. }
  169. type PricingSources struct {
  170. PricingSources map[string]*PricingSource
  171. }
  172. type PricingSource struct {
  173. Name string `json:"name"`
  174. Available bool `json:"available"`
  175. Error string `json:"error"`
  176. }
  177. type PricingType string
  178. const (
  179. Api PricingType = "api"
  180. Spot PricingType = "spot"
  181. Reserved PricingType = "reserved"
  182. SavingsPlan PricingType = "savingsPlan"
  183. CsvExact PricingType = "csvExact"
  184. CsvClass PricingType = "csvClass"
  185. DefaultPrices PricingType = "defaultPrices"
  186. )
  187. type PricingMatchMetadata struct {
  188. TotalNodes int `json:"TotalNodes"`
  189. PricingTypeCounts map[PricingType]int `json:"PricingType"`
  190. }
  191. // Provider represents a k8s provider.
  192. type Provider interface {
  193. ClusterInfo() (map[string]string, error)
  194. GetAddresses() ([]byte, error)
  195. GetDisks() ([]byte, error)
  196. NodePricing(Key) (*Node, error)
  197. PVPricing(PVKey) (*PV, error)
  198. NetworkPricing() (*Network, error) // TODO: add key interface arg for dynamic price fetching
  199. LoadBalancerPricing() (*LoadBalancer, error) // TODO: add key interface arg for dynamic price fetching
  200. AllNodePricing() (interface{}, error)
  201. DownloadPricingData() error
  202. GetKey(map[string]string, *v1.Node) Key
  203. GetPVKey(*v1.PersistentVolume, map[string]string, string) PVKey
  204. UpdateConfig(r io.Reader, updateType string) (*CustomPricing, error)
  205. UpdateConfigFromConfigMap(map[string]string) (*CustomPricing, error)
  206. GetConfig() (*CustomPricing, error)
  207. GetManagementPlatform() (string, error)
  208. GetLocalStorageQuery(string, string, bool, bool) string
  209. ExternalAllocations(string, string, []string, string, string, bool) ([]*OutOfClusterAllocation, error)
  210. ApplyReservedInstancePricing(map[string]*Node)
  211. ServiceAccountStatus() *ServiceAccountStatus
  212. PricingSourceStatus() map[string]*PricingSource
  213. ClusterManagementPricing() (string, float64, error)
  214. CombinedDiscountForNode(string, bool, float64, float64) float64
  215. ParseID(string) string
  216. ParsePVID(string) string
  217. ParseLBID(string) string
  218. }
  219. // ClusterName returns the name defined in cluster info, defaulting to the
  220. // CLUSTER_ID environment variable
  221. func ClusterName(p Provider) string {
  222. info, err := p.ClusterInfo()
  223. if err != nil {
  224. return env.GetClusterID()
  225. }
  226. name, ok := info["name"]
  227. if !ok {
  228. return env.GetClusterID()
  229. }
  230. return name
  231. }
  232. // CustomPricesEnabled returns the boolean equivalent of the cloup provider's custom prices flag,
  233. // indicating whether or not the cluster is using custom pricing.
  234. func CustomPricesEnabled(p Provider) bool {
  235. config, err := p.GetConfig()
  236. if err != nil {
  237. return false
  238. }
  239. // TODO:CLEANUP what is going on with this?
  240. if config.NegotiatedDiscount == "" {
  241. config.NegotiatedDiscount = "0%"
  242. }
  243. return config.CustomPricesEnabled == "true"
  244. }
  245. // AllocateIdleByDefault returns true if the application settings specify to allocate idle by default
  246. func AllocateIdleByDefault(p Provider) bool {
  247. config, err := p.GetConfig()
  248. if err != nil {
  249. return false
  250. }
  251. return config.DefaultIdle == "true"
  252. }
  253. // SharedNamespace returns a list of names of shared namespaces, as defined in the application settings
  254. func SharedNamespaces(p Provider) []string {
  255. namespaces := []string{}
  256. config, err := p.GetConfig()
  257. if err != nil {
  258. return namespaces
  259. }
  260. if config.SharedNamespaces == "" {
  261. return namespaces
  262. }
  263. // trim spaces so that "kube-system, kubecost" is equivalent to "kube-system,kubecost"
  264. for _, ns := range strings.Split(config.SharedNamespaces, ",") {
  265. namespaces = append(namespaces, strings.Trim(ns, " "))
  266. }
  267. return namespaces
  268. }
  269. // SharedLabel returns the configured set of shared labels as a parallel tuple of keys to values; e.g.
  270. // for app:kubecost,type:staging this returns (["app", "type"], ["kubecost", "staging"]) in order to
  271. // match the signature of the NewSharedResourceInfo
  272. func SharedLabels(p Provider) ([]string, []string) {
  273. names := []string{}
  274. values := []string{}
  275. config, err := p.GetConfig()
  276. if err != nil {
  277. return names, values
  278. }
  279. if config.SharedLabelNames == "" || config.SharedLabelValues == "" {
  280. return names, values
  281. }
  282. ks := strings.Split(config.SharedLabelNames, ",")
  283. vs := strings.Split(config.SharedLabelValues, ",")
  284. if len(ks) != len(vs) {
  285. klog.V(2).Infof("[Warning] shared labels have mis-matched lengths: %d names, %d values", len(ks), len(vs))
  286. return names, values
  287. }
  288. for i := range ks {
  289. names = append(names, strings.Trim(ks[i], " "))
  290. values = append(values, strings.Trim(vs[i], " "))
  291. }
  292. return names, values
  293. }
  294. func NewCrossClusterProvider(ctype string, overrideConfigPath string, cache clustercache.ClusterCache) (Provider, error) {
  295. if ctype == "aws" {
  296. return &AWS{
  297. Clientset: cache,
  298. Config: NewProviderConfig(overrideConfigPath),
  299. }, nil
  300. } else if ctype == "gcp" {
  301. return &GCP{
  302. Clientset: cache,
  303. Config: NewProviderConfig(overrideConfigPath),
  304. }, nil
  305. }
  306. return &CustomProvider{
  307. Clientset: cache,
  308. Config: NewProviderConfig(overrideConfigPath),
  309. }, nil
  310. }
  311. // NewProvider looks at the nodespec or provider metadata server to decide which provider to instantiate.
  312. func NewProvider(cache clustercache.ClusterCache, apiKey string) (Provider, error) {
  313. nodes := cache.GetAllNodes()
  314. if len(nodes) == 0 {
  315. return nil, fmt.Errorf("Could not locate any nodes for cluster.")
  316. }
  317. provider := strings.ToLower(nodes[0].Spec.ProviderID)
  318. if env.IsUseCSVProvider() {
  319. klog.Infof("Using CSV Provider with CSV at %s", env.GetCSVPath())
  320. configFileName := ""
  321. if metadata.OnGCE() {
  322. configFileName = "gcp.json"
  323. } else if strings.HasPrefix(provider, "aws") {
  324. configFileName = "aws.json"
  325. } else if strings.HasPrefix(provider, "azure") {
  326. configFileName = "azure.json"
  327. } else {
  328. configFileName = "default.json"
  329. }
  330. return &CSVProvider{
  331. CSVLocation: env.GetCSVPath(),
  332. CustomProvider: &CustomProvider{
  333. Clientset: cache,
  334. Config: NewProviderConfig(configFileName),
  335. },
  336. }, nil
  337. }
  338. if metadata.OnGCE() {
  339. klog.V(3).Info("metadata reports we are in GCE")
  340. if apiKey == "" {
  341. return nil, errors.New("Supply a GCP Key to start getting data")
  342. }
  343. return &GCP{
  344. Clientset: cache,
  345. APIKey: apiKey,
  346. Config: NewProviderConfig("gcp.json"),
  347. }, nil
  348. }
  349. if strings.HasPrefix(provider, "aws") {
  350. klog.V(2).Info("Found ProviderID starting with \"aws\", using AWS Provider")
  351. return &AWS{
  352. Clientset: cache,
  353. Config: NewProviderConfig("aws.json"),
  354. }, nil
  355. } else if strings.HasPrefix(provider, "azure") {
  356. klog.V(2).Info("Found ProviderID starting with \"azure\", using Azure Provider")
  357. return &Azure{
  358. Clientset: cache,
  359. Config: NewProviderConfig("azure.json"),
  360. }, nil
  361. } else {
  362. klog.V(2).Info("Unsupported provider, falling back to default")
  363. return &CustomProvider{
  364. Clientset: cache,
  365. Config: NewProviderConfig("default.json"),
  366. }, nil
  367. }
  368. }
  369. func UpdateClusterMeta(cluster_id, cluster_name string) error {
  370. pw := env.GetRemotePW()
  371. address := env.GetSQLAddress()
  372. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  373. db, err := sql.Open("postgres", connStr)
  374. if err != nil {
  375. return err
  376. }
  377. defer db.Close()
  378. updateStmt := `UPDATE names SET cluster_name = $1 WHERE cluster_id = $2;`
  379. _, err = db.Exec(updateStmt, cluster_name, cluster_id)
  380. if err != nil {
  381. return err
  382. }
  383. return nil
  384. }
  385. func CreateClusterMeta(cluster_id, cluster_name string) error {
  386. pw := env.GetRemotePW()
  387. address := env.GetSQLAddress()
  388. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  389. db, err := sql.Open("postgres", connStr)
  390. if err != nil {
  391. return err
  392. }
  393. defer db.Close()
  394. for _, stmt := range createTableStatements {
  395. _, err := db.Exec(stmt)
  396. if err != nil {
  397. return err
  398. }
  399. }
  400. insertStmt := `INSERT INTO names (cluster_id, cluster_name) VALUES ($1, $2);`
  401. _, err = db.Exec(insertStmt, cluster_id, cluster_name)
  402. if err != nil {
  403. return err
  404. }
  405. return nil
  406. }
  407. func GetClusterMeta(cluster_id string) (string, string, error) {
  408. pw := env.GetRemotePW()
  409. address := env.GetSQLAddress()
  410. connStr := fmt.Sprintf("postgres://postgres:%s@%s:5432?sslmode=disable", pw, address)
  411. db, err := sql.Open("postgres", connStr)
  412. defer db.Close()
  413. query := `SELECT cluster_id, cluster_name
  414. FROM names
  415. WHERE cluster_id = ?`
  416. rows, err := db.Query(query, cluster_id)
  417. if err != nil {
  418. return "", "", err
  419. }
  420. defer rows.Close()
  421. var (
  422. sql_cluster_id string
  423. cluster_name string
  424. )
  425. for rows.Next() {
  426. if err := rows.Scan(&sql_cluster_id, &cluster_name); err != nil {
  427. return "", "", err
  428. }
  429. }
  430. return sql_cluster_id, cluster_name, nil
  431. }
  432. func GetOrCreateClusterMeta(cluster_id, cluster_name string) (string, string, error) {
  433. id, name, err := GetClusterMeta(cluster_id)
  434. if err != nil {
  435. err := CreateClusterMeta(cluster_id, cluster_name)
  436. if err != nil {
  437. return "", "", err
  438. }
  439. }
  440. if id == "" {
  441. err := CreateClusterMeta(cluster_id, cluster_name)
  442. if err != nil {
  443. return "", "", err
  444. }
  445. }
  446. return id, name, nil
  447. }