gcpprovider.go 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130
  1. package cloud
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "math"
  9. "net/http"
  10. "net/url"
  11. "os"
  12. "regexp"
  13. "strconv"
  14. "strings"
  15. "sync"
  16. "time"
  17. "k8s.io/klog"
  18. "cloud.google.com/go/bigquery"
  19. "cloud.google.com/go/compute/metadata"
  20. "github.com/kubecost/cost-model/clustercache"
  21. "golang.org/x/oauth2"
  22. "golang.org/x/oauth2/google"
  23. compute "google.golang.org/api/compute/v1"
  24. "google.golang.org/api/iterator"
  25. v1 "k8s.io/api/core/v1"
  26. )
  27. const GKE_GPU_TAG = "cloud.google.com/gke-accelerator"
  28. const BigqueryUpdateType = "bigqueryupdate"
  29. type userAgentTransport struct {
  30. userAgent string
  31. base http.RoundTripper
  32. }
  33. func (t userAgentTransport) RoundTrip(req *http.Request) (*http.Response, error) {
  34. req.Header.Set("User-Agent", t.userAgent)
  35. return t.base.RoundTrip(req)
  36. }
  37. // GCP implements a provider interface for GCP
  38. type GCP struct {
  39. Pricing map[string]*GCPPricing
  40. Clientset clustercache.ClusterCache
  41. APIKey string
  42. BaseCPUPrice string
  43. ProjectID string
  44. BillingDataDataset string
  45. DownloadPricingDataLock sync.RWMutex
  46. ReservedInstances []*GCPReservedInstance
  47. Config *ProviderConfig
  48. *CustomProvider
  49. }
  50. type gcpAllocation struct {
  51. Aggregator bigquery.NullString
  52. Environment bigquery.NullString
  53. Service string
  54. Cost float64
  55. }
  56. func gcpAllocationToOutOfClusterAllocation(gcpAlloc gcpAllocation) *OutOfClusterAllocation {
  57. var aggregator string
  58. if gcpAlloc.Aggregator.Valid {
  59. aggregator = gcpAlloc.Aggregator.StringVal
  60. }
  61. var environment string
  62. if gcpAlloc.Environment.Valid {
  63. environment = gcpAlloc.Environment.StringVal
  64. }
  65. return &OutOfClusterAllocation{
  66. Aggregator: aggregator,
  67. Environment: environment,
  68. Service: gcpAlloc.Service,
  69. Cost: gcpAlloc.Cost,
  70. }
  71. }
  72. func (gcp *GCP) GetLocalStorageQuery(window, offset string, rate bool) string {
  73. // TODO Set to the price for the appropriate storage class. It's not trivial to determine the local storage disk type
  74. // See https://cloud.google.com/compute/disks-image-pricing#persistentdisk
  75. localStorageCost := 0.04
  76. fmtOffset := ""
  77. if offset != "" {
  78. fmtOffset = fmt.Sprintf("offset %s", offset)
  79. }
  80. fmtCumulativeQuery := `sum(
  81. sum_over_time(container_fs_limit_bytes{device!="tmpfs", id="/"}[%s:1m]%s)
  82. ) by (cluster_id) / 60 / 730 / 1024 / 1024 / 1024 * %f`
  83. fmtMonthlyQuery := `sum(
  84. avg_over_time(container_fs_limit_bytes{device!="tmpfs", id="/"}[%s:1m]%s)
  85. ) by (cluster_id) / 1024 / 1024 / 1024 * %f`
  86. fmtQuery := fmtCumulativeQuery
  87. if rate {
  88. fmtQuery = fmtMonthlyQuery
  89. }
  90. return fmt.Sprintf(fmtQuery, window, fmtOffset, localStorageCost)
  91. }
  92. func (gcp *GCP) GetConfig() (*CustomPricing, error) {
  93. c, err := gcp.Config.GetCustomPricingData()
  94. if err != nil {
  95. return nil, err
  96. }
  97. if c.Discount == "" {
  98. c.Discount = "30%"
  99. }
  100. if c.NegotiatedDiscount == "" {
  101. c.NegotiatedDiscount = "0%"
  102. }
  103. return c, nil
  104. }
  105. type BigQueryConfig struct {
  106. ProjectID string `json:"projectID"`
  107. BillingDataDataset string `json:"billingDataDataset"`
  108. Key map[string]string `json:"key"`
  109. }
  110. func (gcp *GCP) GetManagementPlatform() (string, error) {
  111. nodes := gcp.Clientset.GetAllNodes()
  112. if len(nodes) > 0 {
  113. n := nodes[0]
  114. version := n.Status.NodeInfo.KubeletVersion
  115. if strings.Contains(version, "gke") {
  116. return "gke", nil
  117. }
  118. }
  119. return "", nil
  120. }
  121. func (gcp *GCP) UpdateConfigFromConfigMap(a map[string]string) (*CustomPricing, error) {
  122. return gcp.Config.UpdateFromMap(a)
  123. }
  124. func (gcp *GCP) UpdateConfig(r io.Reader, updateType string) (*CustomPricing, error) {
  125. return gcp.Config.Update(func(c *CustomPricing) error {
  126. if updateType == BigqueryUpdateType {
  127. a := BigQueryConfig{}
  128. err := json.NewDecoder(r).Decode(&a)
  129. if err != nil {
  130. return err
  131. }
  132. c.ProjectID = a.ProjectID
  133. c.BillingDataDataset = a.BillingDataDataset
  134. j, err := json.Marshal(a.Key)
  135. if err != nil {
  136. return err
  137. }
  138. path := os.Getenv("CONFIG_PATH")
  139. if path == "" {
  140. path = "/models/"
  141. }
  142. keyPath := path + "key.json"
  143. err = ioutil.WriteFile(keyPath, j, 0644)
  144. if err != nil {
  145. return err
  146. }
  147. } else {
  148. a := make(map[string]interface{})
  149. err := json.NewDecoder(r).Decode(&a)
  150. if err != nil {
  151. return err
  152. }
  153. for k, v := range a {
  154. kUpper := strings.Title(k) // Just so we consistently supply / receive the same values, uppercase the first letter.
  155. vstr, ok := v.(string)
  156. if ok {
  157. err := SetCustomPricingField(c, kUpper, vstr)
  158. if err != nil {
  159. return err
  160. }
  161. } else {
  162. sci := v.(map[string]interface{})
  163. sc := make(map[string]string)
  164. for k, val := range sci {
  165. sc[k] = val.(string)
  166. }
  167. c.SharedCosts = sc //todo: support reflection/multiple map fields
  168. }
  169. }
  170. }
  171. remoteEnabled := os.Getenv(remoteEnabled)
  172. if remoteEnabled == "true" {
  173. err := UpdateClusterMeta(os.Getenv(clusterIDKey), c.ClusterName)
  174. if err != nil {
  175. return err
  176. }
  177. }
  178. return nil
  179. })
  180. }
  181. // ExternalAllocations represents tagged assets outside the scope of kubernetes.
  182. // "start" and "end" are dates of the format YYYY-MM-DD
  183. // "aggregator" is the tag used to determine how to allocate those assets, ie namespace, pod, etc.
  184. func (gcp *GCP) ExternalAllocations(start string, end string, aggregator string, filterType string, filterValue string) ([]*OutOfClusterAllocation, error) {
  185. c, err := gcp.Config.GetCustomPricingData()
  186. if err != nil {
  187. return nil, err
  188. }
  189. // start, end formatted like: "2019-04-20 00:00:00"
  190. queryString := fmt.Sprintf(`SELECT
  191. service,
  192. labels.key as aggregator,
  193. labels.value as environment,
  194. SUM(cost) as cost
  195. FROM (SELECT
  196. service.description as service,
  197. labels,
  198. cost
  199. FROM %s
  200. WHERE usage_start_time >= "%s" AND usage_start_time < "%s")
  201. LEFT JOIN UNNEST(labels) as labels
  202. ON labels.key = "%s"
  203. GROUP BY aggregator, environment, service;`, c.BillingDataDataset, start, end, aggregator) // For example, "billing_data.gcp_billing_export_v1_01AC9F_74CF1D_5565A2"
  204. klog.V(4).Infof("Querying \"%s\" with : %s", c.ProjectID, queryString)
  205. return gcp.QuerySQL(queryString)
  206. }
  207. // QuerySQL should query BigQuery for billing data for out of cluster costs.
  208. func (gcp *GCP) QuerySQL(query string) ([]*OutOfClusterAllocation, error) {
  209. c, err := gcp.Config.GetCustomPricingData()
  210. if err != nil {
  211. return nil, err
  212. }
  213. ctx := context.Background()
  214. client, err := bigquery.NewClient(ctx, c.ProjectID) // For example, "guestbook-227502"
  215. if err != nil {
  216. return nil, err
  217. }
  218. q := client.Query(query)
  219. it, err := q.Read(ctx)
  220. if err != nil {
  221. return nil, err
  222. }
  223. var allocations []*OutOfClusterAllocation
  224. for {
  225. var a gcpAllocation
  226. err := it.Next(&a)
  227. if err == iterator.Done {
  228. break
  229. }
  230. if err != nil {
  231. return nil, err
  232. }
  233. allocations = append(allocations, gcpAllocationToOutOfClusterAllocation(a))
  234. }
  235. return allocations, nil
  236. }
  237. // ClusterName returns the name of a GKE cluster, as provided by metadata.
  238. func (gcp *GCP) ClusterInfo() (map[string]string, error) {
  239. remote := os.Getenv(remoteEnabled)
  240. remoteEnabled := false
  241. if os.Getenv(remote) == "true" {
  242. remoteEnabled = true
  243. }
  244. metadataClient := metadata.NewClient(&http.Client{Transport: userAgentTransport{
  245. userAgent: "kubecost",
  246. base: http.DefaultTransport,
  247. }})
  248. attribute, err := metadataClient.InstanceAttributeValue("cluster-name")
  249. if err != nil {
  250. klog.Infof("Error loading metadata cluster-name: %s", err.Error())
  251. }
  252. c, err := gcp.GetConfig()
  253. if err != nil {
  254. klog.V(1).Infof("Error opening config: %s", err.Error())
  255. }
  256. if c.ClusterName != "" {
  257. attribute = c.ClusterName
  258. }
  259. m := make(map[string]string)
  260. m["name"] = attribute
  261. m["provider"] = "GCP"
  262. m["id"] = os.Getenv(clusterIDKey)
  263. m["remoteReadEnabled"] = strconv.FormatBool(remoteEnabled)
  264. return m, nil
  265. }
  266. // AddServiceKey adds the service key as required for GetDisks
  267. func (*GCP) AddServiceKey(formValues url.Values) error {
  268. key := formValues.Get("key")
  269. k := []byte(key)
  270. return ioutil.WriteFile("/var/configs/key.json", k, 0644)
  271. }
  272. // GetDisks returns the GCP disks backing PVs. Useful because sometimes k8s will not clean up PVs correctly. Requires a json config in /var/configs with key region.
  273. func (*GCP) GetDisks() ([]byte, error) {
  274. // metadata API setup
  275. metadataClient := metadata.NewClient(&http.Client{Transport: userAgentTransport{
  276. userAgent: "kubecost",
  277. base: http.DefaultTransport,
  278. }})
  279. projID, err := metadataClient.ProjectID()
  280. if err != nil {
  281. return nil, err
  282. }
  283. client, err := google.DefaultClient(oauth2.NoContext,
  284. "https://www.googleapis.com/auth/compute.readonly")
  285. if err != nil {
  286. return nil, err
  287. }
  288. svc, err := compute.New(client)
  289. if err != nil {
  290. return nil, err
  291. }
  292. res, err := svc.Disks.AggregatedList(projID).Do()
  293. if err != nil {
  294. return nil, err
  295. }
  296. return json.Marshal(res)
  297. }
  298. // GCPPricing represents GCP pricing data for a SKU
  299. type GCPPricing struct {
  300. Name string `json:"name"`
  301. SKUID string `json:"skuId"`
  302. Description string `json:"description"`
  303. Category *GCPResourceInfo `json:"category"`
  304. ServiceRegions []string `json:"serviceRegions"`
  305. PricingInfo []*PricingInfo `json:"pricingInfo"`
  306. ServiceProviderName string `json:"serviceProviderName"`
  307. Node *Node `json:"node"`
  308. PV *PV `json:"pv"`
  309. }
  310. // PricingInfo contains metadata about a cost.
  311. type PricingInfo struct {
  312. Summary string `json:"summary"`
  313. PricingExpression *PricingExpression `json:"pricingExpression"`
  314. CurrencyConversionRate int `json:"currencyConversionRate"`
  315. EffectiveTime string `json:""`
  316. }
  317. // PricingExpression contains metadata about a cost.
  318. type PricingExpression struct {
  319. UsageUnit string `json:"usageUnit"`
  320. UsageUnitDescription string `json:"usageUnitDescription"`
  321. BaseUnit string `json:"baseUnit"`
  322. BaseUnitConversionFactor int64 `json:"-"`
  323. DisplayQuantity int `json:"displayQuantity"`
  324. TieredRates []*TieredRates `json:"tieredRates"`
  325. }
  326. // TieredRates contain data about variable pricing.
  327. type TieredRates struct {
  328. StartUsageAmount int `json:"startUsageAmount"`
  329. UnitPrice *UnitPriceInfo `json:"unitPrice"`
  330. }
  331. // UnitPriceInfo contains data about the actual price being charged.
  332. type UnitPriceInfo struct {
  333. CurrencyCode string `json:"currencyCode"`
  334. Units string `json:"units"`
  335. Nanos float64 `json:"nanos"`
  336. }
  337. // GCPResourceInfo contains metadata about the node.
  338. type GCPResourceInfo struct {
  339. ServiceDisplayName string `json:"serviceDisplayName"`
  340. ResourceFamily string `json:"resourceFamily"`
  341. ResourceGroup string `json:"resourceGroup"`
  342. UsageType string `json:"usageType"`
  343. }
  344. func (gcp *GCP) parsePage(r io.Reader, inputKeys map[string]Key, pvKeys map[string]PVKey) (map[string]*GCPPricing, string, error) {
  345. gcpPricingList := make(map[string]*GCPPricing)
  346. var nextPageToken string
  347. dec := json.NewDecoder(r)
  348. for {
  349. t, err := dec.Token()
  350. if err == io.EOF {
  351. break
  352. }
  353. if t == "skus" {
  354. _, err := dec.Token() // consumes [
  355. if err != nil {
  356. return nil, "", err
  357. }
  358. for dec.More() {
  359. product := &GCPPricing{}
  360. err := dec.Decode(&product)
  361. if err != nil {
  362. return nil, "", err
  363. }
  364. usageType := strings.ToLower(product.Category.UsageType)
  365. instanceType := strings.ToLower(product.Category.ResourceGroup)
  366. if instanceType == "ssd" && !strings.Contains(product.Description, "Regional") { // TODO: support regional
  367. lastRateIndex := len(product.PricingInfo[0].PricingExpression.TieredRates) - 1
  368. var nanos float64
  369. if len(product.PricingInfo) > 0 {
  370. nanos = product.PricingInfo[0].PricingExpression.TieredRates[lastRateIndex].UnitPrice.Nanos
  371. } else {
  372. continue
  373. }
  374. hourlyPrice := (nanos * math.Pow10(-9)) / 730
  375. for _, sr := range product.ServiceRegions {
  376. region := sr
  377. candidateKey := region + "," + "ssd"
  378. if _, ok := pvKeys[candidateKey]; ok {
  379. product.PV = &PV{
  380. Cost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
  381. }
  382. gcpPricingList[candidateKey] = product
  383. continue
  384. }
  385. }
  386. continue
  387. } else if instanceType == "pdstandard" && !strings.Contains(product.Description, "Regional") { // TODO: support regional
  388. lastRateIndex := len(product.PricingInfo[0].PricingExpression.TieredRates) - 1
  389. var nanos float64
  390. if len(product.PricingInfo) > 0 {
  391. nanos = product.PricingInfo[0].PricingExpression.TieredRates[lastRateIndex].UnitPrice.Nanos
  392. } else {
  393. continue
  394. }
  395. hourlyPrice := (nanos * math.Pow10(-9)) / 730
  396. for _, sr := range product.ServiceRegions {
  397. region := sr
  398. candidateKey := region + "," + "pdstandard"
  399. if _, ok := pvKeys[candidateKey]; ok {
  400. product.PV = &PV{
  401. Cost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
  402. }
  403. gcpPricingList[candidateKey] = product
  404. continue
  405. }
  406. }
  407. continue
  408. }
  409. if (instanceType == "ram" || instanceType == "cpu") && strings.Contains(strings.ToUpper(product.Description), "CUSTOM") {
  410. instanceType = "custom"
  411. }
  412. if (instanceType == "ram" || instanceType == "cpu") && strings.Contains(strings.ToUpper(product.Description), "N2") {
  413. instanceType = "n2standard"
  414. }
  415. if (instanceType == "ram" || instanceType == "cpu") && strings.Contains(strings.ToUpper(product.Description), "E2 INSTANCE") {
  416. instanceType = "e2"
  417. }
  418. partialCPUMap := make(map[string]float64)
  419. partialCPUMap["e2micro"] = 0.25
  420. partialCPUMap["e2small"] = 0.5
  421. partialCPUMap["e2medium"] = 1
  422. /*
  423. var partialCPU float64
  424. if strings.ToLower(instanceType) == "f1micro" {
  425. partialCPU = 0.2
  426. } else if strings.ToLower(instanceType) == "g1small" {
  427. partialCPU = 0.5
  428. }
  429. */
  430. var gpuType string
  431. provIdRx := regexp.MustCompile("(Nvidia Tesla [^ ]+) ")
  432. for matchnum, group := range provIdRx.FindStringSubmatch(product.Description) {
  433. if matchnum == 1 {
  434. gpuType = strings.ToLower(strings.Join(strings.Split(group, " "), "-"))
  435. klog.V(4).Info("GPU type found: " + gpuType)
  436. }
  437. }
  438. candidateKeys := []string{}
  439. for _, region := range product.ServiceRegions {
  440. if instanceType == "e2" { // this needs to be done to handle a partial cpu mapping
  441. candidateKeys = append(candidateKeys, region+","+"e2micro"+","+usageType)
  442. candidateKeys = append(candidateKeys, region+","+"e2small"+","+usageType)
  443. candidateKeys = append(candidateKeys, region+","+"e2medium"+","+usageType)
  444. candidateKeys = append(candidateKeys, region+","+"e2standard"+","+usageType)
  445. } else {
  446. candidateKey := region + "," + instanceType + "," + usageType
  447. candidateKeys = append(candidateKeys, candidateKey)
  448. }
  449. }
  450. for _, candidateKey := range candidateKeys {
  451. instanceType = strings.Split(candidateKey, ",")[1] // we may have overriden this while generating candidate keys
  452. region := strings.Split(candidateKey, ",")[0]
  453. candidateKeyGPU := candidateKey + ",gpu"
  454. if gpuType != "" {
  455. lastRateIndex := len(product.PricingInfo[0].PricingExpression.TieredRates) - 1
  456. var nanos float64
  457. if len(product.PricingInfo) > 0 {
  458. nanos = product.PricingInfo[0].PricingExpression.TieredRates[lastRateIndex].UnitPrice.Nanos
  459. } else {
  460. continue
  461. }
  462. hourlyPrice := nanos * math.Pow10(-9)
  463. for k, key := range inputKeys {
  464. if key.GPUType() == gpuType+","+usageType {
  465. if region == strings.Split(k, ",")[0] {
  466. klog.V(3).Infof("Matched GPU to node in region \"%s\"", region)
  467. klog.V(4).Infof("PRODUCT DESCRIPTION: %s", product.Description)
  468. matchedKey := key.Features()
  469. if pl, ok := gcpPricingList[matchedKey]; ok {
  470. pl.Node.GPUName = gpuType
  471. pl.Node.GPUCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
  472. pl.Node.GPU = "1"
  473. } else {
  474. product.Node = &Node{
  475. GPUName: gpuType,
  476. GPUCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
  477. GPU: "1",
  478. }
  479. gcpPricingList[matchedKey] = product
  480. }
  481. klog.V(3).Infof("Added data for " + matchedKey)
  482. }
  483. }
  484. }
  485. } else {
  486. _, ok := inputKeys[candidateKey]
  487. _, ok2 := inputKeys[candidateKeyGPU]
  488. if ok || ok2 {
  489. lastRateIndex := len(product.PricingInfo[0].PricingExpression.TieredRates) - 1
  490. var nanos float64
  491. if len(product.PricingInfo) > 0 {
  492. nanos = product.PricingInfo[0].PricingExpression.TieredRates[lastRateIndex].UnitPrice.Nanos
  493. } else {
  494. continue
  495. }
  496. hourlyPrice := nanos * math.Pow10(-9)
  497. if hourlyPrice == 0 {
  498. continue
  499. } else if strings.Contains(strings.ToUpper(product.Description), "RAM") {
  500. if instanceType == "custom" {
  501. klog.V(4).Infof("RAM custom sku is: " + product.Name)
  502. }
  503. if _, ok := gcpPricingList[candidateKey]; ok {
  504. gcpPricingList[candidateKey].Node.RAMCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
  505. } else {
  506. product = &GCPPricing{}
  507. product.Node = &Node{
  508. RAMCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
  509. }
  510. partialCPU, pcok := partialCPUMap[instanceType]
  511. if pcok {
  512. product.Node.VCPU = fmt.Sprintf("%f", partialCPU)
  513. }
  514. product.Node.UsageType = usageType
  515. gcpPricingList[candidateKey] = product
  516. }
  517. if _, ok := gcpPricingList[candidateKeyGPU]; ok {
  518. klog.V(1).Infof("Adding RAM %f for %s", hourlyPrice, candidateKeyGPU)
  519. gcpPricingList[candidateKeyGPU].Node.RAMCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
  520. } else {
  521. klog.V(1).Infof("Adding RAM %f for %s", hourlyPrice, candidateKeyGPU)
  522. product = &GCPPricing{}
  523. product.Node = &Node{
  524. RAMCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
  525. }
  526. partialCPU, pcok := partialCPUMap[instanceType]
  527. if pcok {
  528. product.Node.VCPU = fmt.Sprintf("%f", partialCPU)
  529. }
  530. product.Node.UsageType = usageType
  531. gcpPricingList[candidateKeyGPU] = product
  532. }
  533. break
  534. } else {
  535. if _, ok := gcpPricingList[candidateKey]; ok {
  536. gcpPricingList[candidateKey].Node.VCPUCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
  537. } else {
  538. product = &GCPPricing{}
  539. product.Node = &Node{
  540. VCPUCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
  541. }
  542. partialCPU, pcok := partialCPUMap[instanceType]
  543. if pcok {
  544. product.Node.VCPU = fmt.Sprintf("%f", partialCPU)
  545. }
  546. product.Node.UsageType = usageType
  547. gcpPricingList[candidateKey] = product
  548. }
  549. if _, ok := gcpPricingList[candidateKeyGPU]; ok {
  550. gcpPricingList[candidateKeyGPU].Node.VCPUCost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
  551. } else {
  552. product = &GCPPricing{}
  553. product.Node = &Node{
  554. VCPUCost: strconv.FormatFloat(hourlyPrice, 'f', -1, 64),
  555. }
  556. partialCPU, pcok := partialCPUMap[instanceType]
  557. if pcok {
  558. product.Node.VCPU = fmt.Sprintf("%f", partialCPU)
  559. }
  560. product.Node.UsageType = usageType
  561. gcpPricingList[candidateKeyGPU] = product
  562. }
  563. break
  564. }
  565. }
  566. }
  567. }
  568. }
  569. }
  570. if t == "nextPageToken" {
  571. pageToken, err := dec.Token()
  572. if err != nil {
  573. klog.V(2).Infof("Error parsing nextpage token: " + err.Error())
  574. return nil, "", err
  575. }
  576. if pageToken.(string) != "" {
  577. nextPageToken = pageToken.(string)
  578. } else {
  579. nextPageToken = "done"
  580. }
  581. }
  582. }
  583. return gcpPricingList, nextPageToken, nil
  584. }
  585. func (gcp *GCP) parsePages(inputKeys map[string]Key, pvKeys map[string]PVKey) (map[string]*GCPPricing, error) {
  586. var pages []map[string]*GCPPricing
  587. url := "https://cloudbilling.googleapis.com/v1/services/6F81-5844-456A/skus?key=" + gcp.APIKey
  588. klog.V(2).Infof("Fetch GCP Billing Data from URL: %s", url)
  589. var parsePagesHelper func(string) error
  590. parsePagesHelper = func(pageToken string) error {
  591. if pageToken == "done" {
  592. return nil
  593. } else if pageToken != "" {
  594. url = url + "&pageToken=" + pageToken
  595. }
  596. resp, err := http.Get(url)
  597. if err != nil {
  598. return err
  599. }
  600. page, token, err := gcp.parsePage(resp.Body, inputKeys, pvKeys)
  601. if err != nil {
  602. return err
  603. }
  604. pages = append(pages, page)
  605. return parsePagesHelper(token)
  606. }
  607. err := parsePagesHelper("")
  608. if err != nil {
  609. return nil, err
  610. }
  611. returnPages := make(map[string]*GCPPricing)
  612. for _, page := range pages {
  613. for k, v := range page {
  614. if val, ok := returnPages[k]; ok { //keys may need to be merged
  615. if val.Node != nil {
  616. if val.Node.VCPUCost == "" {
  617. val.Node.VCPUCost = v.Node.VCPUCost
  618. }
  619. if val.Node.RAMCost == "" {
  620. val.Node.RAMCost = v.Node.RAMCost
  621. }
  622. if val.Node.GPUCost == "" {
  623. val.Node.GPUCost = v.Node.GPUCost
  624. val.Node.GPU = v.Node.GPU
  625. val.Node.GPUName = v.Node.GPUName
  626. }
  627. }
  628. if val.PV != nil {
  629. if val.PV.Cost == "" {
  630. val.PV.Cost = v.PV.Cost
  631. }
  632. }
  633. } else {
  634. returnPages[k] = v
  635. }
  636. }
  637. }
  638. klog.V(1).Infof("ALL PAGES: %+v", returnPages)
  639. for k, v := range returnPages {
  640. klog.V(1).Infof("Returned Page: %s : %+v", k, v.Node)
  641. }
  642. return returnPages, err
  643. }
  644. // DownloadPricingData fetches data from the GCP Pricing API. Requires a key-- a kubecost key is provided for quickstart, but should be replaced by a users.
  645. func (gcp *GCP) DownloadPricingData() error {
  646. gcp.DownloadPricingDataLock.Lock()
  647. defer gcp.DownloadPricingDataLock.Unlock()
  648. c, err := gcp.Config.GetCustomPricingData()
  649. if err != nil {
  650. klog.V(2).Infof("Error downloading default pricing data: %s", err.Error())
  651. return err
  652. }
  653. gcp.BaseCPUPrice = c.CPU
  654. gcp.ProjectID = c.ProjectID
  655. gcp.BillingDataDataset = c.BillingDataDataset
  656. nodeList := gcp.Clientset.GetAllNodes()
  657. inputkeys := make(map[string]Key)
  658. for _, n := range nodeList {
  659. labels := n.GetObjectMeta().GetLabels()
  660. key := gcp.GetKey(labels)
  661. inputkeys[key.Features()] = key
  662. }
  663. pvList := gcp.Clientset.GetAllPersistentVolumes()
  664. storageClasses := gcp.Clientset.GetAllStorageClasses()
  665. storageClassMap := make(map[string]map[string]string)
  666. for _, storageClass := range storageClasses {
  667. params := storageClass.Parameters
  668. storageClassMap[storageClass.ObjectMeta.Name] = params
  669. if storageClass.GetAnnotations()["storageclass.kubernetes.io/is-default-class"] == "true" || storageClass.GetAnnotations()["storageclass.beta.kubernetes.io/is-default-class"] == "true" {
  670. storageClassMap["default"] = params
  671. storageClassMap[""] = params
  672. }
  673. }
  674. pvkeys := make(map[string]PVKey)
  675. for _, pv := range pvList {
  676. params, ok := storageClassMap[pv.Spec.StorageClassName]
  677. if !ok {
  678. klog.Infof("Unable to find params for storageClassName %s", pv.Name)
  679. continue
  680. }
  681. key := gcp.GetPVKey(pv, params)
  682. pvkeys[key.Features()] = key
  683. }
  684. reserved, err := gcp.getReservedInstances()
  685. if err != nil {
  686. klog.V(1).Infof("Failed to lookup reserved instance data: %s", err.Error())
  687. } else {
  688. klog.V(1).Infof("Found %d reserved instances", len(reserved))
  689. gcp.ReservedInstances = reserved
  690. for _, r := range reserved {
  691. klog.V(1).Infof("%s", r)
  692. }
  693. }
  694. pages, err := gcp.parsePages(inputkeys, pvkeys)
  695. if err != nil {
  696. return err
  697. }
  698. gcp.Pricing = pages
  699. return nil
  700. }
  701. func (gcp *GCP) PVPricing(pvk PVKey) (*PV, error) {
  702. gcp.DownloadPricingDataLock.RLock()
  703. defer gcp.DownloadPricingDataLock.RUnlock()
  704. pricing, ok := gcp.Pricing[pvk.Features()]
  705. if !ok {
  706. klog.V(4).Infof("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
  707. return &PV{}, nil
  708. }
  709. return pricing.PV, nil
  710. }
  711. // Stubbed NetworkPricing for GCP. Pull directly from gcp.json for now
  712. func (gcp *GCP) NetworkPricing() (*Network, error) {
  713. cpricing, err := gcp.Config.GetCustomPricingData()
  714. if err != nil {
  715. return nil, err
  716. }
  717. znec, err := strconv.ParseFloat(cpricing.ZoneNetworkEgress, 64)
  718. if err != nil {
  719. return nil, err
  720. }
  721. rnec, err := strconv.ParseFloat(cpricing.RegionNetworkEgress, 64)
  722. if err != nil {
  723. return nil, err
  724. }
  725. inec, err := strconv.ParseFloat(cpricing.InternetNetworkEgress, 64)
  726. if err != nil {
  727. return nil, err
  728. }
  729. return &Network{
  730. ZoneNetworkEgressCost: znec,
  731. RegionNetworkEgressCost: rnec,
  732. InternetNetworkEgressCost: inec,
  733. }, nil
  734. }
  735. const (
  736. GCPReservedInstanceResourceTypeRAM string = "MEMORY"
  737. GCPReservedInstanceResourceTypeCPU string = "VCPU"
  738. GCPReservedInstanceStatusActive string = "ACTIVE"
  739. GCPReservedInstancePlanOneYear string = "TWELVE_MONTH"
  740. GCPReservedInstancePlanThreeYear string = "THIRTY_SIX_MONTH"
  741. )
  742. type GCPReservedInstancePlan struct {
  743. Name string
  744. CPUCost float64
  745. RAMCost float64
  746. }
  747. type GCPReservedInstance struct {
  748. ReservedRAM int64
  749. ReservedCPU int64
  750. Plan *GCPReservedInstancePlan
  751. StartDate time.Time
  752. EndDate time.Time
  753. Region string
  754. }
  755. func (r *GCPReservedInstance) String() string {
  756. return fmt.Sprintf("[CPU: %d, RAM: %d, Region: %s, Start: %s, End: %s]", r.ReservedCPU, r.ReservedRAM, r.Region, r.StartDate.String(), r.EndDate.String())
  757. }
  758. type GCPReservedCounter struct {
  759. RemainingCPU int64
  760. RemainingRAM int64
  761. Instance *GCPReservedInstance
  762. }
  763. func newReservedCounter(instance *GCPReservedInstance) *GCPReservedCounter {
  764. return &GCPReservedCounter{
  765. RemainingCPU: instance.ReservedCPU,
  766. RemainingRAM: instance.ReservedRAM,
  767. Instance: instance,
  768. }
  769. }
  770. // Two available Reservation plans for GCP, 1-year and 3-year
  771. var gcpReservedInstancePlans map[string]*GCPReservedInstancePlan = map[string]*GCPReservedInstancePlan{
  772. GCPReservedInstancePlanOneYear: &GCPReservedInstancePlan{
  773. Name: GCPReservedInstancePlanOneYear,
  774. CPUCost: 0.019915,
  775. RAMCost: 0.002669,
  776. },
  777. GCPReservedInstancePlanThreeYear: &GCPReservedInstancePlan{
  778. Name: GCPReservedInstancePlanThreeYear,
  779. CPUCost: 0.014225,
  780. RAMCost: 0.001907,
  781. },
  782. }
  783. func (gcp *GCP) ApplyReservedInstancePricing(nodes map[string]*Node) {
  784. numReserved := len(gcp.ReservedInstances)
  785. // Early return if no reserved instance data loaded
  786. if numReserved == 0 {
  787. klog.V(4).Infof("[Reserved] No Reserved Instances")
  788. return
  789. }
  790. now := time.Now()
  791. counters := make(map[string][]*GCPReservedCounter)
  792. for _, r := range gcp.ReservedInstances {
  793. if now.Before(r.StartDate) || now.After(r.EndDate) {
  794. klog.V(1).Infof("[Reserved] Skipped Reserved Instance due to dates")
  795. continue
  796. }
  797. _, ok := counters[r.Region]
  798. counter := newReservedCounter(r)
  799. if !ok {
  800. counters[r.Region] = []*GCPReservedCounter{counter}
  801. } else {
  802. counters[r.Region] = append(counters[r.Region], counter)
  803. }
  804. }
  805. gcpNodes := make(map[string]*v1.Node)
  806. currentNodes := gcp.Clientset.GetAllNodes()
  807. // Create a node name -> node map
  808. for _, gcpNode := range currentNodes {
  809. gcpNodes[gcpNode.GetName()] = gcpNode
  810. }
  811. // go through all provider nodes using k8s nodes for region
  812. for nodeName, node := range nodes {
  813. // Reset reserved allocation to prevent double allocation
  814. node.Reserved = nil
  815. kNode, ok := gcpNodes[nodeName]
  816. if !ok {
  817. klog.V(4).Infof("[Reserved] Could not find K8s Node with name: %s", nodeName)
  818. continue
  819. }
  820. nodeRegion, ok := kNode.Labels[v1.LabelZoneRegion]
  821. if !ok {
  822. klog.V(4).Infof("[Reserved] Could not find node region")
  823. continue
  824. }
  825. reservedCounters, ok := counters[nodeRegion]
  826. if !ok {
  827. klog.V(4).Infof("[Reserved] Could not find counters for region: %s", nodeRegion)
  828. continue
  829. }
  830. node.Reserved = &ReservedInstanceData{
  831. ReservedCPU: 0,
  832. ReservedRAM: 0,
  833. }
  834. for _, reservedCounter := range reservedCounters {
  835. if reservedCounter.RemainingCPU != 0 {
  836. nodeCPU, _ := strconv.ParseInt(node.VCPU, 10, 64)
  837. nodeCPU -= node.Reserved.ReservedCPU
  838. node.Reserved.CPUCost = reservedCounter.Instance.Plan.CPUCost
  839. if reservedCounter.RemainingCPU >= nodeCPU {
  840. reservedCounter.RemainingCPU -= nodeCPU
  841. node.Reserved.ReservedCPU += nodeCPU
  842. } else {
  843. node.Reserved.ReservedCPU += reservedCounter.RemainingCPU
  844. reservedCounter.RemainingCPU = 0
  845. }
  846. }
  847. if reservedCounter.RemainingRAM != 0 {
  848. nodeRAMF, _ := strconv.ParseFloat(node.RAMBytes, 64)
  849. nodeRAM := int64(nodeRAMF)
  850. nodeRAM -= node.Reserved.ReservedRAM
  851. node.Reserved.RAMCost = reservedCounter.Instance.Plan.RAMCost
  852. if reservedCounter.RemainingRAM >= nodeRAM {
  853. reservedCounter.RemainingRAM -= nodeRAM
  854. node.Reserved.ReservedRAM += nodeRAM
  855. } else {
  856. node.Reserved.ReservedRAM += reservedCounter.RemainingRAM
  857. reservedCounter.RemainingRAM = 0
  858. }
  859. }
  860. }
  861. }
  862. }
  863. func (gcp *GCP) getReservedInstances() ([]*GCPReservedInstance, error) {
  864. var results []*GCPReservedInstance
  865. ctx := context.Background()
  866. computeService, err := compute.NewService(ctx)
  867. if err != nil {
  868. return nil, err
  869. }
  870. commitments, err := computeService.RegionCommitments.AggregatedList(gcp.ProjectID).Do()
  871. if err != nil {
  872. return nil, err
  873. }
  874. for regionKey, commitList := range commitments.Items {
  875. for _, commit := range commitList.Commitments {
  876. if commit.Status != GCPReservedInstanceStatusActive {
  877. continue
  878. }
  879. var vcpu int64 = 0
  880. var ram int64 = 0
  881. for _, resource := range commit.Resources {
  882. switch resource.Type {
  883. case GCPReservedInstanceResourceTypeRAM:
  884. ram = resource.Amount * 1024 * 1024
  885. case GCPReservedInstanceResourceTypeCPU:
  886. vcpu = resource.Amount
  887. default:
  888. klog.V(4).Infof("Failed to handle resource type: %s", resource.Type)
  889. }
  890. }
  891. var region string
  892. regionStr := strings.Split(regionKey, "/")
  893. if len(regionStr) == 2 {
  894. region = regionStr[1]
  895. }
  896. timeLayout := "2006-01-02T15:04:05Z07:00"
  897. startTime, err := time.Parse(timeLayout, commit.StartTimestamp)
  898. if err != nil {
  899. klog.V(1).Infof("Failed to parse start date: %s", commit.StartTimestamp)
  900. continue
  901. }
  902. endTime, err := time.Parse(timeLayout, commit.EndTimestamp)
  903. if err != nil {
  904. klog.V(1).Infof("Failed to parse end date: %s", commit.EndTimestamp)
  905. continue
  906. }
  907. // Look for a plan based on the name. Default to One Year if it fails
  908. plan, ok := gcpReservedInstancePlans[commit.Plan]
  909. if !ok {
  910. plan = gcpReservedInstancePlans[GCPReservedInstancePlanOneYear]
  911. }
  912. results = append(results, &GCPReservedInstance{
  913. Region: region,
  914. ReservedRAM: ram,
  915. ReservedCPU: vcpu,
  916. Plan: plan,
  917. StartDate: startTime,
  918. EndDate: endTime,
  919. })
  920. }
  921. }
  922. return results, nil
  923. }
  // pvKey bundles the persistent volume attributes used to look up its pricing:
  // the volume's labels, its storage class name and that class's parameters.
  type pvKey struct {
  	Labels                 map[string]string
  	StorageClass           string
  	StorageClassParameters map[string]string
  }

  // GetStorageClass returns the storage class name stored in the key.
  func (key *pvKey) GetStorageClass() string {
  	class := key.StorageClass
  	return class
  }
  932. func (gcp *GCP) GetPVKey(pv *v1.PersistentVolume, parameters map[string]string) PVKey {
  933. return &pvKey{
  934. Labels: pv.Labels,
  935. StorageClass: pv.Spec.StorageClassName,
  936. StorageClassParameters: parameters,
  937. }
  938. }
  939. func (key *pvKey) Features() string {
  940. // TODO: regional cluster pricing.
  941. storageClass := key.StorageClassParameters["type"]
  942. if storageClass == "pd-ssd" {
  943. storageClass = "ssd"
  944. } else if storageClass == "pd-standard" {
  945. storageClass = "pdstandard"
  946. }
  947. return key.Labels[v1.LabelZoneRegion] + "," + storageClass
  948. }
  949. type gcpKey struct {
  950. Labels map[string]string
  951. }
  952. func (gcp *GCP) GetKey(labels map[string]string) Key {
  953. return &gcpKey{
  954. Labels: labels,
  955. }
  956. }
  957. func (gcp *gcpKey) ID() string {
  958. return ""
  959. }
  960. func (gcp *gcpKey) GPUType() string {
  961. if t, ok := gcp.Labels[GKE_GPU_TAG]; ok {
  962. var usageType string
  963. if t, ok := gcp.Labels["cloud.google.com/gke-preemptible"]; ok && t == "true" {
  964. usageType = "preemptible"
  965. } else {
  966. usageType = "ondemand"
  967. }
  968. klog.V(4).Infof("GPU of type: \"%s\" found", t)
  969. return t + "," + usageType
  970. }
  971. return ""
  972. }
  973. // GetKey maps node labels to information needed to retrieve pricing data
  974. func (gcp *gcpKey) Features() string {
  975. instanceType := strings.ToLower(strings.Join(strings.Split(gcp.Labels[v1.LabelInstanceType], "-")[:2], ""))
  976. if instanceType == "n1highmem" || instanceType == "n1highcpu" {
  977. instanceType = "n1standard" // These are priced the same. TODO: support n1ultrahighmem
  978. } else if instanceType == "e2highmem" || instanceType == "e2highcpu" {
  979. instanceType = "e2standard"
  980. } else if strings.HasPrefix(instanceType, "custom") {
  981. instanceType = "custom" // The suffix of custom does not matter
  982. }
  983. region := strings.ToLower(gcp.Labels[v1.LabelZoneRegion])
  984. var usageType string
  985. if t, ok := gcp.Labels["cloud.google.com/gke-preemptible"]; ok && t == "true" {
  986. usageType = "preemptible"
  987. } else {
  988. usageType = "ondemand"
  989. }
  990. if _, ok := gcp.Labels[GKE_GPU_TAG]; ok {
  991. return region + "," + instanceType + "," + usageType + "," + "gpu"
  992. }
  993. return region + "," + instanceType + "," + usageType
  994. }
  995. // AllNodePricing returns the GCP pricing objects stored
  996. func (gcp *GCP) AllNodePricing() (interface{}, error) {
  997. gcp.DownloadPricingDataLock.RLock()
  998. defer gcp.DownloadPricingDataLock.RUnlock()
  999. return gcp.Pricing, nil
  1000. }
  1001. // NodePricing returns GCP pricing data for a single node
  1002. func (gcp *GCP) NodePricing(key Key) (*Node, error) {
  1003. gcp.DownloadPricingDataLock.RLock()
  1004. defer gcp.DownloadPricingDataLock.RUnlock()
  1005. if n, ok := gcp.Pricing[key.Features()]; ok {
  1006. klog.V(4).Infof("Returning pricing for node %s: %+v from SKU %s", key, n.Node, n.Name)
  1007. n.Node.BaseCPUPrice = gcp.BaseCPUPrice
  1008. return n.Node, nil
  1009. }
  1010. klog.V(1).Infof("[Warning] no pricing data found for %s: %s", key.Features(), key)
  1011. return nil, fmt.Errorf("Warning: no pricing data found for %s", key)
  1012. }