// awsprovider.go (64 KB)

  1. package cloud
  2. import (
  3. "bytes"
  4. "compress/gzip"
  5. "encoding/csv"
  6. "encoding/json"
  7. "fmt"
  8. "io"
  9. "io/ioutil"
  10. "log"
  11. "net/http"
  12. "os"
  13. "regexp"
  14. "strconv"
  15. "strings"
  16. "sync"
  17. "time"
  18. "k8s.io/klog"
  19. "github.com/kubecost/cost-model/pkg/clustercache"
  20. "github.com/kubecost/cost-model/pkg/errors"
  21. "github.com/kubecost/cost-model/pkg/util"
  22. "github.com/aws/aws-sdk-go/aws"
  23. "github.com/aws/aws-sdk-go/aws/awserr"
  24. "github.com/aws/aws-sdk-go/aws/credentials"
  25. "github.com/aws/aws-sdk-go/aws/credentials/stscreds"
  26. "github.com/aws/aws-sdk-go/aws/session"
  27. "github.com/aws/aws-sdk-go/service/athena"
  28. "github.com/aws/aws-sdk-go/service/ec2"
  29. "github.com/aws/aws-sdk-go/service/s3"
  30. "github.com/aws/aws-sdk-go/service/s3/s3manager"
  31. "github.com/jszwec/csvutil"
  32. v1 "k8s.io/api/core/v1"
  33. )
  // General constants used by the AWS provider.
  const (
  	// Names of the environment variables that carry the AWS access key pair.
  	awsAccessKeyIDEnvVar     = "AWS_ACCESS_KEY_ID"
  	awsAccessKeySecretEnvVar = "AWS_SECRET_ACCESS_KEY"

  	// awsReservedInstancePricePerHour is a fixed hourly price constant for
  	// reserved instances.
  	awsReservedInstancePricePerHour = 0.0287

  	// supportedSpotFeedVersion is the spot data feed version string this
  	// provider expects.
  	supportedSpotFeedVersion = "1"

  	// SpotInfoUpdateType and AthenaInfoUpdateType are the updateType values
  	// that UpdateConfig recognizes for spot feed and Athena settings.
  	SpotInfoUpdateType   = "spotinfo"
  	AthenaInfoUpdateType = "athenainfo"

  	// SpotRefreshDuration is how often spot data is refreshed.
  	SpotRefreshDuration = 15 * time.Minute

  	// defaultConfigPath is the default directory for configuration files.
  	defaultConfigPath = "/var/configs/"
  )
  // awsRegions lists the AWS region identifiers known to this provider.
  var awsRegions = []string{
  	// United States
  	"us-east-2",
  	"us-east-1",
  	"us-west-1",
  	"us-west-2",
  	// Asia Pacific
  	"ap-east-1",
  	"ap-south-1",
  	"ap-northeast-3",
  	"ap-northeast-2",
  	"ap-southeast-1",
  	"ap-southeast-2",
  	"ap-northeast-1",
  	// Canada
  	"ca-central-1",
  	// China
  	"cn-north-1",
  	"cn-northwest-1",
  	// Europe
  	"eu-central-1",
  	"eu-west-1",
  	"eu-west-2",
  	"eu-west-3",
  	"eu-north-1",
  	// Middle East
  	"me-south-1",
  	// South America
  	"sa-east-1",
  	// GovCloud
  	"us-gov-east-1",
  	"us-gov-west-1",
  }
  68. // AWS represents an Amazon Provider
  69. type AWS struct {
  70. Pricing map[string]*AWSProductTerms
  71. SpotPricingByInstanceID map[string]*spotInfo
  72. SpotPricingUpdatedAt *time.Time
  73. SpotRefreshRunning bool
  74. SpotPricingLock sync.RWMutex
  75. RIPricingByInstanceID map[string]*RIData
  76. RIDataRunning bool
  77. RIDataLock sync.RWMutex
  78. ValidPricingKeys map[string]bool
  79. Clientset clustercache.ClusterCache
  80. BaseCPUPrice string
  81. BaseRAMPrice string
  82. BaseGPUPrice string
  83. BaseSpotCPUPrice string
  84. BaseSpotRAMPrice string
  85. SpotLabelName string
  86. SpotLabelValue string
  87. ServiceKeyName string
  88. ServiceKeySecret string
  89. SpotDataRegion string
  90. SpotDataBucket string
  91. SpotDataPrefix string
  92. ProjectID string
  93. DownloadPricingDataLock sync.RWMutex
  94. Config *ProviderConfig
  95. *CustomProvider
  96. }
  // AWSAccessKey is the JSON representation of an AWS access key pair.
  type AWSAccessKey struct {
  	// AccessKeyID is serialized as "aws_access_key_id".
  	AccessKeyID string `json:"aws_access_key_id"`
  	// SecretAccessKey is serialized as "aws_secret_access_key".
  	SecretAccessKey string `json:"aws_secret_access_key"`
  }
  101. // AWSPricing maps a k8s node to an AWS Pricing "product"
  102. type AWSPricing struct {
  103. Products map[string]*AWSProduct `json:"products"`
  104. Terms AWSPricingTerms `json:"terms"`
  105. }
  106. // AWSProduct represents a purchased SKU
  107. type AWSProduct struct {
  108. Sku string `json:"sku"`
  109. Attributes AWSProductAttributes `json:"attributes"`
  110. }
  // AWSProductAttributes holds the product metadata that DownloadPricingData
  // uses to map a price list product to a node or a volume.
  type AWSProductAttributes struct {
  	// Location is the billing location name; it is translated to a region
  	// identifier through locationToRegion.
  	Location     string `json:"location"`
  	InstanceType string `json:"instanceType"`
  	Memory       string `json:"memory"`
  	Storage      string `json:"storage"`
  	VCpu         string `json:"vcpu"`
  	// UsageType distinguishes instance usage ("BoxUsage") from EBS volume
  	// usage ("EBS:Volume...") when the price list is parsed.
  	UsageType       string `json:"usagetype"`
  	OperatingSystem string `json:"operatingSystem"`
  	PreInstalledSw  string `json:"preInstalledSw"`
  	InstanceFamily  string `json:"instanceFamily"`
  	// GPU represents the number of GPU on the instance.
  	GPU string `json:"gpu"`
  }
  124. // AWSPricingTerms are how you pay for the node: OnDemand, Reserved, or (TODO) Spot
  125. type AWSPricingTerms struct {
  126. OnDemand map[string]map[string]*AWSOfferTerm `json:"OnDemand"`
  127. Reserved map[string]map[string]*AWSOfferTerm `json:"Reserved"`
  128. }
  129. // AWSOfferTerm is a sku extension used to pay for the node.
  130. type AWSOfferTerm struct {
  131. Sku string `json:"sku"`
  132. PriceDimensions map[string]*AWSRateCode `json:"priceDimensions"`
  133. }
  134. // AWSRateCode encodes data about the price of a product
  135. type AWSRateCode struct {
  136. Unit string `json:"unit"`
  137. PricePerUnit AWSCurrencyCode `json:"pricePerUnit"`
  138. }
  // AWSCurrencyCode carries a price per currency. Only USD is decoded.
  // (TODO: support non-USD)
  type AWSCurrencyCode struct {
  	USD string `json:"USD"`
  }
  143. // AWSProductTerms represents the full terms of the product
  144. type AWSProductTerms struct {
  145. Sku string `json:"sku"`
  146. OnDemand *AWSOfferTerm `json:"OnDemand"`
  147. Reserved *AWSOfferTerm `json:"Reserved"`
  148. Memory string `json:"memory"`
  149. Storage string `json:"storage"`
  150. VCpu string `json:"vcpu"`
  151. GPU string `json:"gpu"` // GPU represents the number of GPU on the instance
  152. PV *PV `json:"pv"`
  153. }
  const (
  	// ClusterIdEnvVar is the environment variable in which one can manually
  	// set the ClusterId.
  	ClusterIdEnvVar = "AWS_CLUSTER_ID"

  	// OnDemandRateCode is appended to a node sku to form its on-demand
  	// offer term code.
  	OnDemandRateCode = ".JRTCKXETXF"

  	// ReservedRateCode is appended to a node sku.
  	ReservedRateCode = ".38NPMPTW36"

  	// HourlyRateCode is appended to a sku and offer term code to form the
  	// price dimension key.
  	HourlyRateCode = ".6YS6EN2CT7"
  )
  // volTypes translates in both directions between AWS usage types and EBS
  // volume types as they appear in K8s storage class parameters and the
  // EC2 API.
  var volTypes = map[string]string{
  	// Usage type -> volume type.
  	"EBS:VolumeUsage.gp2":    "gp2",
  	"EBS:VolumeUsage":        "standard",
  	"EBS:VolumeUsage.sc1":    "sc1",
  	"EBS:VolumeP-IOPS.piops": "io1",
  	"EBS:VolumeUsage.st1":    "st1",
  	"EBS:VolumeUsage.piops":  "io1",

  	// Volume type -> usage type.
  	"gp2":      "EBS:VolumeUsage.gp2",
  	"standard": "EBS:VolumeUsage",
  	"sc1":      "EBS:VolumeUsage.sc1",
  	"io1":      "EBS:VolumeUsage.piops",
  	"st1":      "EBS:VolumeUsage.st1",
  }
  // locationToRegion maps AWS location names (as they come from Billing and
  // the price list) to actual region identifiers. Every region listed in
  // awsRegions has an entry here; a location that is missing resolves to the
  // empty string, which produces pricing keys no node can match.
  var locationToRegion = map[string]string{
  	"US East (Ohio)":             "us-east-2",
  	"US East (N. Virginia)":      "us-east-1",
  	"US West (N. California)":    "us-west-1",
  	"US West (Oregon)":           "us-west-2",
  	"Asia Pacific (Hong Kong)":   "ap-east-1",
  	"Asia Pacific (Mumbai)":      "ap-south-1",
  	"Asia Pacific (Osaka-Local)": "ap-northeast-3",
  	"Asia Pacific (Seoul)":       "ap-northeast-2",
  	"Asia Pacific (Singapore)":   "ap-southeast-1",
  	"Asia Pacific (Sydney)":      "ap-southeast-2",
  	"Asia Pacific (Tokyo)":       "ap-northeast-1",
  	"Canada (Central)":           "ca-central-1",
  	"China (Beijing)":            "cn-north-1",
  	"China (Ningxia)":            "cn-northwest-1",
  	"EU (Frankfurt)":             "eu-central-1",
  	"EU (Ireland)":               "eu-west-1",
  	"EU (London)":                "eu-west-2",
  	"EU (Paris)":                 "eu-west-3",
  	"EU (Stockholm)":             "eu-north-1",
  	// Previously absent although me-south-1 is a listed region.
  	"Middle East (Bahrain)":     "me-south-1",
  	"South America (Sao Paulo)": "sa-east-1",
  	"AWS GovCloud (US-East)":    "us-gov-east-1",
  	"AWS GovCloud (US)":         "us-gov-west-1",
  }
  // regionToBillingRegionCode maps a region identifier to the region code
  // that prefixes usage types in AWS billing data (for example "USE2" in
  // "USE2-BoxUsage"). Regions mapped to the empty string have no prefix in
  // this table. Every region listed in awsRegions has an entry.
  var regionToBillingRegionCode = map[string]string{
  	"us-east-2":      "USE2",
  	"us-east-1":      "",
  	"us-west-1":      "USW1",
  	"us-west-2":      "USW2",
  	"ap-east-1":      "APE1",
  	"ap-south-1":     "APS3",
  	"ap-northeast-3": "APN3",
  	"ap-northeast-2": "APN2",
  	"ap-southeast-1": "APS1",
  	"ap-southeast-2": "APS2",
  	"ap-northeast-1": "APN1",
  	"ca-central-1":   "CAN1",
  	"cn-north-1":     "",
  	"cn-northwest-1": "",
  	"eu-central-1":   "EUC1",
  	"eu-west-1":      "EU",
  	"eu-west-2":      "EUW2",
  	"eu-west-3":      "EUW3",
  	"eu-north-1":     "EUN1",
  	// Previously absent although me-south-1 is a listed region.
  	"me-south-1":    "MES1",
  	"sa-east-1":     "SAE1",
  	"us-gov-east-1": "UGE1",
  	"us-gov-west-1": "UGW1",
  }
  228. var loadedAWSSecret bool = false
  229. var awsSecret *AWSAccessKey = nil
  230. func (aws *AWS) GetLocalStorageQuery(window, offset string, rate bool, used bool) string {
  231. return ""
  232. }
  233. // KubeAttrConversion maps the k8s labels for region to an aws region
  234. func (aws *AWS) KubeAttrConversion(location, instanceType, operatingSystem string) string {
  235. operatingSystem = strings.ToLower(operatingSystem)
  236. region := locationToRegion[location]
  237. return region + "," + instanceType + "," + operatingSystem
  238. }
  // AwsSpotFeedInfo is the JSON payload accepted by UpdateConfig for the
  // SpotInfoUpdateType update: the location of the spot data feed, the
  // credentials to read it and the node label that marks spot nodes.
  type AwsSpotFeedInfo struct {
  	BucketName string `json:"bucketName"`
  	Prefix     string `json:"prefix"`
  	Region     string `json:"region"`
  	// AccountID travels under the JSON key "projectID".
  	AccountID        string `json:"projectID"`
  	ServiceKeyName   string `json:"serviceKeyName"`
  	ServiceKeySecret string `json:"serviceKeySecret"`
  	SpotLabel        string `json:"spotLabel"`
  	SpotLabelValue   string `json:"spotLabelValue"`
  }
  // AwsAthenaInfo is the JSON payload accepted by UpdateConfig for the
  // AthenaInfoUpdateType update: the Athena bucket, region, database and
  // table plus the credentials used to query them.
  type AwsAthenaInfo struct {
  	AthenaBucketName string `json:"athenaBucketName"`
  	AthenaRegion     string `json:"athenaRegion"`
  	AthenaDatabase   string `json:"athenaDatabase"`
  	AthenaTable      string `json:"athenaTable"`
  	ServiceKeyName   string `json:"serviceKeyName"`
  	ServiceKeySecret string `json:"serviceKeySecret"`
  	// AccountID travels under the JSON key "projectID".
  	AccountID      string `json:"projectID"`
  	MasterPayerARN string `json:"masterPayerARN"`
  }
  259. func (aws *AWS) GetManagementPlatform() (string, error) {
  260. nodes := aws.Clientset.GetAllNodes()
  261. if len(nodes) > 0 {
  262. n := nodes[0]
  263. version := n.Status.NodeInfo.KubeletVersion
  264. if strings.Contains(version, "eks") {
  265. return "eks", nil
  266. }
  267. if _, ok := n.Labels["kops.k8s.io/instancegroup"]; ok {
  268. return "kops", nil
  269. }
  270. }
  271. return "", nil
  272. }
  273. func (aws *AWS) GetConfig() (*CustomPricing, error) {
  274. c, err := aws.Config.GetCustomPricingData()
  275. if c.Discount == "" {
  276. c.Discount = "0%"
  277. }
  278. if c.NegotiatedDiscount == "" {
  279. c.NegotiatedDiscount = "0%"
  280. }
  281. if err != nil {
  282. return nil, err
  283. }
  284. return c, nil
  285. }
  286. func (aws *AWS) UpdateConfigFromConfigMap(a map[string]string) (*CustomPricing, error) {
  287. return aws.Config.UpdateFromMap(a)
  288. }
  289. func (aws *AWS) UpdateConfig(r io.Reader, updateType string) (*CustomPricing, error) {
  290. return aws.Config.Update(func(c *CustomPricing) error {
  291. if updateType == SpotInfoUpdateType {
  292. a := AwsSpotFeedInfo{}
  293. err := json.NewDecoder(r).Decode(&a)
  294. if err != nil {
  295. return err
  296. }
  297. c.ServiceKeyName = a.ServiceKeyName
  298. if a.ServiceKeySecret != "" {
  299. c.ServiceKeySecret = a.ServiceKeySecret
  300. }
  301. c.SpotDataPrefix = a.Prefix
  302. c.SpotDataBucket = a.BucketName
  303. c.ProjectID = a.AccountID
  304. c.SpotDataRegion = a.Region
  305. c.SpotLabel = a.SpotLabel
  306. c.SpotLabelValue = a.SpotLabelValue
  307. } else if updateType == AthenaInfoUpdateType {
  308. a := AwsAthenaInfo{}
  309. err := json.NewDecoder(r).Decode(&a)
  310. if err != nil {
  311. return err
  312. }
  313. c.AthenaBucketName = a.AthenaBucketName
  314. c.AthenaRegion = a.AthenaRegion
  315. c.AthenaDatabase = a.AthenaDatabase
  316. c.AthenaTable = a.AthenaTable
  317. c.ServiceKeyName = a.ServiceKeyName
  318. if a.ServiceKeySecret != "" {
  319. c.ServiceKeySecret = a.ServiceKeySecret
  320. }
  321. if a.MasterPayerARN != "" {
  322. c.MasterPayerARN = a.MasterPayerARN
  323. }
  324. c.AthenaProjectID = a.AccountID
  325. } else {
  326. a := make(map[string]interface{})
  327. err := json.NewDecoder(r).Decode(&a)
  328. if err != nil {
  329. return err
  330. }
  331. for k, v := range a {
  332. kUpper := strings.Title(k) // Just so we consistently supply / receive the same values, uppercase the first letter.
  333. vstr, ok := v.(string)
  334. if ok {
  335. err := SetCustomPricingField(c, kUpper, vstr)
  336. if err != nil {
  337. return err
  338. }
  339. } else {
  340. sci := v.(map[string]interface{})
  341. sc := make(map[string]string)
  342. for k, val := range sci {
  343. sc[k] = val.(string)
  344. }
  345. c.SharedCosts = sc //todo: support reflection/multiple map fields
  346. }
  347. }
  348. }
  349. remoteEnabled := os.Getenv(remoteEnabled)
  350. if remoteEnabled == "true" {
  351. err := UpdateClusterMeta(os.Getenv(clusterIDKey), c.ClusterName)
  352. if err != nil {
  353. return err
  354. }
  355. }
  356. return nil
  357. })
  358. }
  // awsKey carries the node information needed to look up pricing: the node
  // labels, the provider ID and the label name/value pair that marks a node
  // as a spot instance.
  type awsKey struct {
  	SpotLabelName  string
  	SpotLabelValue string
  	Labels         map[string]string
  	ProviderID     string
  }
  365. func (k *awsKey) GPUType() string {
  366. return ""
  367. }
  368. func (k *awsKey) ID() string {
  369. provIdRx := regexp.MustCompile("aws:///([^/]+)/([^/]+)") // It's of the form aws:///us-east-2a/i-0fea4fd46592d050b and we want i-0fea4fd46592d050b, if it exists
  370. for matchNum, group := range provIdRx.FindStringSubmatch(k.ProviderID) {
  371. if matchNum == 2 {
  372. return group
  373. }
  374. }
  375. klog.V(3).Infof("Could not find instance ID in \"%s\"", k.ProviderID)
  376. return ""
  377. }
  378. func (k *awsKey) Features() string {
  379. instanceType := k.Labels[v1.LabelInstanceType]
  380. var operatingSystem string
  381. operatingSystem, ok := k.Labels[v1.LabelOSStable]
  382. if !ok {
  383. operatingSystem = k.Labels["beta.kubernetes.io/os"]
  384. }
  385. region := k.Labels[v1.LabelZoneRegion]
  386. key := region + "," + instanceType + "," + operatingSystem
  387. usageType := "preemptible"
  388. spotKey := key + "," + usageType
  389. if l, ok := k.Labels["lifecycle"]; ok && l == "EC2Spot" {
  390. return spotKey
  391. }
  392. if l, ok := k.Labels[k.SpotLabelName]; ok && l == k.SpotLabelValue {
  393. return spotKey
  394. }
  395. return key
  396. }
  397. func (aws *AWS) PVPricing(pvk PVKey) (*PV, error) {
  398. pricing, ok := aws.Pricing[pvk.Features()]
  399. if !ok {
  400. klog.V(4).Infof("Persistent Volume pricing not found for %s: %s", pvk.GetStorageClass(), pvk.Features())
  401. return &PV{}, nil
  402. }
  403. return pricing.PV, nil
  404. }
  // awsPVKey carries the persistent volume information needed to look up
  // EBS pricing: the volume's labels and name plus the name and parameters
  // of its storage class.
  type awsPVKey struct {
  	Labels                 map[string]string
  	StorageClassParameters map[string]string
  	StorageClassName       string
  	Name                   string
  	// DefaultRegion is stored by GetPVKey; Features does not read it.
  	DefaultRegion string
  }
  412. func (aws *AWS) GetPVKey(pv *v1.PersistentVolume, parameters map[string]string, defaultRegion string) PVKey {
  413. return &awsPVKey{
  414. Labels: pv.Labels,
  415. StorageClassName: pv.Spec.StorageClassName,
  416. StorageClassParameters: parameters,
  417. Name: pv.Name,
  418. DefaultRegion: defaultRegion,
  419. }
  420. }
  421. func (key *awsPVKey) GetStorageClass() string {
  422. return key.StorageClassName
  423. }
  424. func (key *awsPVKey) Features() string {
  425. storageClass := key.StorageClassParameters["type"]
  426. if storageClass == "standard" {
  427. storageClass = "gp2"
  428. }
  429. // Storage class names are generally EBS volume types (gp2)
  430. // Keys in Pricing are based on UsageTypes (EBS:VolumeType.gp2)
  431. // Converts between the 2
  432. region := key.Labels[v1.LabelZoneRegion]
  433. //if region == "" {
  434. // region = "us-east-1"
  435. //}
  436. class, ok := volTypes[storageClass]
  437. if !ok {
  438. klog.V(4).Infof("No voltype mapping for %s's storageClass: %s", key.Name, storageClass)
  439. }
  440. return region + "," + class
  441. }
  442. // GetKey maps node labels to information needed to retrieve pricing data
  443. func (aws *AWS) GetKey(labels map[string]string, n *v1.Node) Key {
  444. return &awsKey{
  445. SpotLabelName: aws.SpotLabelName,
  446. SpotLabelValue: aws.SpotLabelValue,
  447. Labels: labels,
  448. ProviderID: labels["providerID"],
  449. }
  450. }
  451. func (aws *AWS) isPreemptible(key string) bool {
  452. s := strings.Split(key, ",")
  453. if len(s) == 4 && s[3] == "preemptible" {
  454. return true
  455. }
  456. return false
  457. }
  458. // DownloadPricingData fetches data from the AWS Pricing API
  459. func (aws *AWS) DownloadPricingData() error {
  460. aws.DownloadPricingDataLock.Lock()
  461. defer aws.DownloadPricingDataLock.Unlock()
  462. c, err := aws.Config.GetCustomPricingData()
  463. if err != nil {
  464. klog.V(1).Infof("Error downloading default pricing data: %s", err.Error())
  465. }
  466. aws.BaseCPUPrice = c.CPU
  467. aws.BaseRAMPrice = c.RAM
  468. aws.BaseGPUPrice = c.GPU
  469. aws.BaseSpotCPUPrice = c.SpotCPU
  470. aws.BaseSpotRAMPrice = c.SpotRAM
  471. aws.SpotLabelName = c.SpotLabel
  472. aws.SpotLabelValue = c.SpotLabelValue
  473. aws.SpotDataBucket = c.SpotDataBucket
  474. aws.SpotDataPrefix = c.SpotDataPrefix
  475. aws.ProjectID = c.ProjectID
  476. aws.SpotDataRegion = c.SpotDataRegion
  477. skn, sks := aws.getAWSAuth(false, c)
  478. aws.ServiceKeyName = skn
  479. aws.ServiceKeySecret = sks
  480. if len(aws.SpotDataBucket) != 0 && len(aws.ProjectID) == 0 {
  481. klog.V(1).Infof("using SpotDataBucket \"%s\" without ProjectID will not end well", aws.SpotDataBucket)
  482. }
  483. nodeList := aws.Clientset.GetAllNodes()
  484. inputkeys := make(map[string]bool)
  485. for _, n := range nodeList {
  486. labels := n.GetObjectMeta().GetLabels()
  487. key := aws.GetKey(labels, n)
  488. inputkeys[key.Features()] = true
  489. }
  490. pvList := aws.Clientset.GetAllPersistentVolumes()
  491. storageClasses := aws.Clientset.GetAllStorageClasses()
  492. storageClassMap := make(map[string]map[string]string)
  493. for _, storageClass := range storageClasses {
  494. params := storageClass.Parameters
  495. storageClassMap[storageClass.ObjectMeta.Name] = params
  496. if storageClass.GetAnnotations()["storageclass.kubernetes.io/is-default-class"] == "true" || storageClass.GetAnnotations()["storageclass.beta.kubernetes.io/is-default-class"] == "true" {
  497. storageClassMap["default"] = params
  498. storageClassMap[""] = params
  499. }
  500. }
  501. pvkeys := make(map[string]PVKey)
  502. for _, pv := range pvList {
  503. params, ok := storageClassMap[pv.Spec.StorageClassName]
  504. if !ok {
  505. klog.V(2).Infof("Unable to find params for storageClassName %s, falling back to default pricing", pv.Spec.StorageClassName)
  506. continue
  507. }
  508. key := aws.GetPVKey(pv, params, "")
  509. pvkeys[key.Features()] = key
  510. }
  511. // RIDataRunning establishes the existance of the goroutine. Since it's possible we
  512. // run multiple downloads, we don't want to create multiple go routines if one already exists
  513. if !aws.RIDataRunning && c.AthenaBucketName != "" {
  514. err = aws.GetReservationDataFromAthena() // Block until one run has completed.
  515. if err != nil {
  516. klog.V(1).Infof("Failed to lookup reserved instance data: %s", err.Error())
  517. } else { // If we make one successful run, check on new reservation data every hour
  518. go func() {
  519. defer errors.HandlePanic()
  520. aws.RIDataRunning = true
  521. for {
  522. klog.Infof("Reserved Instance watcher running... next update in 1h")
  523. time.Sleep(time.Hour)
  524. err := aws.GetReservationDataFromAthena()
  525. if err != nil {
  526. klog.Infof("Error updating RI data: %s", err.Error())
  527. }
  528. }
  529. }()
  530. }
  531. }
  532. aws.Pricing = make(map[string]*AWSProductTerms)
  533. aws.ValidPricingKeys = make(map[string]bool)
  534. skusToKeys := make(map[string]string)
  535. pricingURL := "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json"
  536. klog.V(2).Infof("starting download of \"%s\", which is quite large ...", pricingURL)
  537. resp, err := http.Get(pricingURL)
  538. if err != nil {
  539. klog.V(2).Infof("Bogus fetch of \"%s\": %v", pricingURL, err)
  540. return err
  541. }
  542. klog.V(2).Infof("Finished downloading \"%s\"", pricingURL)
  543. dec := json.NewDecoder(resp.Body)
  544. for {
  545. t, err := dec.Token()
  546. if err == io.EOF {
  547. klog.V(2).Infof("done loading \"%s\"\n", pricingURL)
  548. break
  549. }
  550. if t == "products" {
  551. _, err := dec.Token() // this should parse the opening "{""
  552. if err != nil {
  553. return err
  554. }
  555. for dec.More() {
  556. _, err := dec.Token() // the sku token
  557. if err != nil {
  558. return err
  559. }
  560. product := &AWSProduct{}
  561. err = dec.Decode(&product)
  562. if err != nil {
  563. klog.V(1).Infof("Error parsing response from \"%s\": %v", pricingURL, err.Error())
  564. break
  565. }
  566. if product.Attributes.PreInstalledSw == "NA" &&
  567. (strings.HasPrefix(product.Attributes.UsageType, "BoxUsage") || strings.Contains(product.Attributes.UsageType, "-BoxUsage")) {
  568. key := aws.KubeAttrConversion(product.Attributes.Location, product.Attributes.InstanceType, product.Attributes.OperatingSystem)
  569. spotKey := key + ",preemptible"
  570. if inputkeys[key] || inputkeys[spotKey] { // Just grab the sku even if spot, and change the price later.
  571. productTerms := &AWSProductTerms{
  572. Sku: product.Sku,
  573. Memory: product.Attributes.Memory,
  574. Storage: product.Attributes.Storage,
  575. VCpu: product.Attributes.VCpu,
  576. GPU: product.Attributes.GPU,
  577. }
  578. aws.Pricing[key] = productTerms
  579. aws.Pricing[spotKey] = productTerms
  580. skusToKeys[product.Sku] = key
  581. }
  582. aws.ValidPricingKeys[key] = true
  583. aws.ValidPricingKeys[spotKey] = true
  584. } else if strings.Contains(product.Attributes.UsageType, "EBS:Volume") {
  585. // UsageTypes may be prefixed with a region code - we're removing this when using
  586. // volTypes to keep lookups generic
  587. usageTypeRegx := regexp.MustCompile(".*(-|^)(EBS.+)")
  588. usageTypeMatch := usageTypeRegx.FindStringSubmatch(product.Attributes.UsageType)
  589. usageTypeNoRegion := usageTypeMatch[len(usageTypeMatch)-1]
  590. key := locationToRegion[product.Attributes.Location] + "," + usageTypeNoRegion
  591. spotKey := key + ",preemptible"
  592. pv := &PV{
  593. Class: volTypes[usageTypeNoRegion],
  594. Region: locationToRegion[product.Attributes.Location],
  595. }
  596. productTerms := &AWSProductTerms{
  597. Sku: product.Sku,
  598. PV: pv,
  599. }
  600. aws.Pricing[key] = productTerms
  601. aws.Pricing[spotKey] = productTerms
  602. skusToKeys[product.Sku] = key
  603. aws.ValidPricingKeys[key] = true
  604. aws.ValidPricingKeys[spotKey] = true
  605. }
  606. }
  607. }
  608. if t == "terms" {
  609. _, err := dec.Token() // this should parse the opening "{""
  610. if err != nil {
  611. return err
  612. }
  613. termType, err := dec.Token()
  614. if err != nil {
  615. return err
  616. }
  617. if termType == "OnDemand" {
  618. _, err := dec.Token()
  619. if err != nil { // again, should parse an opening "{"
  620. return err
  621. }
  622. for dec.More() {
  623. sku, err := dec.Token()
  624. if err != nil {
  625. return err
  626. }
  627. _, err = dec.Token() // another opening "{"
  628. if err != nil {
  629. return err
  630. }
  631. skuOnDemand, err := dec.Token()
  632. if err != nil {
  633. return err
  634. }
  635. offerTerm := &AWSOfferTerm{}
  636. err = dec.Decode(&offerTerm)
  637. if err != nil {
  638. klog.V(1).Infof("Error decoding AWS Offer Term: " + err.Error())
  639. }
  640. if sku.(string)+OnDemandRateCode == skuOnDemand {
  641. key, ok := skusToKeys[sku.(string)]
  642. spotKey := key + ",preemptible"
  643. if ok {
  644. aws.Pricing[key].OnDemand = offerTerm
  645. aws.Pricing[spotKey].OnDemand = offerTerm
  646. if strings.Contains(key, "EBS:VolumeP-IOPS.piops") {
  647. // If the specific UsageType is the per IO cost used on io1 volumes
  648. // we need to add the per IO cost to the io1 PV cost
  649. cost := offerTerm.PriceDimensions[sku.(string)+OnDemandRateCode+HourlyRateCode].PricePerUnit.USD
  650. // Add the per IO cost to the PV object for the io1 volume type
  651. aws.Pricing[key].PV.CostPerIO = cost
  652. } else if strings.Contains(key, "EBS:Volume") {
  653. // If volume, we need to get hourly cost and add it to the PV object
  654. cost := offerTerm.PriceDimensions[sku.(string)+OnDemandRateCode+HourlyRateCode].PricePerUnit.USD
  655. costFloat, _ := strconv.ParseFloat(cost, 64)
  656. hourlyPrice := costFloat / 730
  657. aws.Pricing[key].PV.Cost = strconv.FormatFloat(hourlyPrice, 'f', -1, 64)
  658. }
  659. }
  660. }
  661. _, err = dec.Token()
  662. if err != nil {
  663. return err
  664. }
  665. }
  666. _, err = dec.Token()
  667. if err != nil {
  668. return err
  669. }
  670. }
  671. }
  672. }
  673. // Always run spot pricing refresh when performing download
  674. aws.refreshSpotPricing(true)
  675. // Only start a single refresh goroutine
  676. if !aws.SpotRefreshRunning {
  677. aws.SpotRefreshRunning = true
  678. go func() {
  679. defer errors.HandlePanic()
  680. for {
  681. klog.Infof("Spot Pricing Refresh scheduled in %.2f minutes.", SpotRefreshDuration.Minutes())
  682. time.Sleep(SpotRefreshDuration)
  683. // Reoccurring refresh checks update times
  684. aws.refreshSpotPricing(false)
  685. }
  686. }()
  687. }
  688. return nil
  689. }
  690. func (aws *AWS) refreshSpotPricing(force bool) {
  691. aws.SpotPricingLock.Lock()
  692. defer aws.SpotPricingLock.Unlock()
  693. now := time.Now().UTC()
  694. updateTime := now.Add(-SpotRefreshDuration)
  695. // Return if there was an update time set and an hour hasn't elapsed
  696. if !force && aws.SpotPricingUpdatedAt != nil && aws.SpotPricingUpdatedAt.After(updateTime) {
  697. return
  698. }
  699. sp, err := parseSpotData(aws.SpotDataBucket, aws.SpotDataPrefix, aws.ProjectID, aws.SpotDataRegion, aws.ServiceKeyName, aws.ServiceKeySecret)
  700. if err != nil {
  701. klog.V(1).Infof("Skipping AWS spot data download: %s", err.Error())
  702. return
  703. }
  704. // update time last updated
  705. aws.SpotPricingUpdatedAt = &now
  706. aws.SpotPricingByInstanceID = sp
  707. }
  708. // Stubbed NetworkPricing for AWS. Pull directly from aws.json for now
  709. func (aws *AWS) NetworkPricing() (*Network, error) {
  710. cpricing, err := aws.Config.GetCustomPricingData()
  711. if err != nil {
  712. return nil, err
  713. }
  714. znec, err := strconv.ParseFloat(cpricing.ZoneNetworkEgress, 64)
  715. if err != nil {
  716. return nil, err
  717. }
  718. rnec, err := strconv.ParseFloat(cpricing.RegionNetworkEgress, 64)
  719. if err != nil {
  720. return nil, err
  721. }
  722. inec, err := strconv.ParseFloat(cpricing.InternetNetworkEgress, 64)
  723. if err != nil {
  724. return nil, err
  725. }
  726. return &Network{
  727. ZoneNetworkEgressCost: znec,
  728. RegionNetworkEgressCost: rnec,
  729. InternetNetworkEgressCost: inec,
  730. }, nil
  731. }
  732. // AllNodePricing returns all the billing data fetched.
  733. func (aws *AWS) AllNodePricing() (interface{}, error) {
  734. aws.DownloadPricingDataLock.RLock()
  735. defer aws.DownloadPricingDataLock.RUnlock()
  736. return aws.Pricing, nil
  737. }
  738. func (aws *AWS) spotPricing(instanceID string) (*spotInfo, bool) {
  739. aws.SpotPricingLock.RLock()
  740. defer aws.SpotPricingLock.RUnlock()
  741. info, ok := aws.SpotPricingByInstanceID[instanceID]
  742. return info, ok
  743. }
  744. func (aws *AWS) reservedInstancePricing(instanceID string) (*RIData, bool) {
  745. aws.RIDataLock.RLock()
  746. defer aws.RIDataLock.RUnlock()
  747. data, ok := aws.RIPricingByInstanceID[instanceID]
  748. return data, ok
  749. }
  750. func (aws *AWS) createNode(terms *AWSProductTerms, usageType string, k Key) (*Node, error) {
  751. key := k.Features()
  752. if spotInfo, ok := aws.spotPricing(k.ID()); ok {
  753. var spotcost string
  754. klog.V(3).Infof("Looking up spot data from feed for node %s", k.ID())
  755. arr := strings.Split(spotInfo.Charge, " ")
  756. if len(arr) == 2 {
  757. spotcost = arr[0]
  758. } else {
  759. klog.V(2).Infof("Spot data for node %s is missing", k.ID())
  760. }
  761. return &Node{
  762. Cost: spotcost,
  763. VCPU: terms.VCpu,
  764. RAM: terms.Memory,
  765. GPU: terms.GPU,
  766. Storage: terms.Storage,
  767. BaseCPUPrice: aws.BaseCPUPrice,
  768. BaseRAMPrice: aws.BaseRAMPrice,
  769. BaseGPUPrice: aws.BaseGPUPrice,
  770. UsageType: usageType,
  771. }, nil
  772. } else if aws.isPreemptible(key) { // Preemptible but we don't have any data in the pricing report.
  773. klog.Infof("Node %s marked preemitible but we have no data in spot feed", k.ID())
  774. return &Node{
  775. VCPU: terms.VCpu,
  776. VCPUCost: aws.BaseSpotCPUPrice,
  777. RAM: terms.Memory,
  778. GPU: terms.GPU,
  779. RAMCost: aws.BaseSpotRAMPrice,
  780. Storage: terms.Storage,
  781. BaseCPUPrice: aws.BaseCPUPrice,
  782. BaseRAMPrice: aws.BaseRAMPrice,
  783. BaseGPUPrice: aws.BaseGPUPrice,
  784. UsageType: usageType,
  785. }, nil
  786. } else if ri, ok := aws.reservedInstancePricing(k.ID()); ok {
  787. strCost := fmt.Sprintf("%f", ri.EffectiveCost)
  788. return &Node{
  789. Cost: strCost,
  790. VCPU: terms.VCpu,
  791. RAM: terms.Memory,
  792. GPU: terms.GPU,
  793. Storage: terms.Storage,
  794. BaseCPUPrice: aws.BaseCPUPrice,
  795. BaseRAMPrice: aws.BaseRAMPrice,
  796. BaseGPUPrice: aws.BaseGPUPrice,
  797. UsageType: usageType,
  798. }, nil
  799. }
  800. c, ok := terms.OnDemand.PriceDimensions[terms.Sku+OnDemandRateCode+HourlyRateCode]
  801. if !ok {
  802. return nil, fmt.Errorf("Could not fetch data for \"%s\"", k.ID())
  803. }
  804. cost := c.PricePerUnit.USD
  805. return &Node{
  806. Cost: cost,
  807. VCPU: terms.VCpu,
  808. RAM: terms.Memory,
  809. GPU: terms.GPU,
  810. Storage: terms.Storage,
  811. BaseCPUPrice: aws.BaseCPUPrice,
  812. BaseRAMPrice: aws.BaseRAMPrice,
  813. BaseGPUPrice: aws.BaseGPUPrice,
  814. UsageType: usageType,
  815. }, nil
  816. }
  817. // NodePricing takes in a key from GetKey and returns a Node object for use in building the cost model.
  818. func (aws *AWS) NodePricing(k Key) (*Node, error) {
  819. aws.DownloadPricingDataLock.RLock()
  820. defer aws.DownloadPricingDataLock.RUnlock()
  821. key := k.Features()
  822. usageType := "ondemand"
  823. if aws.isPreemptible(key) {
  824. usageType = "preemptible"
  825. }
  826. terms, ok := aws.Pricing[key]
  827. if ok {
  828. return aws.createNode(terms, usageType, k)
  829. } else if _, ok := aws.ValidPricingKeys[key]; ok {
  830. aws.DownloadPricingDataLock.RUnlock()
  831. err := aws.DownloadPricingData()
  832. aws.DownloadPricingDataLock.RLock()
  833. if err != nil {
  834. return &Node{
  835. Cost: aws.BaseCPUPrice,
  836. BaseCPUPrice: aws.BaseCPUPrice,
  837. BaseRAMPrice: aws.BaseRAMPrice,
  838. BaseGPUPrice: aws.BaseGPUPrice,
  839. UsageType: usageType,
  840. UsesBaseCPUPrice: true,
  841. }, err
  842. }
  843. terms, termsOk := aws.Pricing[key]
  844. if !termsOk {
  845. return &Node{
  846. Cost: aws.BaseCPUPrice,
  847. BaseCPUPrice: aws.BaseCPUPrice,
  848. BaseRAMPrice: aws.BaseRAMPrice,
  849. BaseGPUPrice: aws.BaseGPUPrice,
  850. UsageType: usageType,
  851. UsesBaseCPUPrice: true,
  852. }, fmt.Errorf("Unable to find any Pricing data for \"%s\"", key)
  853. }
  854. return aws.createNode(terms, usageType, k)
  855. } else { // Fall back to base pricing if we can't find the key.
  856. klog.V(1).Infof("Invalid Pricing Key \"%s\"", key)
  857. return &Node{
  858. Cost: aws.BaseCPUPrice,
  859. BaseCPUPrice: aws.BaseCPUPrice,
  860. BaseRAMPrice: aws.BaseRAMPrice,
  861. BaseGPUPrice: aws.BaseGPUPrice,
  862. UsageType: usageType,
  863. UsesBaseCPUPrice: true,
  864. }, nil
  865. }
  866. }
  867. // ClusterInfo returns an object that represents the cluster. TODO: actually return the name of the cluster. Blocked on cluster federation.
  868. func (awsProvider *AWS) ClusterInfo() (map[string]string, error) {
  869. defaultClusterName := "AWS Cluster #1"
  870. c, err := awsProvider.GetConfig()
  871. if err != nil {
  872. return nil, err
  873. }
  874. remote := os.Getenv(remoteEnabled)
  875. remoteEnabled := false
  876. if os.Getenv(remote) == "true" {
  877. remoteEnabled = true
  878. }
  879. if c.ClusterName != "" {
  880. m := make(map[string]string)
  881. m["name"] = c.ClusterName
  882. m["provider"] = "AWS"
  883. m["id"] = os.Getenv(clusterIDKey)
  884. m["remoteReadEnabled"] = strconv.FormatBool(remoteEnabled)
  885. return m, nil
  886. }
  887. makeStructure := func(clusterName string) (map[string]string, error) {
  888. klog.V(2).Infof("Returning \"%s\" as ClusterName", clusterName)
  889. m := make(map[string]string)
  890. m["name"] = clusterName
  891. m["provider"] = "AWS"
  892. m["id"] = os.Getenv(clusterIDKey)
  893. m["remoteReadEnabled"] = strconv.FormatBool(remoteEnabled)
  894. return m, nil
  895. }
  896. maybeClusterId := os.Getenv(ClusterIdEnvVar)
  897. if len(maybeClusterId) != 0 {
  898. return makeStructure(maybeClusterId)
  899. }
  900. // TODO: This should be cached, it can take a long time to hit the API
  901. //provIdRx := regexp.MustCompile("aws:///([^/]+)/([^/]+)")
  902. //clusterIdRx := regexp.MustCompile("^kubernetes\\.io/cluster/([^/]+)")
  903. //klog.Infof("nodelist get here %s", time.Now())
  904. //nodeList := awsProvider.Clientset.GetAllNodes()
  905. //klog.Infof("nodelist done here %s", time.Now())
  906. /*for _, n := range nodeList {
  907. region := ""
  908. instanceId := ""
  909. providerId := n.Spec.ProviderID
  910. for matchNum, group := range provIdRx.FindStringSubmatch(providerId) {
  911. if matchNum == 1 {
  912. region = group
  913. } else if matchNum == 2 {
  914. instanceId = group
  915. }
  916. }
  917. if len(instanceId) == 0 {
  918. klog.V(2).Infof("Unable to decode Node.ProviderID \"%s\", skipping it", providerId)
  919. continue
  920. }
  921. c := &aws.Config{
  922. Region: aws.String(region),
  923. }
  924. s := session.Must(session.NewSession(c))
  925. ec2Svc := ec2.New(s)
  926. di, diErr := ec2Svc.DescribeInstances(&ec2.DescribeInstancesInput{
  927. InstanceIds: []*string{
  928. aws.String(instanceId),
  929. },
  930. })
  931. if diErr != nil {
  932. klog.Infof("Error describing instances: %s", diErr)
  933. continue
  934. }
  935. if len(di.Reservations) != 1 {
  936. klog.V(2).Infof("Expected 1 Reservation back from DescribeInstances(%s), received %d", instanceId, len(di.Reservations))
  937. continue
  938. }
  939. res := di.Reservations[0]
  940. if len(res.Instances) != 1 {
  941. klog.V(2).Infof("Expected 1 Instance back from DescribeInstances(%s), received %d", instanceId, len(res.Instances))
  942. continue
  943. }
  944. inst := res.Instances[0]
  945. for _, tag := range inst.Tags {
  946. tagKey := *tag.Key
  947. for matchNum, group := range clusterIdRx.FindStringSubmatch(tagKey) {
  948. if matchNum != 1 {
  949. continue
  950. }
  951. return makeStructure(group)
  952. }
  953. }
  954. }*/
  955. klog.V(2).Infof("Unable to sniff out cluster ID, perhaps set $%s to force one", ClusterIdEnvVar)
  956. return makeStructure(defaultClusterName)
  957. }
  958. // Gets the aws key id and secret
  959. func (aws *AWS) getAWSAuth(forceReload bool, cp *CustomPricing) (string, string) {
  960. // 1. Check config values first (set from frontend UI)
  961. if cp.ServiceKeyName != "" && cp.ServiceKeySecret != "" {
  962. return cp.ServiceKeyName, cp.ServiceKeySecret
  963. }
  964. // 2. Check for secret
  965. s, _ := aws.loadAWSAuthSecret(forceReload)
  966. if s != nil && s.AccessKeyID != "" && s.SecretAccessKey != "" {
  967. return s.AccessKeyID, s.SecretAccessKey
  968. }
  969. // 3. Fall back to env vars
  970. return os.Getenv(awsAccessKeyIDEnvVar), os.Getenv(awsAccessKeySecretEnvVar)
  971. }
  972. // Load once and cache the result (even on failure). This is an install time secret, so
  973. // we don't expect the secret to change. If it does, however, we can force reload using
  974. // the input parameter.
  975. func (aws *AWS) loadAWSAuthSecret(force bool) (*AWSAccessKey, error) {
  976. if !force && loadedAWSSecret {
  977. return awsSecret, nil
  978. }
  979. loadedAWSSecret = true
  980. exists, err := util.FileExists(authSecretPath)
  981. if !exists || err != nil {
  982. return nil, fmt.Errorf("Failed to locate service account file: %s", authSecretPath)
  983. }
  984. result, err := ioutil.ReadFile(authSecretPath)
  985. if err != nil {
  986. return nil, err
  987. }
  988. var ak AWSAccessKey
  989. err = json.Unmarshal(result, &ak)
  990. if err != nil {
  991. return nil, err
  992. }
  993. awsSecret = &ak
  994. return awsSecret, nil
  995. }
  996. func (aws *AWS) configureAWSAuth() error {
  997. accessKeyID := aws.ServiceKeyName
  998. accessKeySecret := aws.ServiceKeySecret
  999. if accessKeyID != "" && accessKeySecret != "" { // credentials may exist on the actual AWS node-- if so, use those. If not, override with the service key
  1000. err := os.Setenv(awsAccessKeyIDEnvVar, accessKeyID)
  1001. if err != nil {
  1002. return err
  1003. }
  1004. err = os.Setenv(awsAccessKeySecretEnvVar, accessKeySecret)
  1005. if err != nil {
  1006. return err
  1007. }
  1008. }
  1009. return nil
  1010. }
  1011. func getClusterConfig(ccFile string) (map[string]string, error) {
  1012. clusterConfig, err := os.Open(ccFile)
  1013. if err != nil {
  1014. return nil, err
  1015. }
  1016. defer clusterConfig.Close()
  1017. b, err := ioutil.ReadAll(clusterConfig)
  1018. if err != nil {
  1019. return nil, err
  1020. }
  1021. var clusterConf map[string]string
  1022. err = json.Unmarshal([]byte(b), &clusterConf)
  1023. if err != nil {
  1024. return nil, err
  1025. }
  1026. return clusterConf, nil
  1027. }
  1028. // SetKeyEnv ensures that the two environment variables necessary to configure
  1029. // a new AWS Session are set.
  1030. func (a *AWS) SetKeyEnv() error {
  1031. // TODO add this to the helm chart, mirroring the cost-model
  1032. // configPath := os.Getenv("CONFIG_PATH")
  1033. configPath := defaultConfigPath
  1034. path := configPath + "aws.json"
  1035. if _, err := os.Stat(path); err != nil {
  1036. if os.IsNotExist(err) {
  1037. log.Printf("error: file %s does not exist", path)
  1038. } else {
  1039. log.Printf("error: %s", err)
  1040. }
  1041. return err
  1042. }
  1043. jsonFile, err := os.Open(path)
  1044. defer jsonFile.Close()
  1045. configMap := map[string]string{}
  1046. configBytes, err := ioutil.ReadAll(jsonFile)
  1047. if err != nil {
  1048. return err
  1049. }
  1050. json.Unmarshal([]byte(configBytes), &configMap)
  1051. keyName := configMap["awsServiceKeyName"]
  1052. keySecret := configMap["awsServiceKeySecret"]
  1053. // These are required before calling NewEnvCredentials below
  1054. os.Setenv("AWS_ACCESS_KEY_ID", keyName)
  1055. os.Setenv("AWS_SECRET_ACCESS_KEY", keySecret)
  1056. return nil
  1057. }
  1058. func (a *AWS) getAddressesForRegion(region string) (*ec2.DescribeAddressesOutput, error) {
  1059. sess, err := session.NewSession(&aws.Config{
  1060. Region: aws.String(region),
  1061. Credentials: credentials.NewEnvCredentials(),
  1062. })
  1063. if err != nil {
  1064. return nil, err
  1065. }
  1066. ec2Svc := ec2.New(sess)
  1067. return ec2Svc.DescribeAddresses(&ec2.DescribeAddressesInput{})
  1068. }
  1069. func (a *AWS) GetAddresses() ([]byte, error) {
  1070. if err := a.SetKeyEnv(); err != nil {
  1071. return nil, err
  1072. }
  1073. addressCh := make(chan *ec2.DescribeAddressesOutput, len(awsRegions))
  1074. errorCh := make(chan error, len(awsRegions))
  1075. var wg sync.WaitGroup
  1076. wg.Add(len(awsRegions))
  1077. // Get volumes from each AWS region
  1078. for _, r := range awsRegions {
  1079. // Fetch IP address response and send results and errors to their
  1080. // respective channels
  1081. go func(region string) {
  1082. defer wg.Done()
  1083. defer errors.HandlePanic()
  1084. // Query for first page of volume results
  1085. resp, err := a.getAddressesForRegion(region)
  1086. if err != nil {
  1087. if aerr, ok := err.(awserr.Error); ok {
  1088. switch aerr.Code() {
  1089. default:
  1090. errorCh <- aerr
  1091. }
  1092. return
  1093. } else {
  1094. errorCh <- err
  1095. return
  1096. }
  1097. }
  1098. addressCh <- resp
  1099. }(r)
  1100. }
  1101. // Close the result channels after everything has been sent
  1102. go func() {
  1103. defer errors.HandlePanic()
  1104. wg.Wait()
  1105. close(errorCh)
  1106. close(addressCh)
  1107. }()
  1108. addresses := []*ec2.Address{}
  1109. for adds := range addressCh {
  1110. addresses = append(addresses, adds.Addresses...)
  1111. }
  1112. errors := []error{}
  1113. for err := range errorCh {
  1114. log.Printf("[Warning]: unable to get addresses: %s", err)
  1115. errors = append(errors, err)
  1116. }
  1117. // Return error if no addresses are returned
  1118. if len(errors) > 0 && len(addresses) == 0 {
  1119. return nil, fmt.Errorf("%d error(s) retrieving addresses: %v", len(errors), errors)
  1120. }
  1121. // Format the response this way to match the JSON-encoded formatting of a single response
  1122. // from DescribeAddresss, so that consumers can always expect AWS disk responses to have
  1123. // a "Addresss" key at the top level.
  1124. return json.Marshal(map[string][]*ec2.Address{
  1125. "Addresses": addresses,
  1126. })
  1127. }
  1128. func (a *AWS) getDisksForRegion(region string, maxResults int64, nextToken *string) (*ec2.DescribeVolumesOutput, error) {
  1129. sess, err := session.NewSession(&aws.Config{
  1130. Region: aws.String(region),
  1131. Credentials: credentials.NewEnvCredentials(),
  1132. })
  1133. if err != nil {
  1134. return nil, err
  1135. }
  1136. ec2Svc := ec2.New(sess)
  1137. return ec2Svc.DescribeVolumes(&ec2.DescribeVolumesInput{
  1138. MaxResults: &maxResults,
  1139. NextToken: nextToken,
  1140. })
  1141. }
  1142. // GetDisks returns the AWS disks backing PVs. Useful because sometimes k8s will not clean up PVs correctly. Requires a json config in /var/configs with key region.
  1143. func (a *AWS) GetDisks() ([]byte, error) {
  1144. if err := a.SetKeyEnv(); err != nil {
  1145. return nil, err
  1146. }
  1147. volumeCh := make(chan *ec2.DescribeVolumesOutput, len(awsRegions))
  1148. errorCh := make(chan error, len(awsRegions))
  1149. var wg sync.WaitGroup
  1150. wg.Add(len(awsRegions))
  1151. // Get volumes from each AWS region
  1152. for _, r := range awsRegions {
  1153. // Fetch volume response and send results and errors to their
  1154. // respective channels
  1155. go func(region string) {
  1156. defer wg.Done()
  1157. defer errors.HandlePanic()
  1158. // Query for first page of volume results
  1159. resp, err := a.getDisksForRegion(region, 1000, nil)
  1160. if err != nil {
  1161. if aerr, ok := err.(awserr.Error); ok {
  1162. switch aerr.Code() {
  1163. default:
  1164. errorCh <- aerr
  1165. }
  1166. return
  1167. } else {
  1168. errorCh <- err
  1169. return
  1170. }
  1171. }
  1172. volumeCh <- resp
  1173. // A NextToken indicates more pages of results. Keep querying
  1174. // until all pages are retrieved.
  1175. for resp.NextToken != nil {
  1176. resp, err = a.getDisksForRegion(region, 100, resp.NextToken)
  1177. if err != nil {
  1178. if aerr, ok := err.(awserr.Error); ok {
  1179. switch aerr.Code() {
  1180. default:
  1181. errorCh <- aerr
  1182. }
  1183. return
  1184. } else {
  1185. errorCh <- err
  1186. return
  1187. }
  1188. }
  1189. volumeCh <- resp
  1190. }
  1191. }(r)
  1192. }
  1193. // Close the result channels after everything has been sent
  1194. go func() {
  1195. defer errors.HandlePanic()
  1196. wg.Wait()
  1197. close(errorCh)
  1198. close(volumeCh)
  1199. }()
  1200. volumes := []*ec2.Volume{}
  1201. for vols := range volumeCh {
  1202. volumes = append(volumes, vols.Volumes...)
  1203. }
  1204. errors := []error{}
  1205. for err := range errorCh {
  1206. log.Printf("[Warning]: unable to get disks: %s", err)
  1207. errors = append(errors, err)
  1208. }
  1209. // Return error if no volumes are returned
  1210. if len(errors) > 0 && len(volumes) == 0 {
  1211. return nil, fmt.Errorf("%d error(s) retrieving volumes: %v", len(errors), errors)
  1212. }
  1213. // Format the response this way to match the JSON-encoded formatting of a single response
  1214. // from DescribeVolumes, so that consumers can always expect AWS disk responses to have
  1215. // a "Volumes" key at the top level.
  1216. return json.Marshal(map[string][]*ec2.Volume{
  1217. "Volumes": volumes,
  1218. })
  1219. }
  1220. // ConvertToGlueColumnFormat takes a string and runs through various regex
  1221. // and string replacement statements to convert it to a format compatible
  1222. // with AWS Glue and Athena column names.
  1223. // Following guidance from AWS provided here ('Column Names' section):
  1224. // https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/run-athena-sql.html
  1225. // It returns a string containing the column name in proper column name format and length.
  1226. func ConvertToGlueColumnFormat(column_name string) string {
  1227. klog.V(5).Infof("Converting string \"%s\" to proper AWS Glue column name.", column_name)
  1228. // An underscore is added in front of uppercase letters
  1229. capital_underscore := regexp.MustCompile(`[A-Z]`)
  1230. final := capital_underscore.ReplaceAllString(column_name, `_$0`)
  1231. // Any non-alphanumeric characters are replaced with an underscore
  1232. no_space_punc := regexp.MustCompile(`[\s]{1,}|[^A-Za-z0-9]`)
  1233. final = no_space_punc.ReplaceAllString(final, "_")
  1234. // Duplicate underscores are removed
  1235. no_dup_underscore := regexp.MustCompile(`_{2,}`)
  1236. final = no_dup_underscore.ReplaceAllString(final, "_")
  1237. // Any leading and trailing underscores are removed
  1238. no_front_end_underscore := regexp.MustCompile(`(^\_|\_$)`)
  1239. final = no_front_end_underscore.ReplaceAllString(final, "")
  1240. // Uppercase to lowercase
  1241. final = strings.ToLower(final)
  1242. // Longer column name than expected - remove _ left to right
  1243. allowed_col_len := 128
  1244. undersc_to_remove := len(final) - allowed_col_len
  1245. if undersc_to_remove > 0 {
  1246. final = strings.Replace(final, "_", "", undersc_to_remove)
  1247. }
  1248. // If removing all of the underscores still didn't
  1249. // make the column name < 128 characters, trim it!
  1250. if len(final) > allowed_col_len {
  1251. final = final[:allowed_col_len]
  1252. }
  1253. klog.V(5).Infof("Column name being returned: \"%s\". Length: \"%d\".", final, len(final))
  1254. return final
  1255. }
  1256. func generateAWSGroupBy(lastIdx int) string {
  1257. sequence := []string{}
  1258. for i := 1; i < lastIdx+1; i++ {
  1259. sequence = append(sequence, strconv.Itoa(i))
  1260. }
  1261. return strings.Join(sequence, ",")
  1262. }
  1263. func (a *AWS) QueryAthenaBillingData(query string) (*athena.GetQueryResultsOutput, error) {
  1264. customPricing, err := a.GetConfig()
  1265. if err != nil {
  1266. return nil, err
  1267. }
  1268. if customPricing.ServiceKeyName != "" {
  1269. err = os.Setenv(awsAccessKeyIDEnvVar, customPricing.ServiceKeyName)
  1270. if err != nil {
  1271. return nil, err
  1272. }
  1273. err = os.Setenv(awsAccessKeySecretEnvVar, customPricing.ServiceKeySecret)
  1274. if err != nil {
  1275. return nil, err
  1276. }
  1277. }
  1278. region := aws.String(customPricing.AthenaRegion)
  1279. resultsBucket := customPricing.AthenaBucketName
  1280. database := customPricing.AthenaDatabase
  1281. c := &aws.Config{
  1282. Region: region,
  1283. }
  1284. s := session.Must(session.NewSession(c))
  1285. svc := athena.New(s)
  1286. if customPricing.MasterPayerARN != "" {
  1287. creds := stscreds.NewCredentials(s, customPricing.MasterPayerARN)
  1288. svc = athena.New(s, &aws.Config{
  1289. Region: region,
  1290. Credentials: creds,
  1291. })
  1292. }
  1293. var e athena.StartQueryExecutionInput
  1294. var r athena.ResultConfiguration
  1295. r.SetOutputLocation(resultsBucket)
  1296. e.SetResultConfiguration(&r)
  1297. e.SetQueryString(query)
  1298. var q athena.QueryExecutionContext
  1299. q.SetDatabase(database)
  1300. e.SetQueryExecutionContext(&q)
  1301. res, err := svc.StartQueryExecution(&e)
  1302. if err != nil {
  1303. return nil, err
  1304. }
  1305. klog.V(2).Infof("StartQueryExecution result:")
  1306. klog.V(2).Infof(res.GoString())
  1307. var qri athena.GetQueryExecutionInput
  1308. qri.SetQueryExecutionId(*res.QueryExecutionId)
  1309. var qrop *athena.GetQueryExecutionOutput
  1310. duration := time.Duration(2) * time.Second // Pause for 2 seconds
  1311. for {
  1312. qrop, err = svc.GetQueryExecution(&qri)
  1313. if err != nil {
  1314. return nil, err
  1315. }
  1316. if *qrop.QueryExecution.Status.State != "RUNNING" && *qrop.QueryExecution.Status.State != "QUEUED" {
  1317. break
  1318. }
  1319. time.Sleep(duration)
  1320. }
  1321. if *qrop.QueryExecution.Status.State == "SUCCEEDED" {
  1322. var ip athena.GetQueryResultsInput
  1323. ip.SetQueryExecutionId(*res.QueryExecutionId)
  1324. return svc.GetQueryResults(&ip)
  1325. } else {
  1326. return nil, fmt.Errorf("No results available for %s", query)
  1327. }
  1328. }
  1329. type RIData struct {
  1330. ResourceID string
  1331. EffectiveCost float64
  1332. ReservationARN string
  1333. MostRecentDate string
  1334. }
  1335. func (a *AWS) GetReservationDataFromAthena() error {
  1336. cfg, err := a.GetConfig()
  1337. if err != nil {
  1338. return err
  1339. }
  1340. if cfg.AthenaBucketName == "" {
  1341. return fmt.Errorf("No Athena Bucket configured")
  1342. }
  1343. if a.RIPricingByInstanceID == nil {
  1344. a.RIPricingByInstanceID = make(map[string]*RIData)
  1345. }
  1346. tNow := time.Now()
  1347. tOneDayAgo := tNow.Add(time.Duration(-25) * time.Hour) // Also get files from one day ago to avoid boundary conditions
  1348. start := tOneDayAgo.Format("2006-01-02")
  1349. end := tNow.Format("2006-01-02")
  1350. q := `SELECT
  1351. line_item_usage_start_date,
  1352. reservation_reservation_a_r_n,
  1353. line_item_resource_id,
  1354. reservation_effective_cost
  1355. FROM %s as cost_data
  1356. WHERE line_item_usage_start_date BETWEEN date '%s' AND date '%s'
  1357. AND reservation_reservation_a_r_n <> '' ORDER BY
  1358. line_item_usage_start_date DESC`
  1359. query := fmt.Sprintf(q, cfg.AthenaTable, start, end)
  1360. op, err := a.QueryAthenaBillingData(query)
  1361. if err != nil {
  1362. return fmt.Errorf("Error fetching Reserved Instance Data: %s", err)
  1363. }
  1364. klog.Infof("Fetching RI data...")
  1365. if len(op.ResultSet.Rows) > 1 {
  1366. a.RIDataLock.Lock()
  1367. mostRecentDate := ""
  1368. for _, r := range op.ResultSet.Rows[1:(len(op.ResultSet.Rows) - 1)] {
  1369. d := *r.Data[0].VarCharValue
  1370. if mostRecentDate == "" {
  1371. mostRecentDate = d
  1372. } else if mostRecentDate != d { // Get all most recent assignments
  1373. break
  1374. }
  1375. cost, err := strconv.ParseFloat(*r.Data[3].VarCharValue, 64)
  1376. if err != nil {
  1377. klog.Infof("Error converting `%s` from float ", *r.Data[3].VarCharValue)
  1378. }
  1379. r := &RIData{
  1380. ResourceID: *r.Data[2].VarCharValue,
  1381. EffectiveCost: cost,
  1382. ReservationARN: *r.Data[1].VarCharValue,
  1383. MostRecentDate: d,
  1384. }
  1385. a.RIPricingByInstanceID[r.ResourceID] = r
  1386. }
  1387. klog.V(1).Infof("Found %d reserved instances", len(a.RIPricingByInstanceID))
  1388. for k, r := range a.RIPricingByInstanceID {
  1389. klog.V(1).Infof("Reserved Instance Data found for node %s : %f at time %s", k, r.EffectiveCost, r.MostRecentDate)
  1390. }
  1391. a.RIDataLock.Unlock()
  1392. } else {
  1393. klog.Infof("No reserved instance data found")
  1394. }
  1395. return nil
  1396. }
  1397. // ExternalAllocations represents tagged assets outside the scope of kubernetes.
  1398. // "start" and "end" are dates of the format YYYY-MM-DD
  1399. // "aggregator" is the tag used to determine how to allocate those assets, ie namespace, pod, etc.
  1400. func (a *AWS) ExternalAllocations(start string, end string, aggregators []string, filterType string, filterValue string, crossCluster bool) ([]*OutOfClusterAllocation, error) {
  1401. customPricing, err := a.GetConfig()
  1402. if err != nil {
  1403. return nil, err
  1404. }
  1405. formattedAggregators := []string{}
  1406. for _, agg := range aggregators {
  1407. aggregator_column_name := "resource_tags_user_" + agg
  1408. aggregator_column_name = ConvertToGlueColumnFormat(aggregator_column_name)
  1409. formattedAggregators = append(formattedAggregators, aggregator_column_name)
  1410. }
  1411. aggregatorNames := strings.Join(formattedAggregators, ",")
  1412. aggregatorOr := strings.Join(formattedAggregators, " <> '' OR ")
  1413. aggregatorOr = aggregatorOr + " <> ''"
  1414. filter_column_name := "resource_tags_user_" + filterType
  1415. filter_column_name = ConvertToGlueColumnFormat(filter_column_name)
  1416. var query string
  1417. var lastIdx int
  1418. if filterType != "kubernetes_" { // This gets appended upstream and is equivalent to no filter.
  1419. lastIdx = len(formattedAggregators) + 3
  1420. groupby := generateAWSGroupBy(lastIdx)
  1421. query = fmt.Sprintf(`SELECT
  1422. CAST(line_item_usage_start_date AS DATE) as start_date,
  1423. %s,
  1424. line_item_product_code,
  1425. %s,
  1426. SUM(line_item_blended_cost) as blended_cost
  1427. FROM %s as cost_data
  1428. WHERE (%s='%s') AND line_item_usage_start_date BETWEEN date '%s' AND date '%s' AND (%s)
  1429. GROUP BY %s`, aggregatorNames, filter_column_name, customPricing.AthenaTable, filter_column_name, filterValue, start, end, aggregatorOr, groupby)
  1430. } else {
  1431. lastIdx = len(formattedAggregators) + 2
  1432. groupby := generateAWSGroupBy(lastIdx)
  1433. query = fmt.Sprintf(`SELECT
  1434. CAST(line_item_usage_start_date AS DATE) as start_date,
  1435. %s,
  1436. line_item_product_code,
  1437. SUM(line_item_blended_cost) as blended_cost
  1438. FROM %s as cost_data
  1439. WHERE line_item_usage_start_date BETWEEN date '%s' AND date '%s' AND (%s)
  1440. GROUP BY %s`, aggregatorNames, customPricing.AthenaTable, start, end, aggregatorOr, groupby)
  1441. }
  1442. klog.V(3).Infof("Running Query: %s", query)
  1443. if customPricing.ServiceKeyName != "" {
  1444. err = os.Setenv(awsAccessKeyIDEnvVar, customPricing.ServiceKeyName)
  1445. if err != nil {
  1446. return nil, err
  1447. }
  1448. err = os.Setenv(awsAccessKeySecretEnvVar, customPricing.ServiceKeySecret)
  1449. if err != nil {
  1450. return nil, err
  1451. }
  1452. }
  1453. region := aws.String(customPricing.AthenaRegion)
  1454. resultsBucket := customPricing.AthenaBucketName
  1455. database := customPricing.AthenaDatabase
  1456. c := &aws.Config{
  1457. Region: region,
  1458. }
  1459. s := session.Must(session.NewSession(c))
  1460. svc := athena.New(s)
  1461. var e athena.StartQueryExecutionInput
  1462. var r athena.ResultConfiguration
  1463. r.SetOutputLocation(resultsBucket)
  1464. e.SetResultConfiguration(&r)
  1465. e.SetQueryString(query)
  1466. var q athena.QueryExecutionContext
  1467. q.SetDatabase(database)
  1468. e.SetQueryExecutionContext(&q)
  1469. res, err := svc.StartQueryExecution(&e)
  1470. if err != nil {
  1471. return nil, err
  1472. }
  1473. klog.V(2).Infof("StartQueryExecution result:")
  1474. klog.V(2).Infof(res.GoString())
  1475. var qri athena.GetQueryExecutionInput
  1476. qri.SetQueryExecutionId(*res.QueryExecutionId)
  1477. var qrop *athena.GetQueryExecutionOutput
  1478. duration := time.Duration(2) * time.Second // Pause for 2 seconds
  1479. for {
  1480. qrop, err = svc.GetQueryExecution(&qri)
  1481. if err != nil {
  1482. return nil, err
  1483. }
  1484. if *qrop.QueryExecution.Status.State != "RUNNING" && *qrop.QueryExecution.Status.State != "QUEUED" {
  1485. break
  1486. }
  1487. time.Sleep(duration)
  1488. }
  1489. var oocAllocs []*OutOfClusterAllocation
  1490. if *qrop.QueryExecution.Status.State == "SUCCEEDED" {
  1491. var ip athena.GetQueryResultsInput
  1492. ip.SetQueryExecutionId(*res.QueryExecutionId)
  1493. op, err := svc.GetQueryResults(&ip)
  1494. if err != nil {
  1495. return nil, err
  1496. }
  1497. if len(op.ResultSet.Rows) > 1 {
  1498. for _, r := range op.ResultSet.Rows[1:(len(op.ResultSet.Rows))] {
  1499. cost, err := strconv.ParseFloat(*r.Data[lastIdx].VarCharValue, 64)
  1500. if err != nil {
  1501. return nil, err
  1502. }
  1503. environment := ""
  1504. for _, d := range r.Data[1 : len(formattedAggregators)+1] {
  1505. if *d.VarCharValue != "" {
  1506. environment = *d.VarCharValue // just set to the first nonempty match
  1507. }
  1508. break
  1509. }
  1510. ooc := &OutOfClusterAllocation{
  1511. Aggregator: strings.Join(aggregators, ","),
  1512. Environment: environment,
  1513. Service: *r.Data[len(formattedAggregators)+1].VarCharValue,
  1514. Cost: cost,
  1515. }
  1516. oocAllocs = append(oocAllocs, ooc)
  1517. }
  1518. } else {
  1519. klog.V(1).Infof("No results available for %s at database %s between %s and %s", strings.Join(formattedAggregators, ","), customPricing.AthenaTable, start, end)
  1520. }
  1521. }
  1522. if customPricing.BillingDataDataset != "" && !crossCluster { // There is GCP data, meaning someone has tried to configure a GCP out-of-cluster allocation.
  1523. gcp, err := NewCrossClusterProvider("gcp", "aws.json", a.Clientset)
  1524. if err != nil {
  1525. klog.Infof("Could not instantiate cross-cluster provider %s", err.Error())
  1526. }
  1527. gcpOOC, err := gcp.ExternalAllocations(start, end, aggregators, filterType, filterValue, true)
  1528. if err != nil {
  1529. klog.Infof("Could not fetch cross-cluster costs %s", err.Error())
  1530. }
  1531. oocAllocs = append(oocAllocs, gcpOOC...)
  1532. }
  1533. return oocAllocs, nil
  1534. }
  1535. // QuerySQL can query a properly configured Athena database.
  1536. // Used to fetch billing data.
  1537. // Requires a json config in /var/configs with key region, output, and database.
  1538. func (a *AWS) QuerySQL(query string) ([]byte, error) {
  1539. customPricing, err := a.GetConfig()
  1540. if err != nil {
  1541. return nil, err
  1542. }
  1543. if customPricing.ServiceKeyName != "" {
  1544. err = os.Setenv(awsAccessKeyIDEnvVar, customPricing.ServiceKeyName)
  1545. if err != nil {
  1546. return nil, err
  1547. }
  1548. err = os.Setenv(awsAccessKeySecretEnvVar, customPricing.ServiceKeySecret)
  1549. if err != nil {
  1550. return nil, err
  1551. }
  1552. }
  1553. athenaConfigs, err := os.Open("/var/configs/athena.json")
  1554. if err != nil {
  1555. return nil, err
  1556. }
  1557. defer athenaConfigs.Close()
  1558. b, err := ioutil.ReadAll(athenaConfigs)
  1559. if err != nil {
  1560. return nil, err
  1561. }
  1562. var athenaConf map[string]string
  1563. json.Unmarshal([]byte(b), &athenaConf)
  1564. region := aws.String(customPricing.AthenaRegion)
  1565. resultsBucket := customPricing.AthenaBucketName
  1566. database := customPricing.AthenaDatabase
  1567. c := &aws.Config{
  1568. Region: region,
  1569. }
  1570. s := session.Must(session.NewSession(c))
  1571. svc := athena.New(s)
  1572. var e athena.StartQueryExecutionInput
  1573. var r athena.ResultConfiguration
  1574. r.SetOutputLocation(resultsBucket)
  1575. e.SetResultConfiguration(&r)
  1576. e.SetQueryString(query)
  1577. var q athena.QueryExecutionContext
  1578. q.SetDatabase(database)
  1579. e.SetQueryExecutionContext(&q)
  1580. res, err := svc.StartQueryExecution(&e)
  1581. if err != nil {
  1582. return nil, err
  1583. }
  1584. klog.V(2).Infof("StartQueryExecution result:")
  1585. klog.V(2).Infof(res.GoString())
  1586. var qri athena.GetQueryExecutionInput
  1587. qri.SetQueryExecutionId(*res.QueryExecutionId)
  1588. var qrop *athena.GetQueryExecutionOutput
  1589. duration := time.Duration(2) * time.Second // Pause for 2 seconds
  1590. for {
  1591. qrop, err = svc.GetQueryExecution(&qri)
  1592. if err != nil {
  1593. return nil, err
  1594. }
  1595. if *qrop.QueryExecution.Status.State != "RUNNING" && *qrop.QueryExecution.Status.State != "QUEUED" {
  1596. break
  1597. }
  1598. time.Sleep(duration)
  1599. }
  1600. if *qrop.QueryExecution.Status.State == "SUCCEEDED" {
  1601. var ip athena.GetQueryResultsInput
  1602. ip.SetQueryExecutionId(*res.QueryExecutionId)
  1603. op, err := svc.GetQueryResults(&ip)
  1604. if err != nil {
  1605. return nil, err
  1606. }
  1607. b, err := json.Marshal(op.ResultSet)
  1608. if err != nil {
  1609. return nil, err
  1610. }
  1611. return b, nil
  1612. }
  1613. return nil, fmt.Errorf("Error getting query results : %s", *qrop.QueryExecution.Status.State)
  1614. }
  1615. type spotInfo struct {
  1616. Timestamp string `csv:"Timestamp"`
  1617. UsageType string `csv:"UsageType"`
  1618. Operation string `csv:"Operation"`
  1619. InstanceID string `csv:"InstanceID"`
  1620. MyBidID string `csv:"MyBidID"`
  1621. MyMaxPrice string `csv:"MyMaxPrice"`
  1622. MarketPrice string `csv:"MarketPrice"`
  1623. Charge string `csv:"Charge"`
  1624. Version string `csv:"Version"`
  1625. }
  1626. type fnames []*string
  1627. func (f fnames) Len() int {
  1628. return len(f)
  1629. }
  1630. func (f fnames) Swap(i, j int) {
  1631. f[i], f[j] = f[j], f[i]
  1632. }
  1633. func (f fnames) Less(i, j int) bool {
  1634. key1 := strings.Split(*f[i], ".")
  1635. key2 := strings.Split(*f[j], ".")
  1636. t1, err := time.Parse("2006-01-02-15", key1[1])
  1637. if err != nil {
  1638. klog.V(1).Info("Unable to parse timestamp" + key1[1])
  1639. return false
  1640. }
  1641. t2, err := time.Parse("2006-01-02-15", key2[1])
  1642. if err != nil {
  1643. klog.V(1).Info("Unable to parse timestamp" + key2[1])
  1644. return false
  1645. }
  1646. return t1.Before(t2)
  1647. }
  1648. func parseSpotData(bucket string, prefix string, projectID string, region string, accessKeyID string, accessKeySecret string) (map[string]*spotInfo, error) {
  1649. // credentials may exist on the actual AWS node-- if so, use those. If not, override with the service key
  1650. if accessKeyID != "" && accessKeySecret != "" {
  1651. err := os.Setenv(awsAccessKeyIDEnvVar, accessKeyID)
  1652. if err != nil {
  1653. return nil, err
  1654. }
  1655. err = os.Setenv(awsAccessKeySecretEnvVar, accessKeySecret)
  1656. if err != nil {
  1657. return nil, err
  1658. }
  1659. }
  1660. s3Prefix := projectID
  1661. if len(prefix) != 0 {
  1662. s3Prefix = prefix + "/" + s3Prefix
  1663. }
  1664. c := aws.NewConfig().WithRegion(region)
  1665. s := session.Must(session.NewSession(c))
  1666. s3Svc := s3.New(s)
  1667. downloader := s3manager.NewDownloaderWithClient(s3Svc)
  1668. tNow := time.Now()
  1669. tOneDayAgo := tNow.Add(time.Duration(-24) * time.Hour) // Also get files from one day ago to avoid boundary conditions
  1670. ls := &s3.ListObjectsInput{
  1671. Bucket: aws.String(bucket),
  1672. Prefix: aws.String(s3Prefix + "." + tOneDayAgo.Format("2006-01-02")),
  1673. }
  1674. ls2 := &s3.ListObjectsInput{
  1675. Bucket: aws.String(bucket),
  1676. Prefix: aws.String(s3Prefix + "." + tNow.Format("2006-01-02")),
  1677. }
  1678. lso, err := s3Svc.ListObjects(ls)
  1679. if err != nil {
  1680. return nil, err
  1681. }
  1682. lsoLen := len(lso.Contents)
  1683. klog.V(2).Infof("Found %d spot data files from yesterday", lsoLen)
  1684. if lsoLen == 0 {
  1685. klog.V(5).Infof("ListObjects \"s3://%s/%s\" produced no keys", *ls.Bucket, *ls.Prefix)
  1686. }
  1687. lso2, err := s3Svc.ListObjects(ls2)
  1688. if err != nil {
  1689. return nil, err
  1690. }
  1691. lso2Len := len(lso2.Contents)
  1692. klog.V(2).Infof("Found %d spot data files from today", lso2Len)
  1693. if lso2Len == 0 {
  1694. klog.V(5).Infof("ListObjects \"s3://%s/%s\" produced no keys", *ls2.Bucket, *ls2.Prefix)
  1695. }
  1696. // TODO: Worth it to use LastModifiedDate to determine if we should reparse the spot data?
  1697. var keys []*string
  1698. for _, obj := range lso.Contents {
  1699. keys = append(keys, obj.Key)
  1700. }
  1701. for _, obj := range lso2.Contents {
  1702. keys = append(keys, obj.Key)
  1703. }
  1704. versionRx := regexp.MustCompile("^#Version: (\\d+)\\.\\d+$")
  1705. header, err := csvutil.Header(spotInfo{}, "csv")
  1706. if err != nil {
  1707. return nil, err
  1708. }
  1709. fieldsPerRecord := len(header)
  1710. spots := make(map[string]*spotInfo)
  1711. for _, key := range keys {
  1712. getObj := &s3.GetObjectInput{
  1713. Bucket: aws.String(bucket),
  1714. Key: key,
  1715. }
  1716. buf := aws.NewWriteAtBuffer([]byte{})
  1717. _, err := downloader.Download(buf, getObj)
  1718. if err != nil {
  1719. return nil, err
  1720. }
  1721. r := bytes.NewReader(buf.Bytes())
  1722. gr, err := gzip.NewReader(r)
  1723. if err != nil {
  1724. return nil, err
  1725. }
  1726. csvReader := csv.NewReader(gr)
  1727. csvReader.Comma = '\t'
  1728. csvReader.FieldsPerRecord = fieldsPerRecord
  1729. dec, err := csvutil.NewDecoder(csvReader, header...)
  1730. if err != nil {
  1731. return nil, err
  1732. }
  1733. var foundVersion string
  1734. for {
  1735. spot := spotInfo{}
  1736. err := dec.Decode(&spot)
  1737. csvParseErr, isCsvParseErr := err.(*csv.ParseError)
  1738. if err == io.EOF {
  1739. break
  1740. } else if err == csvutil.ErrFieldCount || (isCsvParseErr && csvParseErr.Err == csv.ErrFieldCount) {
  1741. rec := dec.Record()
  1742. // the first two "Record()" will be the comment lines
  1743. // and they show up as len() == 1
  1744. // the first of which is "#Version"
  1745. // the second of which is "#Fields: "
  1746. if len(rec) != 1 {
  1747. klog.V(2).Infof("Expected %d spot info fields but received %d: %s", fieldsPerRecord, len(rec), rec)
  1748. continue
  1749. }
  1750. if len(foundVersion) == 0 {
  1751. spotFeedVersion := rec[0]
  1752. klog.V(4).Infof("Spot feed version is \"%s\"", spotFeedVersion)
  1753. matches := versionRx.FindStringSubmatch(spotFeedVersion)
  1754. if matches != nil {
  1755. foundVersion = matches[1]
  1756. if foundVersion != supportedSpotFeedVersion {
  1757. klog.V(2).Infof("Unsupported spot info feed version: wanted \"%s\" got \"%s\"", supportedSpotFeedVersion, foundVersion)
  1758. break
  1759. }
  1760. }
  1761. continue
  1762. } else if strings.Index(rec[0], "#") == 0 {
  1763. continue
  1764. } else {
  1765. klog.V(3).Infof("skipping non-TSV line: %s", rec)
  1766. continue
  1767. }
  1768. } else if err != nil {
  1769. klog.V(2).Infof("Error during spot info decode: %+v", err)
  1770. continue
  1771. }
  1772. klog.V(1).Infof("Found spot info for: %s", spot.InstanceID)
  1773. spots[spot.InstanceID] = &spot
  1774. }
  1775. gr.Close()
  1776. }
  1777. return spots, nil
  1778. }
  1779. func (a *AWS) ApplyReservedInstancePricing(nodes map[string]*Node) {
  1780. /*
  1781. numReserved := len(a.ReservedInstances)
  1782. // Early return if no reserved instance data loaded
  1783. if numReserved == 0 {
  1784. klog.V(4).Infof("[Reserved] No Reserved Instances")
  1785. return
  1786. }
  1787. cfg, err := a.GetConfig()
  1788. defaultCPU, err := strconv.ParseFloat(cfg.CPU, 64)
  1789. if err != nil {
  1790. klog.V(3).Infof("Could not parse default cpu price")
  1791. defaultCPU = 0.031611
  1792. }
  1793. defaultRAM, err := strconv.ParseFloat(cfg.RAM, 64)
  1794. if err != nil {
  1795. klog.V(3).Infof("Could not parse default ram price")
  1796. defaultRAM = 0.004237
  1797. }
  1798. cpuToRAMRatio := defaultCPU / defaultRAM
  1799. now := time.Now()
  1800. instances := make(map[string][]*AWSReservedInstance)
  1801. for _, r := range a.ReservedInstances {
  1802. if now.Before(r.StartDate) || now.After(r.EndDate) {
  1803. klog.V(1).Infof("[Reserved] Skipped Reserved Instance due to dates")
  1804. continue
  1805. }
  1806. _, ok := instances[r.Region]
  1807. if !ok {
  1808. instances[r.Region] = []*AWSReservedInstance{r}
  1809. } else {
  1810. instances[r.Region] = append(instances[r.Region], r)
  1811. }
  1812. }
  1813. awsNodes := make(map[string]*v1.Node)
  1814. currentNodes := a.Clientset.GetAllNodes()
  1815. // Create a node name -> node map
  1816. for _, awsNode := range currentNodes {
  1817. awsNodes[awsNode.GetName()] = awsNode
  1818. }
  1819. // go through all provider nodes using k8s nodes for region
  1820. for nodeName, node := range nodes {
  1821. // Reset reserved allocation to prevent double allocation
  1822. node.Reserved = nil
  1823. kNode, ok := awsNodes[nodeName]
  1824. if !ok {
  1825. klog.V(1).Infof("[Reserved] Could not find K8s Node with name: %s", nodeName)
  1826. continue
  1827. }
  1828. nodeRegion, ok := kNode.Labels[v1.LabelZoneRegion]
  1829. if !ok {
  1830. klog.V(1).Infof("[Reserved] Could not find node region")
  1831. continue
  1832. }
  1833. reservedInstances, ok := instances[nodeRegion]
  1834. if !ok {
  1835. klog.V(1).Infof("[Reserved] Could not find counters for region: %s", nodeRegion)
  1836. continue
  1837. }
  1838. // Determine the InstanceType of the node
  1839. instanceType, ok := kNode.Labels["beta.kubernetes.io/instance-type"]
  1840. if !ok {
  1841. continue
  1842. }
  1843. ramBytes, err := strconv.ParseFloat(node.RAMBytes, 64)
  1844. if err != nil {
  1845. continue
  1846. }
  1847. ramGB := ramBytes / 1024 / 1024 / 1024
  1848. cpu, err := strconv.ParseFloat(node.VCPU, 64)
  1849. if err != nil {
  1850. continue
  1851. }
  1852. ramMultiple := cpu*cpuToRAMRatio + ramGB
  1853. node.Reserved = &ReservedInstanceData{
  1854. ReservedCPU: 0,
  1855. ReservedRAM: 0,
  1856. }
  1857. for i, reservedInstance := range reservedInstances {
  1858. if reservedInstance.InstanceType == instanceType {
  1859. // Use < 0 to mark as ALL
  1860. node.Reserved.ReservedCPU = -1
  1861. node.Reserved.ReservedRAM = -1
  1862. // Set Costs based on CPU/RAM ratios
  1863. ramPrice := reservedInstance.PricePerHour / ramMultiple
  1864. node.Reserved.CPUCost = ramPrice * cpuToRAMRatio
  1865. node.Reserved.RAMCost = ramPrice
  1866. // Remove the reserve from the temporary slice to prevent
  1867. // being reallocated
  1868. instances[nodeRegion] = append(reservedInstances[:i], reservedInstances[i+1:]...)
  1869. break
  1870. }
  1871. }
  1872. }*/
  1873. }
  1874. type AWSReservedInstance struct {
  1875. Zone string
  1876. Region string
  1877. InstanceType string
  1878. InstanceCount int64
  1879. InstanceTenacy string
  1880. StartDate time.Time
  1881. EndDate time.Time
  1882. PricePerHour float64
  1883. }
  1884. func (ari *AWSReservedInstance) String() string {
  1885. return fmt.Sprintf("[Zone: %s, Region: %s, Type: %s, Count: %d, Tenacy: %s, Start: %+v, End: %+v, Price: %f]", ari.Zone, ari.Region, ari.InstanceType, ari.InstanceCount, ari.InstanceTenacy, ari.StartDate, ari.EndDate, ari.PricePerHour)
  1886. }
  1887. func isReservedInstanceHourlyPrice(rc *ec2.RecurringCharge) bool {
  1888. return rc != nil && rc.Frequency != nil && *rc.Frequency == "Hourly"
  1889. }
  1890. func getReservedInstancePrice(ri *ec2.ReservedInstances) (float64, error) {
  1891. var pricePerHour float64
  1892. if len(ri.RecurringCharges) > 0 {
  1893. for _, rc := range ri.RecurringCharges {
  1894. if isReservedInstanceHourlyPrice(rc) {
  1895. pricePerHour = *rc.Amount
  1896. break
  1897. }
  1898. }
  1899. }
  1900. // If we're still unable to resolve hourly price, try fixed -> hourly
  1901. if pricePerHour == 0 {
  1902. if ri.Duration != nil && ri.FixedPrice != nil {
  1903. var durHours float64
  1904. durSeconds := float64(*ri.Duration)
  1905. fixedPrice := float64(*ri.FixedPrice)
  1906. if durSeconds != 0 && fixedPrice != 0 {
  1907. durHours = durSeconds / 60 / 60
  1908. pricePerHour = fixedPrice / durHours
  1909. }
  1910. }
  1911. }
  1912. if pricePerHour == 0 {
  1913. return 0, fmt.Errorf("Failed to resolve an hourly price from FixedPrice or Recurring Costs")
  1914. }
  1915. return pricePerHour, nil
  1916. }
  1917. func getRegionReservedInstances(region string) ([]*AWSReservedInstance, error) {
  1918. c := &aws.Config{
  1919. Region: aws.String(region),
  1920. }
  1921. s := session.Must(session.NewSession(c))
  1922. svc := ec2.New(s)
  1923. response, err := svc.DescribeReservedInstances(&ec2.DescribeReservedInstancesInput{})
  1924. if err != nil {
  1925. return nil, err
  1926. }
  1927. var reservedInstances []*AWSReservedInstance
  1928. for _, ri := range response.ReservedInstances {
  1929. var zone string
  1930. if ri.AvailabilityZone != nil {
  1931. zone = *ri.AvailabilityZone
  1932. }
  1933. pricePerHour, err := getReservedInstancePrice(ri)
  1934. if err != nil {
  1935. klog.V(1).Infof("Error Resolving Price: %s", err.Error())
  1936. continue
  1937. }
  1938. reservedInstances = append(reservedInstances, &AWSReservedInstance{
  1939. Zone: zone,
  1940. Region: region,
  1941. InstanceType: *ri.InstanceType,
  1942. InstanceCount: *ri.InstanceCount,
  1943. InstanceTenacy: *ri.InstanceTenancy,
  1944. StartDate: *ri.Start,
  1945. EndDate: *ri.End,
  1946. PricePerHour: pricePerHour,
  1947. })
  1948. }
  1949. return reservedInstances, nil
  1950. }
  1951. func (a *AWS) getReservedInstances() ([]*AWSReservedInstance, error) {
  1952. err := a.configureAWSAuth()
  1953. if err != nil {
  1954. return nil, fmt.Errorf("Error Configuring aws auth: %s", err.Error())
  1955. }
  1956. var reservedInstances []*AWSReservedInstance
  1957. nodes := a.Clientset.GetAllNodes()
  1958. regionsSeen := make(map[string]bool)
  1959. for _, node := range nodes {
  1960. region, ok := node.Labels[v1.LabelZoneRegion]
  1961. if !ok {
  1962. continue
  1963. }
  1964. if regionsSeen[region] {
  1965. continue
  1966. }
  1967. ris, err := getRegionReservedInstances(region)
  1968. if err != nil {
  1969. klog.V(3).Infof("Error getting reserved instances: %s", err.Error())
  1970. continue
  1971. }
  1972. regionsSeen[region] = true
  1973. reservedInstances = append(reservedInstances, ris...)
  1974. }
  1975. return reservedInstances, nil
  1976. }