server.go 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218
  1. package mcp
  2. import (
  3. "context"
  4. "crypto/rand"
  5. "encoding/hex"
  6. "fmt"
  7. "strings"
  8. "sync"
  9. "time"
  10. "github.com/go-playground/validator/v10"
  11. "github.com/opencost/opencost/core/pkg/filter"
  12. "github.com/opencost/opencost/core/pkg/filter/allocation"
  13. cloudcostfilter "github.com/opencost/opencost/core/pkg/filter/cloudcost"
  14. "github.com/opencost/opencost/core/pkg/log"
  15. "github.com/opencost/opencost/core/pkg/opencost"
  16. models "github.com/opencost/opencost/pkg/cloud/models"
  17. "github.com/opencost/opencost/pkg/cloudcost"
  18. "github.com/opencost/opencost/pkg/costmodel"
  19. )
  // QueryType names the kind of OpenCost data an MCP query asks for.
type QueryType string

// Query types understood by the MCP server. The string values are the wire
// values listed in the validation tag of OpenCostQueryRequest.QueryType.
const (
	AllocationQueryType QueryType = "allocation"
	AssetQueryType      QueryType = "asset"
	CloudCostQueryType  QueryType = "cloudcost"
	EfficiencyQueryType QueryType = "efficiency"
)
  // Constants used by the efficiency calculation.
const (
	// efficiencyBufferMultiplier adds 20% headroom for stability.
	efficiencyBufferMultiplier = 1.2
	// efficiencyMinCPU is the minimum CPU value, in cores.
	efficiencyMinCPU = 0.001
	// efficiencyMinRAM is the minimum RAM value, in bytes (1 MB).
	efficiencyMinRAM = 1 << 20
)
  34. // MCPRequest represents a single turn in a conversation with the OpenCost MCP server.
  35. type MCPRequest struct {
  36. SessionID string `json:"sessionId"`
  37. Query *OpenCostQueryRequest `json:"query"`
  38. }
  39. // MCPResponse is the response from the OpenCost MCP server for a single turn.
  40. type MCPResponse struct {
  41. Data interface{} `json:"data"`
  42. QueryInfo QueryMetadata `json:"queryInfo"`
  43. }
  // QueryMetadata describes one query execution.
type QueryMetadata struct {
	// QueryID is the identifier assigned to this execution.
	QueryID string `json:"queryId"`
	// Timestamp is the time recorded for the response.
	Timestamp time.Time `json:"timestamp"`
	// ProcessingTime is the measured duration of the query. time.Duration is
	// an integer type, so encoding/json emits it as a number of nanoseconds.
	ProcessingTime time.Duration `json:"processingTime"`
}
  50. // OpenCostQueryRequest provides a unified interface for all OpenCost query types.
  51. type OpenCostQueryRequest struct {
  52. QueryType QueryType `json:"queryType" validate:"required,oneof=allocation asset cloudcost efficiency"`
  53. Window string `json:"window" validate:"required"`
  54. AllocationParams *AllocationQuery `json:"allocationParams,omitempty"`
  55. AssetParams *AssetQuery `json:"assetParams,omitempty"`
  56. CloudCostParams *CloudCostQuery `json:"cloudCostParams,omitempty"`
  57. EfficiencyParams *EfficiencyQuery `json:"efficiencyParams,omitempty"`
  58. }
  // AllocationQuery holds the optional parameters of an allocation query.
type AllocationQuery struct {
	// Step is the query step; a value <= 0 means "use the window duration".
	Step time.Duration `json:"step,omitempty"`
	// Accumulate selects AccumulateOptionAll instead of AccumulateOptionNone.
	Accumulate bool `json:"accumulate,omitempty"`
	// ShareIdle is passed through to the allocation query.
	ShareIdle bool `json:"shareIdle,omitempty"`
	// Aggregate is a comma-separated list of aggregation properties.
	Aggregate string `json:"aggregate,omitempty"`
	// IncludeIdle is passed through to the allocation query.
	IncludeIdle bool `json:"includeIdle,omitempty"`
	// IdleByNode is passed through to the allocation query.
	IdleByNode bool `json:"idleByNode,omitempty"`
	// IncludeProportionalAssetResourceCosts is passed through to the
	// allocation query.
	IncludeProportionalAssetResourceCosts bool `json:"includeProportionalAssetResourceCosts,omitempty"`
	// IncludeAggregatedMetadata is passed through to the allocation query.
	IncludeAggregatedMetadata bool `json:"includeAggregatedMetadata,omitempty"`
	// ShareLB is passed through as the shared-load-balancer option.
	ShareLB bool `json:"sharelb,omitempty"`
	// Filter is a filter expression for allocations
	// (e.g., "cluster:production", "namespace:kube-system").
	Filter string `json:"filter,omitempty"`
}
  // AssetQuery holds the parameters of an asset query.
//
// It is intentionally empty for now: an asset query only takes the window,
// which lives on OpenCostQueryRequest.
type AssetQuery struct{}
  // CloudCostQuery holds the optional parameters of a cloud cost query.
type CloudCostQuery struct {
	// Aggregate is a comma-separated list of aggregation properties.
	Aggregate string `json:"aggregate,omitempty"`
	// Accumulate is the accumulation option, e.g., "week", "day", "month".
	Accumulate string `json:"accumulate,omitempty"`
	// Filter is a filter expression for cloud costs.
	Filter string `json:"filter,omitempty"`
	// Provider filters by cloud provider (aws, gcp, azure, etc.).
	Provider string `json:"provider,omitempty"`
	// Service filters by service (ec2, s3, compute, etc.).
	Service string `json:"service,omitempty"`
	// Category filters by category (compute, storage, network, etc.).
	Category string `json:"category,omitempty"`
	// Region filters by region.
	Region string `json:"region,omitempty"`

	// Additional explicit filter fields.

	// AccountID filters by account; it maps to accountID.
	AccountID string `json:"accountID,omitempty"`
	// InvoiceEntityID filters by invoice entity ID.
	InvoiceEntityID string `json:"invoiceEntityID,omitempty"`
	// ProviderID filters by cloud provider resource ID.
	ProviderID string `json:"providerID,omitempty"`
	// Labels filters by label (key -> value).
	Labels map[string]string `json:"labels,omitempty"`
}
  // EfficiencyQuery holds the optional parameters of an efficiency query.
type EfficiencyQuery struct {
	// Aggregate lists the aggregation properties
	// (e.g., "pod", "namespace", "controller").
	Aggregate string `json:"aggregate,omitempty"`
	// Filter is a filter expression for allocations, in the same form as
	// AllocationQuery.Filter.
	Filter string `json:"filter,omitempty"`
	// EfficiencyBufferMultiplier is the buffer multiplier for
	// recommendations (default: 1.2 for 20% headroom). Nil means unset.
	EfficiencyBufferMultiplier *float64 `json:"efficiencyBufferMultiplier,omitempty"`
}
  97. // AllocationResponse represents the allocation data returned to the AI agent.
  98. type AllocationResponse struct {
  99. // The allocation data, as a map of allocation sets.
  100. Allocations map[string]*AllocationSet `json:"allocations"`
  101. }
  // AllocationSet is a named collection of allocations.
type AllocationSet struct {
	// Name is the name of the allocation set.
	Name string `json:"name"`
	// Properties holds string properties attached to the set.
	Properties map[string]string `json:"properties"`
	// Allocations are the data points in the set.
	Allocations []*Allocation `json:"allocations"`
}

// TotalCost returns the sum of TotalCost over every allocation in the set.
//
// It is safe to call on a nil set, and nil entries in Allocations are
// skipped; both cases contribute 0 rather than panicking.
func (as *AllocationSet) TotalCost() float64 {
	if as == nil {
		return 0
	}

	var total float64
	for _, alloc := range as.Allocations {
		if alloc == nil {
			continue
		}
		total += alloc.TotalCost
	}
	return total
}

// Allocation is a single allocation data point.
type Allocation struct {
	// Name is the allocation key (namespace, cluster, etc.).
	Name string `json:"name"`
	// CPUCost is the cost of CPU usage.
	CPUCost float64 `json:"cpuCost"`
	// GPUCost is the cost of GPU usage.
	GPUCost float64 `json:"gpuCost"`
	// RAMCost is the cost of memory usage.
	RAMCost float64 `json:"ramCost"`
	// PVCost is the cost of persistent volumes.
	PVCost float64 `json:"pvCost"`
	// NetworkCost is the cost of network usage.
	NetworkCost float64 `json:"networkCost"`
	// SharedCost is the shared/unallocated cost assigned to this allocation.
	SharedCost float64 `json:"sharedCost"`
	// ExternalCost is the external cost (cloud services, etc.).
	ExternalCost float64 `json:"externalCost"`
	// TotalCost is the sum of all costs above.
	TotalCost float64 `json:"totalCost"`
	// CPUCoreHours is the CPU usage in core-hours.
	CPUCoreHours float64 `json:"cpuCoreHours"`
	// RAMByteHours is the RAM usage in byte-hours.
	RAMByteHours float64 `json:"ramByteHours"`
	// GPUHours is the GPU usage in GPU-hours.
	GPUHours float64 `json:"gpuHours"`
	// PVByteHours is the persistent volume usage in byte-hours.
	PVByteHours float64 `json:"pvByteHours"`
	// Start is the start timestamp for this allocation.
	Start time.Time `json:"start"`
	// End is the end timestamp for this allocation.
	End time.Time `json:"end"`
}
  135. // AssetResponse represents the asset data returned to the AI agent.
  136. type AssetResponse struct {
  137. // The asset data, as a map of asset sets.
  138. Assets map[string]*AssetSet `json:"assets"`
  139. }
  140. // AssetSet represents a set of asset data.
  141. type AssetSet struct {
  142. // The name of the asset set.
  143. Name string `json:"name"`
  144. // The asset data for the set.
  145. Assets []*Asset `json:"assets"`
  146. }
  147. // Asset represents a single asset data point.
  148. type Asset struct {
  149. Type string `json:"type"`
  150. Properties AssetProperties `json:"properties"`
  151. Labels map[string]string `json:"labels,omitempty"`
  152. Start time.Time `json:"start"`
  153. End time.Time `json:"end"`
  154. Minutes float64 `json:"minutes"`
  155. Adjustment float64 `json:"adjustment"`
  156. TotalCost float64 `json:"totalCost"`
  157. // Disk-specific fields
  158. ByteHours float64 `json:"byteHours,omitempty"`
  159. ByteHoursUsed *float64 `json:"byteHoursUsed,omitempty"`
  160. ByteUsageMax *float64 `json:"byteUsageMax,omitempty"`
  161. StorageClass string `json:"storageClass,omitempty"`
  162. VolumeName string `json:"volumeName,omitempty"`
  163. ClaimName string `json:"claimName,omitempty"`
  164. ClaimNamespace string `json:"claimNamespace,omitempty"`
  165. Local float64 `json:"local,omitempty"`
  166. // Node-specific fields
  167. NodeType string `json:"nodeType,omitempty"`
  168. CPUCoreHours float64 `json:"cpuCoreHours,omitempty"`
  169. RAMByteHours float64 `json:"ramByteHours,omitempty"`
  170. GPUHours float64 `json:"gpuHours,omitempty"`
  171. GPUCount float64 `json:"gpuCount,omitempty"`
  172. CPUCost float64 `json:"cpuCost,omitempty"`
  173. GPUCost float64 `json:"gpuCost,omitempty"`
  174. RAMCost float64 `json:"ramCost,omitempty"`
  175. Discount float64 `json:"discount,omitempty"`
  176. Preemptible float64 `json:"preemptible,omitempty"`
  177. // Breakdown fields (can be used for different types)
  178. Breakdown *AssetBreakdown `json:"breakdown,omitempty"`
  179. CPUBreakdown *AssetBreakdown `json:"cpuBreakdown,omitempty"`
  180. RAMBreakdown *AssetBreakdown `json:"ramBreakdown,omitempty"`
  181. // Overhead (Node-specific)
  182. Overhead *NodeOverhead `json:"overhead,omitempty"`
  183. // LoadBalancer-specific fields
  184. Private bool `json:"private,omitempty"`
  185. Ip string `json:"ip,omitempty"`
  186. // Cloud-specific fields
  187. Credit float64 `json:"credit,omitempty"`
  188. }
  // NodeOverhead carries the overhead fractions reported for a node.
type NodeOverhead struct {
	RamOverheadFraction  float64 `json:"ramOverheadFraction"`
	CpuOverheadFraction  float64 `json:"cpuOverheadFraction"`
	OverheadCostFraction float64 `json:"overheadCostFraction"`
}
  // AssetProperties holds the identifying properties of an asset. Every field
// is omitted from JSON when empty.
type AssetProperties struct {
	Category   string `json:"category,omitempty"`
	Provider   string `json:"provider,omitempty"`
	Account    string `json:"account,omitempty"`
	Project    string `json:"project,omitempty"`
	Service    string `json:"service,omitempty"`
	Cluster    string `json:"cluster,omitempty"`
	Name       string `json:"name,omitempty"`
	ProviderID string `json:"providerID,omitempty"`
}
  // AssetBreakdown splits a quantity into idle, other, system, and user parts.
type AssetBreakdown struct {
	Idle   float64 `json:"idle"`
	Other  float64 `json:"other"`
	System float64 `json:"system"`
	User   float64 `json:"user"`
}
  211. // CloudCostResponse represents the cloud cost data returned to the AI agent.
  212. type CloudCostResponse struct {
  213. // The cloud cost data, as a map of cloud cost sets.
  214. CloudCosts map[string]*CloudCostSet `json:"cloudCosts"`
  215. // Summary information
  216. Summary *CloudCostSummary `json:"summary,omitempty"`
  217. }
  // CloudCostSummary holds summary figures about cloud costs.
type CloudCostSummary struct {
	// Cost totals and the Kubernetes percentage.
	TotalNetCost       float64 `json:"totalNetCost"`
	TotalAmortizedCost float64 `json:"totalAmortizedCost"`
	TotalInvoicedCost  float64 `json:"totalInvoicedCost"`
	KubernetesPercent  float64 `json:"kubernetesPercent"`

	// Optional breakdowns keyed by name; omitted from JSON when empty.
	ProviderBreakdown map[string]float64 `json:"providerBreakdown,omitempty"`
	ServiceBreakdown  map[string]float64 `json:"serviceBreakdown,omitempty"`
	RegionBreakdown   map[string]float64 `json:"regionBreakdown,omitempty"`
}
  228. // CloudCostSet represents a set of cloud cost data.
  229. type CloudCostSet struct {
  230. // The name of the cloud cost set.
  231. Name string `json:"name"`
  232. // The cloud cost data for the set.
  233. CloudCosts []*CloudCost `json:"cloudCosts"`
  234. // Aggregation information
  235. AggregationProperties []string `json:"aggregationProperties,omitempty"`
  236. // Time window
  237. Window *TimeWindow `json:"window,omitempty"`
  238. }
  // TimeWindow is a time range delimited by Start and End.
type TimeWindow struct {
	Start time.Time `json:"start"`
	End   time.Time `json:"end"`
}
  // CloudCostProperties holds the descriptive properties of a cloud cost item.
// Every field is omitted from JSON when empty.
type CloudCostProperties struct {
	ProviderID        string            `json:"providerID,omitempty"`
	Provider          string            `json:"provider,omitempty"`
	AccountID         string            `json:"accountID,omitempty"`
	AccountName       string            `json:"accountName,omitempty"`
	InvoiceEntityID   string            `json:"invoiceEntityID,omitempty"`
	InvoiceEntityName string            `json:"invoiceEntityName,omitempty"`
	RegionID          string            `json:"regionID,omitempty"`
	AvailabilityZone  string            `json:"availabilityZone,omitempty"`
	Service           string            `json:"service,omitempty"`
	Category          string            `json:"category,omitempty"`
	Labels            map[string]string `json:"labels,omitempty"`
}
  258. // CloudCost represents a single cloud cost data point.
  259. type CloudCost struct {
  260. Properties CloudCostProperties `json:"properties"`
  261. Window TimeWindow `json:"window"`
  262. ListCost CostMetric `json:"listCost"`
  263. NetCost CostMetric `json:"netCost"`
  264. AmortizedNetCost CostMetric `json:"amortizedNetCost"`
  265. InvoicedCost CostMetric `json:"invoicedCost"`
  266. AmortizedCost CostMetric `json:"amortizedCost"`
  267. }
  // CostMetric pairs a cost value with its Kubernetes percentage.
type CostMetric struct {
	Cost              float64 `json:"cost"`
	KubernetesPercent float64 `json:"kubernetesPercent"`
}
  273. // EfficiencyResponse represents the efficiency data returned to the AI agent.
  274. type EfficiencyResponse struct {
  275. Efficiencies []*EfficiencyMetric `json:"efficiencies"`
  276. }
  // EfficiencyMetric is the efficiency data for a single pod/workload.
type EfficiencyMetric struct {
	// Name is the pod/namespace/controller name, depending on aggregation.
	Name string `json:"name"`

	// Current efficiency: usage / request ratio (0-1+).
	CPUEfficiency    float64 `json:"cpuEfficiency"`
	MemoryEfficiency float64 `json:"memoryEfficiency"`

	// Current requests and usage.
	CPUCoresRequested float64 `json:"cpuCoresRequested"`
	CPUCoresUsed      float64 `json:"cpuCoresUsed"`
	RAMBytesRequested float64 `json:"ramBytesRequested"`
	RAMBytesUsed      float64 `json:"ramBytesUsed"`

	// Recommendations based on actual usage with buffer: CPU in cores, RAM
	// in bytes.
	RecommendedCPURequest float64 `json:"recommendedCpuRequest"`
	RecommendedRAMRequest float64 `json:"recommendedRamRequest"`

	// Efficiency that results from applying the recommendations.
	ResultingCPUEfficiency    float64 `json:"resultingCpuEfficiency"`
	ResultingMemoryEfficiency float64 `json:"resultingMemoryEfficiency"`

	// Cost analysis: current total cost, estimated cost with the
	// recommendations, potential savings, and savings as a percentage.
	CurrentTotalCost   float64 `json:"currentTotalCost"`
	RecommendedCost    float64 `json:"recommendedCost"`
	CostSavings        float64 `json:"costSavings"`
	CostSavingsPercent float64 `json:"costSavingsPercent"`

	// EfficiencyBufferMultiplier is the buffer multiplier applied to the
	// recommendations (e.g., 1.2 for 20% headroom).
	EfficiencyBufferMultiplier float64 `json:"efficiencyBufferMultiplier"`

	// Time window covered by the metric.
	Start time.Time `json:"start"`
	End   time.Time `json:"end"`
}
  305. // MCPServer holds the dependencies for the MCP API server.
  306. type MCPServer struct {
  307. costModel *costmodel.CostModel
  308. provider models.Provider
  309. cloudQuerier cloudcost.Querier
  310. }
  311. // NewMCPServer creates a new MCP Server.
  312. func NewMCPServer(costModel *costmodel.CostModel, provider models.Provider, cloudQuerier cloudcost.Querier) *MCPServer {
  313. return &MCPServer{
  314. costModel: costModel,
  315. provider: provider,
  316. cloudQuerier: cloudQuerier,
  317. }
  318. }
  319. // ProcessMCPRequest processes an MCP request and returns an MCP response.
  320. func (s *MCPServer) ProcessMCPRequest(request *MCPRequest) (*MCPResponse, error) {
  321. // 1. Validate Request
  322. if err := validate.Struct(request); err != nil {
  323. return nil, fmt.Errorf("validation failed: %w", err)
  324. }
  325. // 2. Query Dispatching
  326. var data interface{}
  327. var err error
  328. queryStart := time.Now()
  329. switch request.Query.QueryType {
  330. case AllocationQueryType:
  331. data, err = s.QueryAllocations(request.Query)
  332. case AssetQueryType:
  333. data, err = s.QueryAssets(request.Query)
  334. case CloudCostQueryType:
  335. data, err = s.QueryCloudCosts(request.Query)
  336. case EfficiencyQueryType:
  337. data, err = s.QueryEfficiency(request.Query)
  338. default:
  339. return nil, fmt.Errorf("unsupported query type: %s", request.Query.QueryType)
  340. }
  341. if err != nil {
  342. // Handle error appropriately, maybe return a JSON-RPC error response
  343. return nil, err
  344. }
  345. processingTime := time.Since(queryStart)
  346. // 3. Construct Final Response
  347. mcpResponse := &MCPResponse{
  348. Data: data,
  349. QueryInfo: QueryMetadata{
  350. QueryID: generateQueryID(),
  351. Timestamp: time.Now(),
  352. ProcessingTime: processingTime,
  353. },
  354. }
  355. return mcpResponse, nil
  356. }
  357. // validate is the singleton validator instance.
  358. var validate = validator.New()
  // generateQueryID returns an identifier of the form "query-<16 hex chars>"
// built from 8 random bytes. If crypto/rand cannot supply the bytes, it falls
// back to "query-<unix nanoseconds>".
func generateQueryID() string {
	var raw [8]byte // 8 bytes encode to 16 hex characters
	_, err := rand.Read(raw[:])
	if err == nil {
		return fmt.Sprintf("query-%s", hex.EncodeToString(raw[:]))
	}
	// Fallback to a timestamp-based ID if crypto/rand fails.
	return fmt.Sprintf("query-%d", time.Now().UnixNano())
}
  367. func (s *MCPServer) QueryAllocations(query *OpenCostQueryRequest) (*AllocationResponse, error) {
  368. // 1. Parse Window
  369. window, err := opencost.ParseWindowWithOffset(query.Window, 0) // 0 offset for UTC
  370. if err != nil {
  371. return nil, fmt.Errorf("failed to parse window '%s': %w", query.Window, err)
  372. }
  373. // 2. Set default parameters
  374. var step time.Duration
  375. var aggregateBy []string
  376. var includeIdle, idleByNode, includeProportionalAssetResourceCosts, includeAggregatedMetadata, sharedLoadBalancer, shareIdle bool
  377. var accumulateBy opencost.AccumulateOption
  378. var filterString string
  379. // 3. Parse allocation parameters if provided
  380. if query.AllocationParams != nil {
  381. // Set step duration (default to window duration if not specified)
  382. if query.AllocationParams.Step > 0 {
  383. step = query.AllocationParams.Step
  384. } else {
  385. step = window.Duration()
  386. }
  387. // Parse aggregation properties
  388. if query.AllocationParams.Aggregate != "" {
  389. aggregateBy = strings.Split(query.AllocationParams.Aggregate, ",")
  390. }
  391. // Set boolean parameters
  392. includeIdle = query.AllocationParams.IncludeIdle
  393. idleByNode = query.AllocationParams.IdleByNode
  394. includeProportionalAssetResourceCosts = query.AllocationParams.IncludeProportionalAssetResourceCosts
  395. includeAggregatedMetadata = query.AllocationParams.IncludeAggregatedMetadata
  396. sharedLoadBalancer = query.AllocationParams.ShareLB
  397. shareIdle = query.AllocationParams.ShareIdle
  398. // Set filter string
  399. filterString = query.AllocationParams.Filter
  400. // Validate filter string if provided
  401. if filterString != "" {
  402. parser := allocation.NewAllocationFilterParser()
  403. _, err := parser.Parse(filterString)
  404. if err != nil {
  405. return nil, fmt.Errorf("invalid allocation filter '%s': %w", filterString, err)
  406. }
  407. }
  408. // Set accumulation option
  409. if query.AllocationParams.Accumulate {
  410. accumulateBy = opencost.AccumulateOptionAll
  411. } else {
  412. accumulateBy = opencost.AccumulateOptionNone
  413. }
  414. } else {
  415. // Default values when no parameters provided
  416. step = window.Duration()
  417. accumulateBy = opencost.AccumulateOptionNone
  418. filterString = ""
  419. }
  420. // 4. Call the existing QueryAllocation function with all parameters
  421. asr, err := s.costModel.QueryAllocation(
  422. window,
  423. step,
  424. aggregateBy,
  425. includeIdle,
  426. idleByNode,
  427. includeProportionalAssetResourceCosts,
  428. includeAggregatedMetadata,
  429. sharedLoadBalancer,
  430. accumulateBy,
  431. shareIdle,
  432. filterString,
  433. )
  434. if err != nil {
  435. return nil, fmt.Errorf("failed to query allocations: %w", err)
  436. }
  437. // 5. Handle the AllocationSetRange result
  438. if asr == nil || len(asr.Allocations) == 0 {
  439. return &AllocationResponse{
  440. Allocations: make(map[string]*AllocationSet),
  441. }, nil
  442. }
  443. // 6. Transform the result to MCP format
  444. // If we have multiple sets, we'll combine them or return the first one
  445. // For now, let's return the first allocation set
  446. firstSet := asr.Allocations[0]
  447. return transformAllocationSet(firstSet), nil
  448. }
  449. // transformAllocationSet converts an opencost.AllocationSet into the MCP's AllocationResponse format.
  450. func transformAllocationSet(allocSet *opencost.AllocationSet) *AllocationResponse {
  451. if allocSet == nil {
  452. return &AllocationResponse{Allocations: make(map[string]*AllocationSet)}
  453. }
  454. mcpAllocations := make(map[string]*AllocationSet)
  455. // Create a single set for all allocations
  456. mcpSet := &AllocationSet{
  457. Name: "allocations",
  458. Allocations: []*Allocation{},
  459. }
  460. // Convert each allocation
  461. for _, alloc := range allocSet.Allocations {
  462. if alloc == nil {
  463. continue
  464. }
  465. mcpAlloc := &Allocation{
  466. Name: alloc.Name,
  467. CPUCost: alloc.CPUCost,
  468. GPUCost: alloc.GPUCost,
  469. RAMCost: alloc.RAMCost,
  470. PVCost: alloc.PVCost(), // Call the method
  471. NetworkCost: alloc.NetworkCost,
  472. SharedCost: alloc.SharedCost,
  473. ExternalCost: alloc.ExternalCost,
  474. TotalCost: alloc.TotalCost(),
  475. CPUCoreHours: alloc.CPUCoreHours,
  476. RAMByteHours: alloc.RAMByteHours,
  477. GPUHours: alloc.GPUHours,
  478. PVByteHours: alloc.PVBytes(), // Use the method directly
  479. Start: alloc.Start,
  480. End: alloc.End,
  481. }
  482. mcpSet.Allocations = append(mcpSet.Allocations, mcpAlloc)
  483. }
  484. mcpAllocations["allocations"] = mcpSet
  485. return &AllocationResponse{
  486. Allocations: mcpAllocations,
  487. }
  488. }
  489. func (s *MCPServer) QueryAssets(query *OpenCostQueryRequest) (*AssetResponse, error) {
  490. // 1. Parse Window
  491. window, err := opencost.ParseWindowWithOffset(query.Window, 0) // 0 offset for UTC
  492. if err != nil {
  493. return nil, fmt.Errorf("failed to parse window '%s': %w", query.Window, err)
  494. }
  495. // 2. Set Query Options
  496. start := *window.Start()
  497. end := *window.End()
  498. // 3. Call CostModel to get the asset set
  499. assetSet, err := s.costModel.ComputeAssets(start, end)
  500. if err != nil {
  501. return nil, fmt.Errorf("failed to compute assets: %w", err)
  502. }
  503. // 4. Transform Response for the MCP API
  504. return transformAssetSet(assetSet), nil
  505. }
  506. // transformAssetSet converts a opencost.AssetSet into the MCP's AssetResponse format.
  507. func transformAssetSet(assetSet *opencost.AssetSet) *AssetResponse {
  508. if assetSet == nil {
  509. return &AssetResponse{Assets: make(map[string]*AssetSet)}
  510. }
  511. mcpAssets := make(map[string]*AssetSet)
  512. // Create a single set for all assets
  513. mcpSet := &AssetSet{
  514. Name: "assets",
  515. Assets: []*Asset{},
  516. }
  517. for _, asset := range assetSet.Assets {
  518. if asset == nil {
  519. continue
  520. }
  521. properties := asset.GetProperties()
  522. labels := asset.GetLabels()
  523. mcpAsset := &Asset{
  524. Type: asset.Type().String(),
  525. Properties: AssetProperties{
  526. Category: properties.Category,
  527. Provider: properties.Provider,
  528. Account: properties.Account,
  529. Project: properties.Project,
  530. Service: properties.Service,
  531. Cluster: properties.Cluster,
  532. Name: properties.Name,
  533. ProviderID: properties.ProviderID,
  534. },
  535. Labels: labels,
  536. Start: asset.GetStart(),
  537. End: asset.GetEnd(),
  538. Minutes: asset.Minutes(),
  539. Adjustment: asset.GetAdjustment(),
  540. TotalCost: asset.TotalCost(),
  541. }
  542. // Handle type-specific fields
  543. switch a := asset.(type) {
  544. case *opencost.Disk:
  545. mcpAsset.ByteHours = a.ByteHours
  546. mcpAsset.ByteHoursUsed = a.ByteHoursUsed
  547. mcpAsset.ByteUsageMax = a.ByteUsageMax
  548. mcpAsset.StorageClass = a.StorageClass
  549. mcpAsset.VolumeName = a.VolumeName
  550. mcpAsset.ClaimName = a.ClaimName
  551. mcpAsset.ClaimNamespace = a.ClaimNamespace
  552. mcpAsset.Local = a.Local
  553. if a.Breakdown != nil {
  554. mcpAsset.Breakdown = &AssetBreakdown{
  555. Idle: a.Breakdown.Idle,
  556. Other: a.Breakdown.Other,
  557. System: a.Breakdown.System,
  558. User: a.Breakdown.User,
  559. }
  560. }
  561. case *opencost.Node:
  562. mcpAsset.NodeType = a.NodeType
  563. mcpAsset.CPUCoreHours = a.CPUCoreHours
  564. mcpAsset.RAMByteHours = a.RAMByteHours
  565. mcpAsset.GPUHours = a.GPUHours
  566. mcpAsset.GPUCount = a.GPUCount
  567. mcpAsset.CPUCost = a.CPUCost
  568. mcpAsset.GPUCost = a.GPUCost
  569. mcpAsset.RAMCost = a.RAMCost
  570. mcpAsset.Discount = a.Discount
  571. mcpAsset.Preemptible = a.Preemptible
  572. if a.CPUBreakdown != nil {
  573. mcpAsset.CPUBreakdown = &AssetBreakdown{
  574. Idle: a.CPUBreakdown.Idle,
  575. Other: a.CPUBreakdown.Other,
  576. System: a.CPUBreakdown.System,
  577. User: a.CPUBreakdown.User,
  578. }
  579. }
  580. if a.RAMBreakdown != nil {
  581. mcpAsset.RAMBreakdown = &AssetBreakdown{
  582. Idle: a.RAMBreakdown.Idle,
  583. Other: a.RAMBreakdown.Other,
  584. System: a.RAMBreakdown.System,
  585. User: a.RAMBreakdown.User,
  586. }
  587. }
  588. if a.Overhead != nil {
  589. mcpAsset.Overhead = &NodeOverhead{
  590. RamOverheadFraction: a.Overhead.RamOverheadFraction,
  591. CpuOverheadFraction: a.Overhead.CpuOverheadFraction,
  592. OverheadCostFraction: a.Overhead.OverheadCostFraction,
  593. }
  594. }
  595. case *opencost.LoadBalancer:
  596. mcpAsset.Private = a.Private
  597. mcpAsset.Ip = a.Ip
  598. case *opencost.Network:
  599. // Network assets have no specific fields beyond the base asset structure
  600. // All relevant data is in Properties, Labels, Cost, etc.
  601. case *opencost.Cloud:
  602. mcpAsset.Credit = a.Credit
  603. case *opencost.ClusterManagement:
  604. // ClusterManagement assets have no specific fields beyond the base asset structure
  605. // All relevant data is in Properties, Labels, Cost, etc.
  606. }
  607. mcpSet.Assets = append(mcpSet.Assets, mcpAsset)
  608. }
  609. mcpAssets["assets"] = mcpSet
  610. return &AssetResponse{
  611. Assets: mcpAssets,
  612. }
  613. }
  614. // QueryCloudCosts translates an MCP query into a CloudCost repository query and transforms the result.
  615. func (s *MCPServer) QueryCloudCosts(query *OpenCostQueryRequest) (*CloudCostResponse, error) {
  616. // 1. Check if cloud cost querier is available
  617. if s.cloudQuerier == nil {
  618. return nil, fmt.Errorf("cloud cost querier not configured - check cloud-integration.json file")
  619. }
  620. // 2. Parse Window
  621. window, err := opencost.ParseWindowWithOffset(query.Window, 0) // 0 offset for UTC
  622. if err != nil {
  623. return nil, fmt.Errorf("failed to parse window '%s': %w", query.Window, err)
  624. }
  625. // 3. Build query request
  626. request := cloudcost.QueryRequest{
  627. Start: *window.Start(),
  628. End: *window.End(),
  629. Filter: nil, // Will be set from CloudCostParams if provided
  630. }
  631. // 4. Apply filtering and aggregation from CloudCostParams
  632. if query.CloudCostParams != nil {
  633. request = s.buildCloudCostQueryRequest(request, query.CloudCostParams)
  634. }
  635. // 5. Query the repository (this handles multiple cloud providers automatically)
  636. ccsr, err := s.cloudQuerier.Query(context.TODO(), request)
  637. if err != nil {
  638. return nil, fmt.Errorf("failed to query cloud costs: %w", err)
  639. }
  640. // 6. Transform Response
  641. return transformCloudCostSetRange(ccsr), nil
  642. }
  643. // buildCloudCostQueryRequest builds a QueryRequest from CloudCostParams
  644. func (s *MCPServer) buildCloudCostQueryRequest(request cloudcost.QueryRequest, params *CloudCostQuery) cloudcost.QueryRequest {
  645. // Set aggregation
  646. if params.Aggregate != "" {
  647. aggregateBy := strings.Split(params.Aggregate, ",")
  648. request.AggregateBy = aggregateBy
  649. }
  650. // Set accumulation
  651. if params.Accumulate != "" {
  652. request.Accumulate = opencost.ParseAccumulate(params.Accumulate)
  653. }
  654. // Build filter from individual parameters or filter string
  655. var filter filter.Filter
  656. var err error
  657. if params.Filter != "" {
  658. // Parse the filter string directly
  659. parser := cloudcostfilter.NewCloudCostFilterParser()
  660. filter, err = parser.Parse(params.Filter)
  661. if err != nil {
  662. // Log error but continue without filter rather than failing the entire request
  663. log.Warnf("failed to parse filter string '%s': %v", params.Filter, err)
  664. }
  665. } else {
  666. // Build filter from individual parameters
  667. filter = s.buildFilterFromParams(params)
  668. }
  669. request.Filter = filter
  670. return request
  671. }
  672. // buildFilterFromParams creates a filter from individual CloudCostQuery parameters
  673. func (s *MCPServer) buildFilterFromParams(params *CloudCostQuery) filter.Filter {
  674. var filterParts []string
  675. // Add provider filter
  676. if params.Provider != "" {
  677. filterParts = append(filterParts, fmt.Sprintf(`provider:"%s"`, params.Provider))
  678. }
  679. // Add providerID filter
  680. if params.ProviderID != "" {
  681. filterParts = append(filterParts, fmt.Sprintf(`providerID:"%s"`, params.ProviderID))
  682. }
  683. // Add service filter
  684. if params.Service != "" {
  685. filterParts = append(filterParts, fmt.Sprintf(`service:"%s"`, params.Service))
  686. }
  687. // Add category filter
  688. if params.Category != "" {
  689. filterParts = append(filterParts, fmt.Sprintf(`category:"%s"`, params.Category))
  690. }
  691. // Region is intentionally not supported here
  692. // Add account filter (maps to accountID)
  693. if params.AccountID != "" {
  694. filterParts = append(filterParts, fmt.Sprintf(`accountID:"%s"`, params.AccountID))
  695. }
  696. // Add invoiceEntityID filter
  697. if params.InvoiceEntityID != "" {
  698. filterParts = append(filterParts, fmt.Sprintf(`invoiceEntityID:"%s"`, params.InvoiceEntityID))
  699. }
  700. // Add label filters (label[key]:"value")
  701. if len(params.Labels) > 0 {
  702. for k, v := range params.Labels {
  703. if k == "" {
  704. continue
  705. }
  706. filterParts = append(filterParts, fmt.Sprintf(`label[%s]:"%s"`, k, v))
  707. }
  708. }
  709. // If no filters specified, return nil
  710. if len(filterParts) == 0 {
  711. return nil
  712. }
  713. // Combine all filter parts with AND logic (parser expects 'and')
  714. filterString := strings.Join(filterParts, " and ")
  715. // Parse the combined filter string
  716. parser := cloudcostfilter.NewCloudCostFilterParser()
  717. filter, err := parser.Parse(filterString)
  718. if err != nil {
  719. // Log error but return nil rather than failing
  720. log.Warnf("failed to parse combined filter '%s': %v", filterString, err)
  721. return nil
  722. }
  723. return filter
  724. }
  725. // transformCloudCostSetRange converts a opencost.CloudCostSetRange into the MCP's CloudCostResponse format.
  726. func transformCloudCostSetRange(ccsr *opencost.CloudCostSetRange) *CloudCostResponse {
  727. if ccsr == nil || len(ccsr.CloudCostSets) == 0 {
  728. return &CloudCostResponse{
  729. CloudCosts: make(map[string]*CloudCostSet),
  730. Summary: &CloudCostSummary{
  731. TotalNetCost: 0,
  732. },
  733. }
  734. }
  735. mcpCloudCosts := make(map[string]*CloudCostSet)
  736. var totalNetCost, totalAmortizedCost, totalInvoicedCost float64
  737. providerBreakdown := make(map[string]float64)
  738. serviceBreakdown := make(map[string]float64)
  739. regionBreakdown := make(map[string]float64)
  740. // Process each cloud cost set in the range
  741. for i, ccSet := range ccsr.CloudCostSets {
  742. if ccSet == nil {
  743. log.Warnf("transformCloudCostSetRange: skipping nil CloudCostSet at index %d", i)
  744. continue
  745. }
  746. // Check for nil Window or nil Start/End pointers before dereferencing
  747. if ccSet.Window.Start() == nil || ccSet.Window.End() == nil {
  748. log.Warnf("transformCloudCostSetRange: skipping CloudCostSet at index %d with invalid window (start=%v, end=%v)", i, ccSet.Window.Start(), ccSet.Window.End())
  749. continue
  750. }
  751. setName := fmt.Sprintf("cloudcosts_%d", i)
  752. mcpSet := &CloudCostSet{
  753. Name: setName,
  754. CloudCosts: []*CloudCost{},
  755. AggregationProperties: ccSet.AggregationProperties,
  756. Window: &TimeWindow{
  757. Start: *ccSet.Window.Start(),
  758. End: *ccSet.Window.End(),
  759. },
  760. }
  761. // Convert each cloud cost item
  762. for _, item := range ccSet.CloudCosts {
  763. if item == nil {
  764. log.Warnf("transformCloudCostSetRange: skipping nil CloudCost item in set %s", setName)
  765. continue
  766. }
  767. // Check for nil Window or nil Start/End pointers on the item
  768. if item.Window.Start() == nil || item.Window.End() == nil {
  769. log.Warnf("transformCloudCostSetRange: skipping CloudCost item with invalid window (start=%v, end=%v) in set %s", item.Window.Start(), item.Window.End(), setName)
  770. continue
  771. }
  772. mcpCC := &CloudCost{
  773. Properties: CloudCostProperties{
  774. ProviderID: item.Properties.ProviderID,
  775. Provider: item.Properties.Provider,
  776. AccountID: item.Properties.AccountID,
  777. AccountName: item.Properties.AccountName,
  778. InvoiceEntityID: item.Properties.InvoiceEntityID,
  779. InvoiceEntityName: item.Properties.InvoiceEntityName,
  780. RegionID: item.Properties.RegionID,
  781. AvailabilityZone: item.Properties.AvailabilityZone,
  782. Service: item.Properties.Service,
  783. Category: item.Properties.Category,
  784. Labels: item.Properties.Labels,
  785. },
  786. Window: TimeWindow{
  787. Start: *item.Window.Start(),
  788. End: *item.Window.End(),
  789. },
  790. ListCost: CostMetric{
  791. Cost: item.ListCost.Cost,
  792. KubernetesPercent: item.ListCost.KubernetesPercent,
  793. },
  794. NetCost: CostMetric{
  795. Cost: item.NetCost.Cost,
  796. KubernetesPercent: item.NetCost.KubernetesPercent,
  797. },
  798. AmortizedNetCost: CostMetric{
  799. Cost: item.AmortizedNetCost.Cost,
  800. KubernetesPercent: item.AmortizedNetCost.KubernetesPercent,
  801. },
  802. InvoicedCost: CostMetric{
  803. Cost: item.InvoicedCost.Cost,
  804. KubernetesPercent: item.InvoicedCost.KubernetesPercent,
  805. },
  806. AmortizedCost: CostMetric{
  807. Cost: item.AmortizedCost.Cost,
  808. KubernetesPercent: item.AmortizedCost.KubernetesPercent,
  809. },
  810. }
  811. mcpSet.CloudCosts = append(mcpSet.CloudCosts, mcpCC)
  812. // Update summary totals
  813. totalNetCost += item.NetCost.Cost
  814. totalAmortizedCost += item.AmortizedNetCost.Cost
  815. totalInvoicedCost += item.InvoicedCost.Cost
  816. // Update breakdowns
  817. providerBreakdown[item.Properties.Provider] += item.NetCost.Cost
  818. serviceBreakdown[item.Properties.Service] += item.NetCost.Cost
  819. regionBreakdown[item.Properties.RegionID] += item.NetCost.Cost
  820. }
  821. mcpCloudCosts[setName] = mcpSet
  822. }
  823. // Calculate cost-weighted average Kubernetes percentage (by NetCost)
  824. var avgKubernetesPercent float64
  825. var numerator, denominator float64
  826. for _, ccSet := range ccsr.CloudCostSets {
  827. if ccSet == nil {
  828. log.Warnf("transformCloudCostSetRange: skipping nil CloudCostSet in Kubernetes percent calculation")
  829. continue
  830. }
  831. // Skip sets with invalid windows (consistent with first loop)
  832. if ccSet.Window.Start() == nil || ccSet.Window.End() == nil {
  833. log.Warnf("transformCloudCostSetRange: skipping CloudCostSet with invalid window (start=%v, end=%v) in Kubernetes percent calculation", ccSet.Window.Start(), ccSet.Window.End())
  834. continue
  835. }
  836. for _, item := range ccSet.CloudCosts {
  837. if item == nil {
  838. log.Warnf("transformCloudCostSetRange: skipping nil CloudCost item in Kubernetes percent calculation")
  839. continue
  840. }
  841. // Skip items with invalid windows (consistent with first loop)
  842. if item.Window.Start() == nil || item.Window.End() == nil {
  843. log.Warnf("transformCloudCostSetRange: skipping CloudCost item with invalid window (start=%v, end=%v) in Kubernetes percent calculation", item.Window.Start(), item.Window.End())
  844. continue
  845. }
  846. cost := item.NetCost.Cost
  847. percent := item.NetCost.KubernetesPercent
  848. if cost <= 0 {
  849. continue
  850. }
  851. numerator += cost * percent
  852. denominator += cost
  853. }
  854. }
  855. if denominator > 0 {
  856. avgKubernetesPercent = numerator / denominator
  857. }
  858. summary := &CloudCostSummary{
  859. TotalNetCost: totalNetCost,
  860. TotalAmortizedCost: totalAmortizedCost,
  861. TotalInvoicedCost: totalInvoicedCost,
  862. KubernetesPercent: avgKubernetesPercent,
  863. ProviderBreakdown: providerBreakdown,
  864. ServiceBreakdown: serviceBreakdown,
  865. RegionBreakdown: regionBreakdown,
  866. }
  867. return &CloudCostResponse{
  868. CloudCosts: mcpCloudCosts,
  869. Summary: summary,
  870. }
  871. }
  872. // QueryEfficiency queries allocation data and computes efficiency metrics with recommendations.
  873. func (s *MCPServer) QueryEfficiency(query *OpenCostQueryRequest) (*EfficiencyResponse, error) {
  874. // 1. Parse Window
  875. window, err := opencost.ParseWindowWithOffset(query.Window, 0)
  876. if err != nil {
  877. return nil, fmt.Errorf("failed to parse window '%s': %w", query.Window, err)
  878. }
  879. // 2. Set default parameters
  880. var aggregateBy []string
  881. var filterString string
  882. var bufferMultiplier float64 = efficiencyBufferMultiplier // Default to 1.2 (20% headroom)
  883. // 3. Parse efficiency parameters if provided
  884. if query.EfficiencyParams != nil {
  885. // Parse aggregation properties (default to pod if not specified)
  886. if query.EfficiencyParams.Aggregate != "" {
  887. aggregateBy = strings.Split(query.EfficiencyParams.Aggregate, ",")
  888. } else {
  889. aggregateBy = []string{"pod"}
  890. }
  891. // Set filter string
  892. filterString = query.EfficiencyParams.Filter
  893. // Validate filter string if provided
  894. if filterString != "" {
  895. parser := allocation.NewAllocationFilterParser()
  896. _, err := parser.Parse(filterString)
  897. if err != nil {
  898. return nil, fmt.Errorf("invalid allocation filter '%s': %w", filterString, err)
  899. }
  900. }
  901. // Set buffer multiplier if provided, otherwise use default
  902. if query.EfficiencyParams.EfficiencyBufferMultiplier != nil {
  903. bufferMultiplier = *query.EfficiencyParams.EfficiencyBufferMultiplier
  904. }
  905. } else {
  906. // Default to pod-level aggregation
  907. aggregateBy = []string{"pod"}
  908. filterString = ""
  909. }
  910. // 4. Query allocations with the specified parameters
  911. // Use the entire window as step to get aggregated data
  912. step := window.Duration()
  913. asr, err := s.costModel.QueryAllocation(
  914. window,
  915. step,
  916. aggregateBy,
  917. false, // includeIdle
  918. false, // idleByNode
  919. false, // includeProportionalAssetResourceCosts
  920. false, // includeAggregatedMetadata
  921. false, // sharedLoadBalancer
  922. opencost.AccumulateOptionNone,
  923. false, // shareIdle
  924. filterString,
  925. )
  926. if err != nil {
  927. return nil, fmt.Errorf("failed to query allocations: %w", err)
  928. }
  929. // 5. Handle empty results
  930. if asr == nil || len(asr.Allocations) == 0 {
  931. return &EfficiencyResponse{
  932. Efficiencies: []*EfficiencyMetric{},
  933. }, nil
  934. }
  935. // 6. Compute efficiency metrics from allocations using concurrent processing
  936. var (
  937. mu sync.Mutex
  938. wg sync.WaitGroup
  939. efficiencies = make([]*EfficiencyMetric, 0)
  940. )
  941. // Process each allocation set (typically one per time window) concurrently
  942. for _, allocSet := range asr.Allocations {
  943. if allocSet == nil {
  944. continue
  945. }
  946. // Process this allocation set in a goroutine
  947. wg.Add(1)
  948. go func(allocSet *opencost.AllocationSet) {
  949. defer wg.Done()
  950. // Compute metrics for all allocations in this set
  951. localMetrics := make([]*EfficiencyMetric, 0, len(allocSet.Allocations))
  952. for _, alloc := range allocSet.Allocations {
  953. if metric := computeEfficiencyMetric(alloc, bufferMultiplier); metric != nil {
  954. localMetrics = append(localMetrics, metric)
  955. }
  956. }
  957. // Append results to shared slice (thread-safe)
  958. if len(localMetrics) > 0 {
  959. mu.Lock()
  960. efficiencies = append(efficiencies, localMetrics...)
  961. mu.Unlock()
  962. }
  963. }(allocSet)
  964. }
  965. // Wait for all goroutines to complete
  966. wg.Wait()
  967. return &EfficiencyResponse{
  968. Efficiencies: efficiencies,
  969. }, nil
  970. }
  971. // safeDiv performs division and returns 0 if denominator is 0.
  972. func safeDiv(numerator, denominator float64) float64 {
  973. if denominator == 0 {
  974. return 0
  975. }
  976. return numerator / denominator
  977. }
  978. // computeEfficiencyMetric calculates efficiency metrics for a single allocation.
  979. func computeEfficiencyMetric(alloc *opencost.Allocation, bufferMultiplier float64) *EfficiencyMetric {
  980. if alloc == nil {
  981. return nil
  982. }
  983. // Calculate time duration in hours
  984. hours := alloc.Minutes() / 60.0
  985. if hours <= 0 {
  986. return nil
  987. }
  988. // Get current usage (average over the period)
  989. cpuCoresUsed := alloc.CPUCoreHours / hours
  990. ramBytesUsed := alloc.RAMByteHours / hours
  991. // Get requested amounts
  992. cpuCoresRequested := alloc.CPUCoreRequestAverage
  993. ramBytesRequested := alloc.RAMBytesRequestAverage
  994. // Calculate current efficiency (will be 0 if no requests are set)
  995. cpuEfficiency := safeDiv(cpuCoresUsed, cpuCoresRequested)
  996. memoryEfficiency := safeDiv(ramBytesUsed, ramBytesRequested)
  997. // Calculate recommendations with buffer for headroom
  998. recommendedCPU := cpuCoresUsed * bufferMultiplier
  999. recommendedRAM := ramBytesUsed * bufferMultiplier
  1000. // Ensure recommendations meet minimum thresholds
  1001. if recommendedCPU < efficiencyMinCPU {
  1002. recommendedCPU = efficiencyMinCPU
  1003. }
  1004. if recommendedRAM < efficiencyMinRAM {
  1005. recommendedRAM = efficiencyMinRAM
  1006. }
  1007. // Calculate resulting efficiency after applying recommendations
  1008. resultingCPUEff := safeDiv(cpuCoresUsed, recommendedCPU)
  1009. resultingMemEff := safeDiv(ramBytesUsed, recommendedRAM)
  1010. // Calculate cost per unit based on REQUESTED amounts (not used amounts)
  1011. // This gives us the cost per core-hour or byte-hour that the cluster charges
  1012. cpuCostPerCoreHour := safeDiv(alloc.CPUCost, cpuCoresRequested*hours)
  1013. ramCostPerByteHour := safeDiv(alloc.RAMCost, ramBytesRequested*hours)
  1014. // Current total cost
  1015. currentTotalCost := alloc.TotalCost()
  1016. // Estimate recommended cost based on recommended requests
  1017. recommendedCPUCost := recommendedCPU * hours * cpuCostPerCoreHour
  1018. recommendedRAMCost := recommendedRAM * hours * ramCostPerByteHour
  1019. // Keep other costs the same (PV, network, shared, external, GPU)
  1020. otherCosts := alloc.PVCost() + alloc.NetworkCost + alloc.SharedCost + alloc.ExternalCost + alloc.GPUCost
  1021. recommendedTotalCost := recommendedCPUCost + recommendedRAMCost + otherCosts
  1022. // Clamp recommended cost to avoid rounding issues making it higher than current
  1023. if recommendedTotalCost > currentTotalCost && (recommendedTotalCost-currentTotalCost) < 0.0001 {
  1024. recommendedTotalCost = currentTotalCost
  1025. }
  1026. // Calculate savings
  1027. costSavings := currentTotalCost - recommendedTotalCost
  1028. costSavingsPercent := safeDiv(costSavings, currentTotalCost) * 100
  1029. return &EfficiencyMetric{
  1030. Name: alloc.Name,
  1031. CPUEfficiency: cpuEfficiency,
  1032. MemoryEfficiency: memoryEfficiency,
  1033. CPUCoresRequested: cpuCoresRequested,
  1034. CPUCoresUsed: cpuCoresUsed,
  1035. RAMBytesRequested: ramBytesRequested,
  1036. RAMBytesUsed: ramBytesUsed,
  1037. RecommendedCPURequest: recommendedCPU,
  1038. RecommendedRAMRequest: recommendedRAM,
  1039. ResultingCPUEfficiency: resultingCPUEff,
  1040. ResultingMemoryEfficiency: resultingMemEff,
  1041. CurrentTotalCost: currentTotalCost,
  1042. RecommendedCost: recommendedTotalCost,
  1043. CostSavings: costSavings,
  1044. CostSavingsPercent: costSavingsPercent,
  1045. EfficiencyBufferMultiplier: bufferMultiplier,
  1046. Start: alloc.Start,
  1047. End: alloc.End,
  1048. }
  1049. }