// server.go 42 KB

  1. package mcp
  2. import (
  3. "context"
  4. "crypto/rand"
  5. "encoding/hex"
  6. "fmt"
  7. "strings"
  8. "sync"
  9. "time"
  10. "github.com/go-playground/validator/v10"
  11. "github.com/opencost/opencost/core/pkg/filter"
  12. "github.com/opencost/opencost/core/pkg/filter/allocation"
  13. cloudcostfilter "github.com/opencost/opencost/core/pkg/filter/cloudcost"
  14. "github.com/opencost/opencost/core/pkg/log"
  15. "github.com/opencost/opencost/core/pkg/opencost"
  16. models "github.com/opencost/opencost/pkg/cloud/models"
  17. "github.com/opencost/opencost/pkg/cloudcost"
  18. "github.com/opencost/opencost/pkg/costmodel"
  19. "github.com/opencost/opencost/pkg/env"
  20. )
  21. // QueryType defines the type of query to be executed.
  22. type QueryType string
  23. const (
  24. AllocationQueryType QueryType = "allocation"
  25. AssetQueryType QueryType = "asset"
  26. CloudCostQueryType QueryType = "cloudcost"
  27. EfficiencyQueryType QueryType = "efficiency"
  28. )
  29. // Efficiency calculation constants
  30. const (
  31. efficiencyBufferMultiplier = 1.2 // 20% headroom for stability
  32. efficiencyMinCPU = 0.001 // minimum CPU cores
  33. efficiencyMinRAM = 1024 * 1024 // 1 MB minimum RAM
  34. )
  35. // MCPRequest represents a single turn in a conversation with the OpenCost MCP server.
  36. type MCPRequest struct {
  37. SessionID string `json:"sessionId"`
  38. Query *OpenCostQueryRequest `json:"query"`
  39. }
  40. // MCPResponse is the response from the OpenCost MCP server for a single turn.
  41. type MCPResponse struct {
  42. Data interface{} `json:"data"`
  43. QueryInfo QueryMetadata `json:"queryInfo"`
  44. }
  45. // QueryMetadata contains metadata about the query execution.
  46. type QueryMetadata struct {
  47. QueryID string `json:"queryId"`
  48. Timestamp time.Time `json:"timestamp"`
  49. ProcessingTime time.Duration `json:"processingTime"`
  50. }
  51. // OpenCostQueryRequest provides a unified interface for all OpenCost query types.
  52. type OpenCostQueryRequest struct {
  53. QueryType QueryType `json:"queryType" validate:"required,oneof=allocation asset cloudcost efficiency"`
  54. Window string `json:"window" validate:"required"`
  55. AllocationParams *AllocationQuery `json:"allocationParams,omitempty"`
  56. AssetParams *AssetQuery `json:"assetParams,omitempty"`
  57. CloudCostParams *CloudCostQuery `json:"cloudCostParams,omitempty"`
  58. EfficiencyParams *EfficiencyQuery `json:"efficiencyParams,omitempty"`
  59. }
  60. // AllocationQuery contains the parameters for an allocation query.
  61. type AllocationQuery struct {
  62. Step time.Duration `json:"step,omitempty"`
  63. Accumulate bool `json:"accumulate,omitempty"`
  64. ShareIdle bool `json:"shareIdle,omitempty"`
  65. Aggregate string `json:"aggregate,omitempty"`
  66. IncludeIdle bool `json:"includeIdle,omitempty"`
  67. IdleByNode bool `json:"idleByNode,omitempty"`
  68. IncludeProportionalAssetResourceCosts bool `json:"includeProportionalAssetResourceCosts,omitempty"`
  69. IncludeAggregatedMetadata bool `json:"includeAggregatedMetadata,omitempty"`
  70. ShareLB bool `json:"sharelb,omitempty"`
  71. Filter string `json:"filter,omitempty"` // Filter expression for allocations (e.g., "cluster:production", "namespace:kube-system")
  72. }
  73. // AssetQuery contains the parameters for an asset query.
  74. type AssetQuery struct {
  75. // Currently no specific parameters needed for asset queries as it only takes window as parameter
  76. }
  77. // CloudCostQuery contains the parameters for a cloud cost query.
  78. type CloudCostQuery struct {
  79. Aggregate string `json:"aggregate,omitempty"` // Comma-separated list of aggregation properties
  80. Accumulate string `json:"accumulate,omitempty"` // e.g., "week", "day", "month"
  81. Filter string `json:"filter,omitempty"` // Filter expression for cloud costs
  82. Provider string `json:"provider,omitempty"` // Cloud provider filter (aws, gcp, azure, etc.)
  83. Service string `json:"service,omitempty"` // Service filter (ec2, s3, compute, etc.)
  84. Category string `json:"category,omitempty"` // Category filter (compute, storage, network, etc.)
  85. Region string `json:"region,omitempty"` // Region filter
  86. // Additional explicit fields for filtering
  87. AccountID string `json:"accountID,omitempty"` // Alias of Account; maps to accountID
  88. InvoiceEntityID string `json:"invoiceEntityID,omitempty"` // Invoice entity ID filter
  89. ProviderID string `json:"providerID,omitempty"` // Cloud provider resource ID filter
  90. Labels map[string]string `json:"labels,omitempty"` // Label filters (key->value)
  91. }
  92. // EfficiencyQuery contains the parameters for an efficiency query.
  93. type EfficiencyQuery struct {
  94. Aggregate string `json:"aggregate,omitempty"` // Aggregation properties (e.g., "pod", "namespace", "controller")
  95. Filter string `json:"filter,omitempty"` // Filter expression for allocations (same as AllocationQuery)
  96. EfficiencyBufferMultiplier *float64 `json:"efficiencyBufferMultiplier,omitempty"` // Buffer multiplier for recommendations (default: 1.2 for 20% headroom)
  97. }
  98. // AllocationResponse represents the allocation data returned to the AI agent.
  99. type AllocationResponse struct {
  100. // The allocation data, as a map of allocation sets.
  101. Allocations map[string]*AllocationSet `json:"allocations"`
  102. }
  103. // AllocationSet represents a set of allocation data.
  104. type AllocationSet struct {
  105. // The name of the allocation set.
  106. Name string `json:"name"`
  107. Properties map[string]string `json:"properties"`
  108. Allocations []*Allocation `json:"allocations"`
  109. }
  110. // TotalCost calculates the total cost of all allocations in the set.
  111. func (as *AllocationSet) TotalCost() float64 {
  112. var total float64
  113. for _, alloc := range as.Allocations {
  114. total += alloc.TotalCost
  115. }
  116. return total
  117. }
  118. // Allocation represents a single allocation data point.
  119. type Allocation struct {
  120. Name string `json:"name"` // Allocation key (namespace, cluster, etc.)
  121. CPUCost float64 `json:"cpuCost"` // Cost of CPU usage
  122. GPUCost float64 `json:"gpuCost"` // Cost of GPU usage
  123. RAMCost float64 `json:"ramCost"` // Cost of memory usage
  124. PVCost float64 `json:"pvCost"` // Cost of persistent volumes
  125. NetworkCost float64 `json:"networkCost"` // Cost of network usage
  126. SharedCost float64 `json:"sharedCost"` // Shared/unallocated costs assigned here
  127. ExternalCost float64 `json:"externalCost"` // External costs (cloud services, etc.)
  128. TotalCost float64 `json:"totalCost"` // Sum of all costs above
  129. CPUCoreHours float64 `json:"cpuCoreHours"` // Usage metrics: CPU core-hours
  130. RAMByteHours float64 `json:"ramByteHours"` // Usage metrics: RAM byte-hours
  131. GPUHours float64 `json:"gpuHours"` // Usage metrics: GPU-hours
  132. PVByteHours float64 `json:"pvByteHours"` // Usage metrics: PV byte-hours
  133. Start time.Time `json:"start"` // Start timestamp for this allocation
  134. End time.Time `json:"end"` // End timestamp for this allocation
  135. }
  136. // AssetResponse represents the asset data returned to the AI agent.
  137. type AssetResponse struct {
  138. // The asset data, as a map of asset sets.
  139. Assets map[string]*AssetSet `json:"assets"`
  140. }
  141. // AssetSet represents a set of asset data.
  142. type AssetSet struct {
  143. // The name of the asset set.
  144. Name string `json:"name"`
  145. // The asset data for the set.
  146. Assets []*Asset `json:"assets"`
  147. }
  148. // Asset represents a single asset data point.
  149. type Asset struct {
  150. Type string `json:"type"`
  151. Properties AssetProperties `json:"properties"`
  152. Labels map[string]string `json:"labels,omitempty"`
  153. Start time.Time `json:"start"`
  154. End time.Time `json:"end"`
  155. Minutes float64 `json:"minutes"`
  156. Adjustment float64 `json:"adjustment"`
  157. TotalCost float64 `json:"totalCost"`
  158. // Disk-specific fields
  159. ByteHours float64 `json:"byteHours,omitempty"`
  160. ByteHoursUsed *float64 `json:"byteHoursUsed,omitempty"`
  161. ByteUsageMax *float64 `json:"byteUsageMax,omitempty"`
  162. StorageClass string `json:"storageClass,omitempty"`
  163. VolumeName string `json:"volumeName,omitempty"`
  164. ClaimName string `json:"claimName,omitempty"`
  165. ClaimNamespace string `json:"claimNamespace,omitempty"`
  166. Local float64 `json:"local,omitempty"`
  167. // Node-specific fields
  168. NodeType string `json:"nodeType,omitempty"`
  169. CPUCoreHours float64 `json:"cpuCoreHours,omitempty"`
  170. RAMByteHours float64 `json:"ramByteHours,omitempty"`
  171. GPUHours float64 `json:"gpuHours,omitempty"`
  172. GPUCount float64 `json:"gpuCount,omitempty"`
  173. CPUCost float64 `json:"cpuCost,omitempty"`
  174. GPUCost float64 `json:"gpuCost,omitempty"`
  175. RAMCost float64 `json:"ramCost,omitempty"`
  176. Discount float64 `json:"discount,omitempty"`
  177. Preemptible float64 `json:"preemptible,omitempty"`
  178. // Breakdown fields (can be used for different types)
  179. Breakdown *AssetBreakdown `json:"breakdown,omitempty"`
  180. CPUBreakdown *AssetBreakdown `json:"cpuBreakdown,omitempty"`
  181. RAMBreakdown *AssetBreakdown `json:"ramBreakdown,omitempty"`
  182. // Overhead (Node-specific)
  183. Overhead *NodeOverhead `json:"overhead,omitempty"`
  184. // LoadBalancer-specific fields
  185. Private bool `json:"private,omitempty"`
  186. Ip string `json:"ip,omitempty"`
  187. // Cloud-specific fields
  188. Credit float64 `json:"credit,omitempty"`
  189. }
  190. // NodeOverhead represents node overhead information
  191. type NodeOverhead struct {
  192. RamOverheadFraction float64 `json:"ramOverheadFraction"`
  193. CpuOverheadFraction float64 `json:"cpuOverheadFraction"`
  194. OverheadCostFraction float64 `json:"overheadCostFraction"`
  195. }
  196. type AssetProperties struct {
  197. Category string `json:"category,omitempty"`
  198. Provider string `json:"provider,omitempty"`
  199. Account string `json:"account,omitempty"`
  200. Project string `json:"project,omitempty"`
  201. Service string `json:"service,omitempty"`
  202. Cluster string `json:"cluster,omitempty"`
  203. Name string `json:"name,omitempty"`
  204. ProviderID string `json:"providerID,omitempty"`
  205. }
  206. type AssetBreakdown struct {
  207. Idle float64 `json:"idle"`
  208. Other float64 `json:"other"`
  209. System float64 `json:"system"`
  210. User float64 `json:"user"`
  211. }
  212. // CloudCostResponse represents the cloud cost data returned to the AI agent.
  213. type CloudCostResponse struct {
  214. // The cloud cost data, as a map of cloud cost sets.
  215. CloudCosts map[string]*CloudCostSet `json:"cloudCosts"`
  216. // Summary information
  217. Summary *CloudCostSummary `json:"summary,omitempty"`
  218. }
  219. // CloudCostSummary provides summary information about cloud costs
  220. type CloudCostSummary struct {
  221. TotalNetCost float64 `json:"totalNetCost"`
  222. TotalAmortizedCost float64 `json:"totalAmortizedCost"`
  223. TotalInvoicedCost float64 `json:"totalInvoicedCost"`
  224. KubernetesPercent float64 `json:"kubernetesPercent"`
  225. ProviderBreakdown map[string]float64 `json:"providerBreakdown,omitempty"`
  226. ServiceBreakdown map[string]float64 `json:"serviceBreakdown,omitempty"`
  227. RegionBreakdown map[string]float64 `json:"regionBreakdown,omitempty"`
  228. }
  229. // CloudCostSet represents a set of cloud cost data.
  230. type CloudCostSet struct {
  231. // The name of the cloud cost set.
  232. Name string `json:"name"`
  233. // The cloud cost data for the set.
  234. CloudCosts []*CloudCost `json:"cloudCosts"`
  235. // Aggregation information
  236. AggregationProperties []string `json:"aggregationProperties,omitempty"`
  237. // Time window
  238. Window *TimeWindow `json:"window,omitempty"`
  239. }
  240. // TimeWindow represents a time range
  241. type TimeWindow struct {
  242. Start time.Time `json:"start"`
  243. End time.Time `json:"end"`
  244. }
  245. // CloudCostProperties defines the properties of a cloud cost item.
  246. type CloudCostProperties struct {
  247. ProviderID string `json:"providerID,omitempty"`
  248. Provider string `json:"provider,omitempty"`
  249. AccountID string `json:"accountID,omitempty"`
  250. AccountName string `json:"accountName,omitempty"`
  251. InvoiceEntityID string `json:"invoiceEntityID,omitempty"`
  252. InvoiceEntityName string `json:"invoiceEntityName,omitempty"`
  253. RegionID string `json:"regionID,omitempty"`
  254. AvailabilityZone string `json:"availabilityZone,omitempty"`
  255. Service string `json:"service,omitempty"`
  256. Category string `json:"category,omitempty"`
  257. Labels map[string]string `json:"labels,omitempty"`
  258. }
  259. // CloudCost represents a single cloud cost data point.
  260. type CloudCost struct {
  261. Properties CloudCostProperties `json:"properties"`
  262. Window TimeWindow `json:"window"`
  263. ListCost CostMetric `json:"listCost"`
  264. NetCost CostMetric `json:"netCost"`
  265. AmortizedNetCost CostMetric `json:"amortizedNetCost"`
  266. InvoicedCost CostMetric `json:"invoicedCost"`
  267. AmortizedCost CostMetric `json:"amortizedCost"`
  268. }
  269. // CostMetric represents a cost value with Kubernetes percentage
  270. type CostMetric struct {
  271. Cost float64 `json:"cost"`
  272. KubernetesPercent float64 `json:"kubernetesPercent"`
  273. }
  274. // EfficiencyResponse represents the efficiency data returned to the AI agent.
  275. type EfficiencyResponse struct {
  276. Efficiencies []*EfficiencyMetric `json:"efficiencies"`
  277. }
  278. // EfficiencyMetric represents efficiency data for a single pod/workload.
  279. type EfficiencyMetric struct {
  280. Name string `json:"name"` // Pod/namespace/controller name based on aggregation
  281. // Current state
  282. CPUEfficiency float64 `json:"cpuEfficiency"` // Usage / Request ratio (0-1+)
  283. MemoryEfficiency float64 `json:"memoryEfficiency"` // Usage / Request ratio (0-1+)
  284. // Current requests and usage
  285. CPUCoresRequested float64 `json:"cpuCoresRequested"`
  286. CPUCoresUsed float64 `json:"cpuCoresUsed"`
  287. RAMBytesRequested float64 `json:"ramBytesRequested"`
  288. RAMBytesUsed float64 `json:"ramBytesUsed"`
  289. // Recommendations (based on actual usage with buffer)
  290. RecommendedCPURequest float64 `json:"recommendedCpuRequest"` // Recommended CPU cores
  291. RecommendedRAMRequest float64 `json:"recommendedRamRequest"` // Recommended RAM bytes
  292. // Resulting efficiency after applying recommendations
  293. ResultingCPUEfficiency float64 `json:"resultingCpuEfficiency"`
  294. ResultingMemoryEfficiency float64 `json:"resultingMemoryEfficiency"`
  295. // Cost analysis
  296. CurrentTotalCost float64 `json:"currentTotalCost"` // Current total cost
  297. RecommendedCost float64 `json:"recommendedCost"` // Estimated cost with recommendations
  298. CostSavings float64 `json:"costSavings"` // Potential savings
  299. CostSavingsPercent float64 `json:"costSavingsPercent"` // Savings as percentage
  300. // Buffer multiplier used for recommendations
  301. EfficiencyBufferMultiplier float64 `json:"efficiencyBufferMultiplier"` // Buffer multiplier applied (e.g., 1.2 for 20% headroom)
  302. // Time window
  303. Start time.Time `json:"start"`
  304. End time.Time `json:"end"`
  305. }
  306. // MCPServer holds the dependencies for the MCP API server.
  307. type MCPServer struct {
  308. costModel *costmodel.CostModel
  309. provider models.Provider
  310. cloudQuerier cloudcost.Querier
  311. }
  312. // NewMCPServer creates a new MCP Server.
  313. func NewMCPServer(costModel *costmodel.CostModel, provider models.Provider, cloudQuerier cloudcost.Querier) *MCPServer {
  314. return &MCPServer{
  315. costModel: costModel,
  316. provider: provider,
  317. cloudQuerier: cloudQuerier,
  318. }
  319. }
  320. // ProcessMCPRequest processes an MCP request and returns an MCP response.
  321. // It accepts a context for proper timeout handling and cancellation.
  322. func (s *MCPServer) ProcessMCPRequest(ctx context.Context, request *MCPRequest) (*MCPResponse, error) {
  323. // 1. Validate Request
  324. if err := validate.Struct(request); err != nil {
  325. return nil, fmt.Errorf("validation failed: %w", err)
  326. }
  327. // 2. Query Dispatching
  328. var data interface{}
  329. var err error
  330. queryStart := time.Now()
  331. switch request.Query.QueryType {
  332. case AllocationQueryType:
  333. data, err = s.QueryAllocations(request.Query)
  334. case AssetQueryType:
  335. data, err = s.QueryAssets(request.Query)
  336. case CloudCostQueryType:
  337. data, err = s.QueryCloudCosts(ctx, request.Query)
  338. case EfficiencyQueryType:
  339. data, err = s.QueryEfficiency(request.Query)
  340. default:
  341. return nil, fmt.Errorf("unsupported query type: %s", request.Query.QueryType)
  342. }
  343. if err != nil {
  344. // Handle error appropriately, maybe return a JSON-RPC error response
  345. return nil, err
  346. }
  347. processingTime := time.Since(queryStart)
  348. // 3. Construct Final Response
  349. mcpResponse := &MCPResponse{
  350. Data: data,
  351. QueryInfo: QueryMetadata{
  352. QueryID: generateQueryID(),
  353. Timestamp: time.Now(),
  354. ProcessingTime: processingTime,
  355. },
  356. }
  357. return mcpResponse, nil
  358. }
  359. // validate is the singleton validator instance.
  360. var validate = validator.New()
  361. func generateQueryID() string {
  362. bytes := make([]byte, 8) // 16 hex characters
  363. if _, err := rand.Read(bytes); err != nil {
  364. // Fallback to timestamp-based ID if crypto/rand fails
  365. return fmt.Sprintf("query-%d", time.Now().UnixNano())
  366. }
  367. return fmt.Sprintf("query-%s", hex.EncodeToString(bytes))
  368. }
  369. func (s *MCPServer) QueryAllocations(query *OpenCostQueryRequest) (*AllocationResponse, error) {
  370. // 1. Parse Window
  371. window, err := opencost.ParseWindowWithOffset(query.Window, 0) // 0 offset for UTC
  372. if err != nil {
  373. return nil, fmt.Errorf("failed to parse window '%s': %w", query.Window, err)
  374. }
  375. // 2. Set default parameters
  376. var step time.Duration
  377. var aggregateBy []string
  378. var includeIdle, idleByNode, includeProportionalAssetResourceCosts, includeAggregatedMetadata, sharedLoadBalancer, shareIdle bool
  379. var accumulateBy opencost.AccumulateOption
  380. var filterString string
  381. // 3. Parse allocation parameters if provided
  382. if query.AllocationParams != nil {
  383. // Set step duration (default to window duration if not specified)
  384. if query.AllocationParams.Step > 0 {
  385. step = query.AllocationParams.Step
  386. } else {
  387. step = window.Duration()
  388. }
  389. // Parse aggregation properties
  390. if query.AllocationParams.Aggregate != "" {
  391. aggregateBy = strings.Split(query.AllocationParams.Aggregate, ",")
  392. }
  393. // Set boolean parameters
  394. includeIdle = query.AllocationParams.IncludeIdle
  395. idleByNode = query.AllocationParams.IdleByNode
  396. includeProportionalAssetResourceCosts = query.AllocationParams.IncludeProportionalAssetResourceCosts
  397. includeAggregatedMetadata = query.AllocationParams.IncludeAggregatedMetadata
  398. sharedLoadBalancer = query.AllocationParams.ShareLB
  399. shareIdle = query.AllocationParams.ShareIdle
  400. // Set filter string
  401. filterString = query.AllocationParams.Filter
  402. // Validate filter string if provided
  403. if filterString != "" {
  404. parser := allocation.NewAllocationFilterParser()
  405. _, err := parser.Parse(filterString)
  406. if err != nil {
  407. return nil, fmt.Errorf("invalid allocation filter '%s': %w", filterString, err)
  408. }
  409. }
  410. // Set accumulation option
  411. if query.AllocationParams.Accumulate {
  412. accumulateBy = opencost.AccumulateOptionAll
  413. } else {
  414. accumulateBy = opencost.AccumulateOptionNone
  415. }
  416. } else {
  417. // Default values when no parameters provided
  418. step = window.Duration()
  419. accumulateBy = opencost.AccumulateOptionNone
  420. filterString = ""
  421. }
  422. // 4. Call the existing QueryAllocation function with all parameters
  423. asr, err := s.costModel.QueryAllocation(
  424. window,
  425. step,
  426. aggregateBy,
  427. includeIdle,
  428. idleByNode,
  429. includeProportionalAssetResourceCosts,
  430. includeAggregatedMetadata,
  431. sharedLoadBalancer,
  432. accumulateBy,
  433. shareIdle,
  434. filterString,
  435. )
  436. if err != nil {
  437. return nil, fmt.Errorf("failed to query allocations: %w", err)
  438. }
  439. // 5. Handle the AllocationSetRange result
  440. if asr == nil || len(asr.Allocations) == 0 {
  441. return &AllocationResponse{
  442. Allocations: make(map[string]*AllocationSet),
  443. }, nil
  444. }
  445. // 6. Transform the result to MCP format
  446. // If we have multiple sets, we'll combine them or return the first one
  447. // For now, let's return the first allocation set
  448. firstSet := asr.Allocations[0]
  449. return transformAllocationSet(firstSet), nil
  450. }
  451. // transformAllocationSet converts an opencost.AllocationSet into the MCP's AllocationResponse format.
  452. func transformAllocationSet(allocSet *opencost.AllocationSet) *AllocationResponse {
  453. if allocSet == nil {
  454. return &AllocationResponse{Allocations: make(map[string]*AllocationSet)}
  455. }
  456. mcpAllocations := make(map[string]*AllocationSet)
  457. // Create a single set for all allocations
  458. mcpSet := &AllocationSet{
  459. Name: "allocations",
  460. Allocations: []*Allocation{},
  461. }
  462. // Convert each allocation
  463. for _, alloc := range allocSet.Allocations {
  464. if alloc == nil {
  465. continue
  466. }
  467. mcpAlloc := &Allocation{
  468. Name: alloc.Name,
  469. CPUCost: alloc.CPUCost,
  470. GPUCost: alloc.GPUCost,
  471. RAMCost: alloc.RAMCost,
  472. PVCost: alloc.PVCost(), // Call the method
  473. NetworkCost: alloc.NetworkCost,
  474. SharedCost: alloc.SharedCost,
  475. ExternalCost: alloc.ExternalCost,
  476. TotalCost: alloc.TotalCost(),
  477. CPUCoreHours: alloc.CPUCoreHours,
  478. RAMByteHours: alloc.RAMByteHours,
  479. GPUHours: alloc.GPUHours,
  480. PVByteHours: alloc.PVBytes(), // Use the method directly
  481. Start: alloc.Start,
  482. End: alloc.End,
  483. }
  484. mcpSet.Allocations = append(mcpSet.Allocations, mcpAlloc)
  485. }
  486. mcpAllocations["allocations"] = mcpSet
  487. return &AllocationResponse{
  488. Allocations: mcpAllocations,
  489. }
  490. }
  491. func (s *MCPServer) QueryAssets(query *OpenCostQueryRequest) (*AssetResponse, error) {
  492. // 1. Parse Window
  493. window, err := opencost.ParseWindowWithOffset(query.Window, 0) // 0 offset for UTC
  494. if err != nil {
  495. return nil, fmt.Errorf("failed to parse window '%s': %w", query.Window, err)
  496. }
  497. // 2. Set Query Options
  498. start := *window.Start()
  499. end := *window.End()
  500. // 3. Call CostModel to get the asset set
  501. assetSet, err := s.costModel.ComputeAssets(start, end)
  502. if err != nil {
  503. return nil, fmt.Errorf("failed to compute assets: %w", err)
  504. }
  505. // 4. Transform Response for the MCP API
  506. return transformAssetSet(assetSet), nil
  507. }
  508. // transformAssetSet converts a opencost.AssetSet into the MCP's AssetResponse format.
  509. func transformAssetSet(assetSet *opencost.AssetSet) *AssetResponse {
  510. if assetSet == nil {
  511. return &AssetResponse{Assets: make(map[string]*AssetSet)}
  512. }
  513. mcpAssets := make(map[string]*AssetSet)
  514. // Create a single set for all assets
  515. mcpSet := &AssetSet{
  516. Name: "assets",
  517. Assets: []*Asset{},
  518. }
  519. for _, asset := range assetSet.Assets {
  520. if asset == nil {
  521. continue
  522. }
  523. properties := asset.GetProperties()
  524. labels := asset.GetLabels()
  525. mcpAsset := &Asset{
  526. Type: asset.Type().String(),
  527. Properties: AssetProperties{
  528. Category: properties.Category,
  529. Provider: properties.Provider,
  530. Account: properties.Account,
  531. Project: properties.Project,
  532. Service: properties.Service,
  533. Cluster: properties.Cluster,
  534. Name: properties.Name,
  535. ProviderID: properties.ProviderID,
  536. },
  537. Labels: labels,
  538. Start: asset.GetStart(),
  539. End: asset.GetEnd(),
  540. Minutes: asset.Minutes(),
  541. Adjustment: asset.GetAdjustment(),
  542. TotalCost: asset.TotalCost(),
  543. }
  544. // Handle type-specific fields
  545. switch a := asset.(type) {
  546. case *opencost.Disk:
  547. mcpAsset.ByteHours = a.ByteHours
  548. mcpAsset.ByteHoursUsed = a.ByteHoursUsed
  549. mcpAsset.ByteUsageMax = a.ByteUsageMax
  550. mcpAsset.StorageClass = a.StorageClass
  551. mcpAsset.VolumeName = a.VolumeName
  552. mcpAsset.ClaimName = a.ClaimName
  553. mcpAsset.ClaimNamespace = a.ClaimNamespace
  554. mcpAsset.Local = a.Local
  555. if a.Breakdown != nil {
  556. mcpAsset.Breakdown = &AssetBreakdown{
  557. Idle: a.Breakdown.Idle,
  558. Other: a.Breakdown.Other,
  559. System: a.Breakdown.System,
  560. User: a.Breakdown.User,
  561. }
  562. }
  563. case *opencost.Node:
  564. mcpAsset.NodeType = a.NodeType
  565. mcpAsset.CPUCoreHours = a.CPUCoreHours
  566. mcpAsset.RAMByteHours = a.RAMByteHours
  567. mcpAsset.GPUHours = a.GPUHours
  568. mcpAsset.GPUCount = a.GPUCount
  569. mcpAsset.CPUCost = a.CPUCost
  570. mcpAsset.GPUCost = a.GPUCost
  571. mcpAsset.RAMCost = a.RAMCost
  572. mcpAsset.Discount = a.Discount
  573. mcpAsset.Preemptible = a.Preemptible
  574. if a.CPUBreakdown != nil {
  575. mcpAsset.CPUBreakdown = &AssetBreakdown{
  576. Idle: a.CPUBreakdown.Idle,
  577. Other: a.CPUBreakdown.Other,
  578. System: a.CPUBreakdown.System,
  579. User: a.CPUBreakdown.User,
  580. }
  581. }
  582. if a.RAMBreakdown != nil {
  583. mcpAsset.RAMBreakdown = &AssetBreakdown{
  584. Idle: a.RAMBreakdown.Idle,
  585. Other: a.RAMBreakdown.Other,
  586. System: a.RAMBreakdown.System,
  587. User: a.RAMBreakdown.User,
  588. }
  589. }
  590. if a.Overhead != nil {
  591. mcpAsset.Overhead = &NodeOverhead{
  592. RamOverheadFraction: a.Overhead.RamOverheadFraction,
  593. CpuOverheadFraction: a.Overhead.CpuOverheadFraction,
  594. OverheadCostFraction: a.Overhead.OverheadCostFraction,
  595. }
  596. }
  597. case *opencost.LoadBalancer:
  598. mcpAsset.Private = a.Private
  599. mcpAsset.Ip = a.Ip
  600. case *opencost.Network:
  601. // Network assets have no specific fields beyond the base asset structure
  602. // All relevant data is in Properties, Labels, Cost, etc.
  603. case *opencost.Cloud:
  604. mcpAsset.Credit = a.Credit
  605. case *opencost.ClusterManagement:
  606. // ClusterManagement assets have no specific fields beyond the base asset structure
  607. // All relevant data is in Properties, Labels, Cost, etc.
  608. }
  609. mcpSet.Assets = append(mcpSet.Assets, mcpAsset)
  610. }
  611. mcpAssets["assets"] = mcpSet
  612. return &AssetResponse{
  613. Assets: mcpAssets,
  614. }
  615. }
  616. // QueryCloudCosts translates an MCP query into a CloudCost repository query and transforms the result.
  617. // The ctx parameter is used for timeout and cancellation handling of the cloud cost query.
  618. func (s *MCPServer) QueryCloudCosts(ctx context.Context, query *OpenCostQueryRequest) (*CloudCostResponse, error) {
  619. // 1. Check if cloud cost querier is available
  620. if s.cloudQuerier == nil {
  621. return nil, fmt.Errorf("cloud cost querier not configured - check cloud-integration.json file")
  622. }
  623. // 2. Parse Window
  624. window, err := opencost.ParseWindowWithOffset(query.Window, 0) // 0 offset for UTC
  625. if err != nil {
  626. return nil, fmt.Errorf("failed to parse window '%s': %w", query.Window, err)
  627. }
  628. // 3. Build query request
  629. request := cloudcost.QueryRequest{
  630. Start: *window.Start(),
  631. End: *window.End(),
  632. Filter: nil, // Will be set from CloudCostParams if provided
  633. }
  634. // 4. Apply filtering and aggregation from CloudCostParams
  635. if query.CloudCostParams != nil {
  636. request = s.buildCloudCostQueryRequest(request, query.CloudCostParams)
  637. }
  638. // 5. Create a timeout context for the query with configured timeout
  639. queryTimeout := env.GetMCPQueryTimeout()
  640. queryCtx, cancel := context.WithTimeout(ctx, queryTimeout)
  641. defer cancel()
  642. // 6. Query the repository (this handles multiple cloud providers automatically)
  643. ccsr, err := s.cloudQuerier.Query(queryCtx, request)
  644. if err != nil {
  645. return nil, fmt.Errorf("failed to query cloud costs: %w", err)
  646. }
  647. // 7. Transform Response
  648. return transformCloudCostSetRange(ccsr), nil
  649. }
  650. // buildCloudCostQueryRequest builds a QueryRequest from CloudCostParams
  651. func (s *MCPServer) buildCloudCostQueryRequest(request cloudcost.QueryRequest, params *CloudCostQuery) cloudcost.QueryRequest {
  652. // Set aggregation
  653. if params.Aggregate != "" {
  654. aggregateBy := strings.Split(params.Aggregate, ",")
  655. request.AggregateBy = aggregateBy
  656. }
  657. // Set accumulation
  658. if params.Accumulate != "" {
  659. request.Accumulate = opencost.ParseAccumulate(params.Accumulate)
  660. }
  661. // Build filter from individual parameters or filter string
  662. var filter filter.Filter
  663. var err error
  664. if params.Filter != "" {
  665. // Parse the filter string directly
  666. parser := cloudcostfilter.NewCloudCostFilterParser()
  667. filter, err = parser.Parse(params.Filter)
  668. if err != nil {
  669. // Log error but continue without filter rather than failing the entire request
  670. log.Warnf("failed to parse filter string '%s': %v", params.Filter, err)
  671. }
  672. } else {
  673. // Build filter from individual parameters
  674. filter = s.buildFilterFromParams(params)
  675. }
  676. request.Filter = filter
  677. return request
  678. }
  679. // buildFilterFromParams creates a filter from individual CloudCostQuery parameters
  680. func (s *MCPServer) buildFilterFromParams(params *CloudCostQuery) filter.Filter {
  681. var filterParts []string
  682. // Add provider filter
  683. if params.Provider != "" {
  684. filterParts = append(filterParts, fmt.Sprintf(`provider:"%s"`, params.Provider))
  685. }
  686. // Add providerID filter
  687. if params.ProviderID != "" {
  688. filterParts = append(filterParts, fmt.Sprintf(`providerID:"%s"`, params.ProviderID))
  689. }
  690. // Add service filter
  691. if params.Service != "" {
  692. filterParts = append(filterParts, fmt.Sprintf(`service:"%s"`, params.Service))
  693. }
  694. // Add category filter
  695. if params.Category != "" {
  696. filterParts = append(filterParts, fmt.Sprintf(`category:"%s"`, params.Category))
  697. }
  698. // Region is intentionally not supported here
  699. // Add account filter (maps to accountID)
  700. if params.AccountID != "" {
  701. filterParts = append(filterParts, fmt.Sprintf(`accountID:"%s"`, params.AccountID))
  702. }
  703. // Add invoiceEntityID filter
  704. if params.InvoiceEntityID != "" {
  705. filterParts = append(filterParts, fmt.Sprintf(`invoiceEntityID:"%s"`, params.InvoiceEntityID))
  706. }
  707. // Add label filters (label[key]:"value")
  708. if len(params.Labels) > 0 {
  709. for k, v := range params.Labels {
  710. if k == "" {
  711. continue
  712. }
  713. filterParts = append(filterParts, fmt.Sprintf(`label[%s]:"%s"`, k, v))
  714. }
  715. }
  716. // If no filters specified, return nil
  717. if len(filterParts) == 0 {
  718. return nil
  719. }
  720. // Combine all filter parts with AND logic (parser expects 'and')
  721. filterString := strings.Join(filterParts, " and ")
  722. // Parse the combined filter string
  723. parser := cloudcostfilter.NewCloudCostFilterParser()
  724. filter, err := parser.Parse(filterString)
  725. if err != nil {
  726. // Log error but return nil rather than failing
  727. log.Warnf("failed to parse combined filter '%s': %v", filterString, err)
  728. return nil
  729. }
  730. return filter
  731. }
  732. // transformCloudCostSetRange converts a opencost.CloudCostSetRange into the MCP's CloudCostResponse format.
  733. func transformCloudCostSetRange(ccsr *opencost.CloudCostSetRange) *CloudCostResponse {
  734. if ccsr == nil || len(ccsr.CloudCostSets) == 0 {
  735. return &CloudCostResponse{
  736. CloudCosts: make(map[string]*CloudCostSet),
  737. Summary: &CloudCostSummary{
  738. TotalNetCost: 0,
  739. },
  740. }
  741. }
  742. mcpCloudCosts := make(map[string]*CloudCostSet)
  743. var totalNetCost, totalAmortizedCost, totalInvoicedCost float64
  744. providerBreakdown := make(map[string]float64)
  745. serviceBreakdown := make(map[string]float64)
  746. regionBreakdown := make(map[string]float64)
  747. // Process each cloud cost set in the range
  748. for i, ccSet := range ccsr.CloudCostSets {
  749. if ccSet == nil {
  750. log.Warnf("transformCloudCostSetRange: skipping nil CloudCostSet at index %d", i)
  751. continue
  752. }
  753. // Check for nil Window or nil Start/End pointers before dereferencing
  754. if ccSet.Window.Start() == nil || ccSet.Window.End() == nil {
  755. log.Warnf("transformCloudCostSetRange: skipping CloudCostSet at index %d with invalid window (start=%v, end=%v)", i, ccSet.Window.Start(), ccSet.Window.End())
  756. continue
  757. }
  758. setName := fmt.Sprintf("cloudcosts_%d", i)
  759. mcpSet := &CloudCostSet{
  760. Name: setName,
  761. CloudCosts: []*CloudCost{},
  762. AggregationProperties: ccSet.AggregationProperties,
  763. Window: &TimeWindow{
  764. Start: *ccSet.Window.Start(),
  765. End: *ccSet.Window.End(),
  766. },
  767. }
  768. // Convert each cloud cost item
  769. for _, item := range ccSet.CloudCosts {
  770. if item == nil {
  771. log.Warnf("transformCloudCostSetRange: skipping nil CloudCost item in set %s", setName)
  772. continue
  773. }
  774. // Check for nil Window or nil Start/End pointers on the item
  775. if item.Window.Start() == nil || item.Window.End() == nil {
  776. log.Warnf("transformCloudCostSetRange: skipping CloudCost item with invalid window (start=%v, end=%v) in set %s", item.Window.Start(), item.Window.End(), setName)
  777. continue
  778. }
  779. mcpCC := &CloudCost{
  780. Properties: CloudCostProperties{
  781. ProviderID: item.Properties.ProviderID,
  782. Provider: item.Properties.Provider,
  783. AccountID: item.Properties.AccountID,
  784. AccountName: item.Properties.AccountName,
  785. InvoiceEntityID: item.Properties.InvoiceEntityID,
  786. InvoiceEntityName: item.Properties.InvoiceEntityName,
  787. RegionID: item.Properties.RegionID,
  788. AvailabilityZone: item.Properties.AvailabilityZone,
  789. Service: item.Properties.Service,
  790. Category: item.Properties.Category,
  791. Labels: item.Properties.Labels,
  792. },
  793. Window: TimeWindow{
  794. Start: *item.Window.Start(),
  795. End: *item.Window.End(),
  796. },
  797. ListCost: CostMetric{
  798. Cost: item.ListCost.Cost,
  799. KubernetesPercent: item.ListCost.KubernetesPercent,
  800. },
  801. NetCost: CostMetric{
  802. Cost: item.NetCost.Cost,
  803. KubernetesPercent: item.NetCost.KubernetesPercent,
  804. },
  805. AmortizedNetCost: CostMetric{
  806. Cost: item.AmortizedNetCost.Cost,
  807. KubernetesPercent: item.AmortizedNetCost.KubernetesPercent,
  808. },
  809. InvoicedCost: CostMetric{
  810. Cost: item.InvoicedCost.Cost,
  811. KubernetesPercent: item.InvoicedCost.KubernetesPercent,
  812. },
  813. AmortizedCost: CostMetric{
  814. Cost: item.AmortizedCost.Cost,
  815. KubernetesPercent: item.AmortizedCost.KubernetesPercent,
  816. },
  817. }
  818. mcpSet.CloudCosts = append(mcpSet.CloudCosts, mcpCC)
  819. // Update summary totals
  820. totalNetCost += item.NetCost.Cost
  821. totalAmortizedCost += item.AmortizedNetCost.Cost
  822. totalInvoicedCost += item.InvoicedCost.Cost
  823. // Update breakdowns
  824. providerBreakdown[item.Properties.Provider] += item.NetCost.Cost
  825. serviceBreakdown[item.Properties.Service] += item.NetCost.Cost
  826. regionBreakdown[item.Properties.RegionID] += item.NetCost.Cost
  827. }
  828. mcpCloudCosts[setName] = mcpSet
  829. }
  830. // Calculate cost-weighted average Kubernetes percentage (by NetCost)
  831. var avgKubernetesPercent float64
  832. var numerator, denominator float64
  833. for _, ccSet := range ccsr.CloudCostSets {
  834. if ccSet == nil {
  835. log.Warnf("transformCloudCostSetRange: skipping nil CloudCostSet in Kubernetes percent calculation")
  836. continue
  837. }
  838. // Skip sets with invalid windows (consistent with first loop)
  839. if ccSet.Window.Start() == nil || ccSet.Window.End() == nil {
  840. log.Warnf("transformCloudCostSetRange: skipping CloudCostSet with invalid window (start=%v, end=%v) in Kubernetes percent calculation", ccSet.Window.Start(), ccSet.Window.End())
  841. continue
  842. }
  843. for _, item := range ccSet.CloudCosts {
  844. if item == nil {
  845. log.Warnf("transformCloudCostSetRange: skipping nil CloudCost item in Kubernetes percent calculation")
  846. continue
  847. }
  848. // Skip items with invalid windows (consistent with first loop)
  849. if item.Window.Start() == nil || item.Window.End() == nil {
  850. log.Warnf("transformCloudCostSetRange: skipping CloudCost item with invalid window (start=%v, end=%v) in Kubernetes percent calculation", item.Window.Start(), item.Window.End())
  851. continue
  852. }
  853. cost := item.NetCost.Cost
  854. percent := item.NetCost.KubernetesPercent
  855. if cost <= 0 {
  856. continue
  857. }
  858. numerator += cost * percent
  859. denominator += cost
  860. }
  861. }
  862. if denominator > 0 {
  863. avgKubernetesPercent = numerator / denominator
  864. }
  865. summary := &CloudCostSummary{
  866. TotalNetCost: totalNetCost,
  867. TotalAmortizedCost: totalAmortizedCost,
  868. TotalInvoicedCost: totalInvoicedCost,
  869. KubernetesPercent: avgKubernetesPercent,
  870. ProviderBreakdown: providerBreakdown,
  871. ServiceBreakdown: serviceBreakdown,
  872. RegionBreakdown: regionBreakdown,
  873. }
  874. return &CloudCostResponse{
  875. CloudCosts: mcpCloudCosts,
  876. Summary: summary,
  877. }
  878. }
  879. // QueryEfficiency queries allocation data and computes efficiency metrics with recommendations.
  880. func (s *MCPServer) QueryEfficiency(query *OpenCostQueryRequest) (*EfficiencyResponse, error) {
  881. // 1. Parse Window
  882. window, err := opencost.ParseWindowWithOffset(query.Window, 0)
  883. if err != nil {
  884. return nil, fmt.Errorf("failed to parse window '%s': %w", query.Window, err)
  885. }
  886. // 2. Set default parameters
  887. var aggregateBy []string
  888. var filterString string
  889. var bufferMultiplier float64 = efficiencyBufferMultiplier // Default to 1.2 (20% headroom)
  890. // 3. Parse efficiency parameters if provided
  891. if query.EfficiencyParams != nil {
  892. // Parse aggregation properties (default to pod if not specified)
  893. if query.EfficiencyParams.Aggregate != "" {
  894. aggregateBy = strings.Split(query.EfficiencyParams.Aggregate, ",")
  895. } else {
  896. aggregateBy = []string{"pod"}
  897. }
  898. // Set filter string
  899. filterString = query.EfficiencyParams.Filter
  900. // Validate filter string if provided
  901. if filterString != "" {
  902. parser := allocation.NewAllocationFilterParser()
  903. _, err := parser.Parse(filterString)
  904. if err != nil {
  905. return nil, fmt.Errorf("invalid allocation filter '%s': %w", filterString, err)
  906. }
  907. }
  908. // Set buffer multiplier if provided, otherwise use default
  909. if query.EfficiencyParams.EfficiencyBufferMultiplier != nil {
  910. bufferMultiplier = *query.EfficiencyParams.EfficiencyBufferMultiplier
  911. }
  912. } else {
  913. // Default to pod-level aggregation
  914. aggregateBy = []string{"pod"}
  915. filterString = ""
  916. }
  917. // 4. Query allocations with the specified parameters
  918. // Use the entire window as step to get aggregated data
  919. step := window.Duration()
  920. asr, err := s.costModel.QueryAllocation(
  921. window,
  922. step,
  923. aggregateBy,
  924. false, // includeIdle
  925. false, // idleByNode
  926. false, // includeProportionalAssetResourceCosts
  927. false, // includeAggregatedMetadata
  928. false, // sharedLoadBalancer
  929. opencost.AccumulateOptionNone,
  930. false, // shareIdle
  931. filterString,
  932. )
  933. if err != nil {
  934. return nil, fmt.Errorf("failed to query allocations: %w", err)
  935. }
  936. // 5. Handle empty results
  937. if asr == nil || len(asr.Allocations) == 0 {
  938. return &EfficiencyResponse{
  939. Efficiencies: []*EfficiencyMetric{},
  940. }, nil
  941. }
  942. // 6. Compute efficiency metrics from allocations using concurrent processing
  943. var (
  944. mu sync.Mutex
  945. wg sync.WaitGroup
  946. efficiencies = make([]*EfficiencyMetric, 0)
  947. )
  948. // Process each allocation set (typically one per time window) concurrently
  949. for _, allocSet := range asr.Allocations {
  950. if allocSet == nil {
  951. continue
  952. }
  953. // Process this allocation set in a goroutine
  954. wg.Add(1)
  955. go func(allocSet *opencost.AllocationSet) {
  956. defer wg.Done()
  957. // Compute metrics for all allocations in this set
  958. localMetrics := make([]*EfficiencyMetric, 0, len(allocSet.Allocations))
  959. for _, alloc := range allocSet.Allocations {
  960. if metric := computeEfficiencyMetric(alloc, bufferMultiplier); metric != nil {
  961. localMetrics = append(localMetrics, metric)
  962. }
  963. }
  964. // Append results to shared slice (thread-safe)
  965. if len(localMetrics) > 0 {
  966. mu.Lock()
  967. efficiencies = append(efficiencies, localMetrics...)
  968. mu.Unlock()
  969. }
  970. }(allocSet)
  971. }
  972. // Wait for all goroutines to complete
  973. wg.Wait()
  974. return &EfficiencyResponse{
  975. Efficiencies: efficiencies,
  976. }, nil
  977. }
  978. // safeDiv performs division and returns 0 if denominator is 0.
  979. func safeDiv(numerator, denominator float64) float64 {
  980. if denominator == 0 {
  981. return 0
  982. }
  983. return numerator / denominator
  984. }
  985. // computeEfficiencyMetric calculates efficiency metrics for a single allocation.
  986. func computeEfficiencyMetric(alloc *opencost.Allocation, bufferMultiplier float64) *EfficiencyMetric {
  987. if alloc == nil {
  988. return nil
  989. }
  990. // Calculate time duration in hours
  991. hours := alloc.Minutes() / 60.0
  992. if hours <= 0 {
  993. return nil
  994. }
  995. // Get current usage (average over the period)
  996. cpuCoresUsed := alloc.CPUCoreHours / hours
  997. ramBytesUsed := alloc.RAMByteHours / hours
  998. // Get requested amounts
  999. cpuCoresRequested := alloc.CPUCoreRequestAverage
  1000. ramBytesRequested := alloc.RAMBytesRequestAverage
  1001. // Calculate current efficiency (will be 0 if no requests are set)
  1002. cpuEfficiency := safeDiv(cpuCoresUsed, cpuCoresRequested)
  1003. memoryEfficiency := safeDiv(ramBytesUsed, ramBytesRequested)
  1004. // Calculate recommendations with buffer for headroom
  1005. recommendedCPU := cpuCoresUsed * bufferMultiplier
  1006. recommendedRAM := ramBytesUsed * bufferMultiplier
  1007. // Ensure recommendations meet minimum thresholds
  1008. if recommendedCPU < efficiencyMinCPU {
  1009. recommendedCPU = efficiencyMinCPU
  1010. }
  1011. if recommendedRAM < efficiencyMinRAM {
  1012. recommendedRAM = efficiencyMinRAM
  1013. }
  1014. // Calculate resulting efficiency after applying recommendations
  1015. resultingCPUEff := safeDiv(cpuCoresUsed, recommendedCPU)
  1016. resultingMemEff := safeDiv(ramBytesUsed, recommendedRAM)
  1017. // Calculate cost per unit based on REQUESTED amounts (not used amounts)
  1018. // This gives us the cost per core-hour or byte-hour that the cluster charges
  1019. cpuCostPerCoreHour := safeDiv(alloc.CPUCost, cpuCoresRequested*hours)
  1020. ramCostPerByteHour := safeDiv(alloc.RAMCost, ramBytesRequested*hours)
  1021. // Current total cost
  1022. currentTotalCost := alloc.TotalCost()
  1023. // Estimate recommended cost based on recommended requests
  1024. recommendedCPUCost := recommendedCPU * hours * cpuCostPerCoreHour
  1025. recommendedRAMCost := recommendedRAM * hours * ramCostPerByteHour
  1026. // Keep other costs the same (PV, network, shared, external, GPU)
  1027. otherCosts := alloc.PVCost() + alloc.NetworkCost + alloc.SharedCost + alloc.ExternalCost + alloc.GPUCost
  1028. recommendedTotalCost := recommendedCPUCost + recommendedRAMCost + otherCosts
  1029. // Clamp recommended cost to avoid rounding issues making it higher than current
  1030. if recommendedTotalCost > currentTotalCost && (recommendedTotalCost-currentTotalCost) < 0.0001 {
  1031. recommendedTotalCost = currentTotalCost
  1032. }
  1033. // Calculate savings
  1034. costSavings := currentTotalCost - recommendedTotalCost
  1035. costSavingsPercent := safeDiv(costSavings, currentTotalCost) * 100
  1036. return &EfficiencyMetric{
  1037. Name: alloc.Name,
  1038. CPUEfficiency: cpuEfficiency,
  1039. MemoryEfficiency: memoryEfficiency,
  1040. CPUCoresRequested: cpuCoresRequested,
  1041. CPUCoresUsed: cpuCoresUsed,
  1042. RAMBytesRequested: ramBytesRequested,
  1043. RAMBytesUsed: ramBytesUsed,
  1044. RecommendedCPURequest: recommendedCPU,
  1045. RecommendedRAMRequest: recommendedRAM,
  1046. ResultingCPUEfficiency: resultingCPUEff,
  1047. ResultingMemoryEfficiency: resultingMemEff,
  1048. CurrentTotalCost: currentTotalCost,
  1049. RecommendedCost: recommendedTotalCost,
  1050. CostSavings: costSavings,
  1051. CostSavingsPercent: costSavingsPercent,
  1052. EfficiencyBufferMultiplier: bufferMultiplier,
  1053. Start: alloc.Start,
  1054. End: alloc.End,
  1055. }
  1056. }