Sfoglia il codice sorgente

feat: expose a step parameter in the get_efficiency tool (#3869)

Signed-off-by: Nikita <nikita.ozhyhin@phantom.app>
nikita-phantom-ops 1 giorno fa
parent
commit
d3dd1f66d3
4 ha cambiato i file con 129 aggiunte e 10 eliminazioni
  1. 18 0
      README.md
  2. 20 3
      pkg/cmd/costmodel/costmodel.go
  3. 47 7
      pkg/mcp/server.go
  4. 44 0
      pkg/mcp/server_test.go

+ 18 - 0
README.md

@@ -223,6 +223,16 @@ Retrieve cloud cost data with provider, service, and region filtering.
 - `region` (optional): Filter by region (e.g., "us-west-1", "us-central1")
 - `accountID` (optional): Filter by account ID
 
+#### `get_efficiency`
+Retrieve resource efficiency metrics with rightsizing recommendations and cost savings analysis.
+
+**Parameters:**
+- `window` (required): Time window (e.g., "7d", "1h", "30m")
+- `aggregate` (optional): Aggregation properties (e.g., "pod", "namespace", "controller")
+- `filter` (optional): Filter expression for allocations
+- `buffer_multiplier` (optional): Buffer multiplier for recommendations (default: 1.2 for 20% headroom)
+- `step` (optional): Query step size (e.g., "1h", "6h"); smaller steps reduce peak memory by batching large windows, but may increase query time/requests
+
 ### Supported Asset Types
 
 - **Node**: Compute instances with CPU, RAM, GPU details
@@ -257,6 +267,14 @@ const cloudCosts = await mcpClient.callTool('get_cloud_costs', {
   accumulate: 'day',
   filter: 'regionID:"us-west-1"'
 });
+
+// Get efficiency metrics with rightsizing recommendations
+const efficiency = await mcpClient.callTool('get_efficiency', {
+  window: '7d',
+  aggregate: 'namespace,controller',
+  step: '6h',
+  buffer_multiplier: 1.2
+});
 ```
 
 For detailed setup instructions and advanced configuration, see the [Helm chart documentation](https://github.com/opencost/opencost-helm-chart/blob/main/charts/opencost/README.md#mcp-server).

+ 20 - 3
pkg/cmd/costmodel/costmodel.go

@@ -11,6 +11,7 @@ import (
 
 	"github.com/julienschmidt/httprouter"
 	"github.com/opencost/opencost/core/pkg/util/apiutil"
+	"github.com/opencost/opencost/core/pkg/util/timeutil"
 	"github.com/opencost/opencost/pkg/cloudcost"
 	"github.com/opencost/opencost/pkg/customcost"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -191,14 +192,16 @@ func StartMCPServer(ctx context.Context, accesses *costmodel.Accesses, cloudCost
 
 	// Define tool handlers
 	handleAllocationCosts := func(ctx context.Context, req *mcp_sdk.CallToolRequest, args AllocationArgs) (*mcp_sdk.CallToolResult, interface{}, error) {
-		// Parse step duration if provided
 		var step time.Duration
-		var err error
 		if args.Step != "" {
-			step, err = time.ParseDuration(args.Step)
+			var err error
+			step, err = timeutil.ParseDuration(args.Step)
 			if err != nil {
 				return nil, nil, fmt.Errorf("invalid step duration '%s': %w", args.Step, err)
 			}
+			if step <= 0 {
+				return nil, nil, fmt.Errorf("invalid step duration '%s': must be > 0", args.Step)
+			}
 		}
 
 		queryRequest := &opencost_mcp.OpenCostQueryRequest{
@@ -278,10 +281,23 @@ func StartMCPServer(ctx context.Context, accesses *costmodel.Accesses, cloudCost
 	}
 
 	handleEfficiency := func(ctx context.Context, req *mcp_sdk.CallToolRequest, args EfficiencyArgs) (*mcp_sdk.CallToolResult, interface{}, error) {
+		var step time.Duration
+		if args.Step != "" {
+			var err error
+			step, err = timeutil.ParseDuration(args.Step)
+			if err != nil {
+				return nil, nil, fmt.Errorf("invalid step duration '%s': %w", args.Step, err)
+			}
+			if step <= 0 {
+				return nil, nil, fmt.Errorf("invalid step duration '%s': must be > 0", args.Step)
+			}
+		}
+
 		queryRequest := &opencost_mcp.OpenCostQueryRequest{
 			QueryType: opencost_mcp.EfficiencyQueryType,
 			Window:    args.Window,
 			EfficiencyParams: &opencost_mcp.EfficiencyQuery{
+				Step:                       step,
 				Aggregate:                  args.Aggregate,
 				Filter:                     args.Filter,
 				EfficiencyBufferMultiplier: args.BufferMultiplier,
@@ -408,4 +424,5 @@ type EfficiencyArgs struct {
 	Aggregate        string   `json:"aggregate,omitempty"`         // Aggregation level (e.g., "pod", "namespace", "controller")
 	Filter           string   `json:"filter,omitempty"`            // Filter expression (same as allocation filters)
 	BufferMultiplier *float64 `json:"buffer_multiplier,omitempty"` // Buffer multiplier for recommendations (default: 1.2 for 20% headroom, e.g., 1.4 for 40%)
+	Step             string   `json:"step,omitempty"`              // Query step size (e.g., "1h", "6h"); smaller steps reduce peak memory by batching large windows, but may increase query time/requests
 }

+ 47 - 7
pkg/mcp/server.go

@@ -107,9 +107,10 @@ type CloudCostQuery struct {
 
 // EfficiencyQuery contains the parameters for an efficiency query.
 type EfficiencyQuery struct {
-	Aggregate                  string   `json:"aggregate,omitempty"`                  // Aggregation properties (e.g., "pod", "namespace", "controller")
-	Filter                     string   `json:"filter,omitempty"`                     // Filter expression for allocations (same as AllocationQuery)
-	EfficiencyBufferMultiplier *float64 `json:"efficiencyBufferMultiplier,omitempty"` // Buffer multiplier for recommendations (default: 1.2 for 20% headroom)
+	Step                       time.Duration `json:"step,omitempty"`                       // Query step size; controls peak memory by batching large windows (default: auto-scaled based on window)
+	Aggregate                  string        `json:"aggregate,omitempty"`                  // Aggregation properties (e.g., "pod", "namespace", "controller")
+	Filter                     string        `json:"filter,omitempty"`                     // Filter expression for allocations (same as AllocationQuery)
+	EfficiencyBufferMultiplier *float64      `json:"efficiencyBufferMultiplier,omitempty"` // Buffer multiplier for recommendations (default: 1.2 for 20% headroom)
 }
 
 // AllocationResponse represents the allocation data returned to the AI agent.
@@ -1016,6 +1017,23 @@ func transformCloudCostSetRange(ccsr *opencost.CloudCostSetRange) *CloudCostResp
 	}
 }
 
+// defaultEfficiencyStep returns a step duration that keeps peak memory
+// bounded for large query windows. When the caller does not specify a step,
+// this provides a safe default that avoids loading the entire window into
+// memory at once.
+func defaultEfficiencyStep(windowDuration time.Duration) time.Duration {
+	switch {
+	case windowDuration >= 30*24*time.Hour:
+		return 24 * time.Hour
+	case windowDuration >= 7*24*time.Hour:
+		return 6 * time.Hour
+	case windowDuration >= 24*time.Hour:
+		return time.Hour
+	default:
+		return windowDuration
+	}
+}
+
 // QueryEfficiency queries allocation data and computes efficiency metrics with recommendations.
 func (s *MCPServer) QueryEfficiency(query *OpenCostQueryRequest) (*EfficiencyResponse, error) {
 	// 1. Parse Window
@@ -1060,9 +1078,31 @@ func (s *MCPServer) QueryEfficiency(query *OpenCostQueryRequest) (*EfficiencyRes
 		filterString = ""
 	}
 
-	// 4. Query allocations with the specified parameters
-	// Use the entire window as step to get aggregated data
-	step := window.Duration()
+	// 4. Determine query step size.
+	// A smaller step reduces peak memory by breaking large windows into batches.
+	// Results are accumulated so the output is functionally equivalent regardless
+	// of step, though minor floating-point differences are possible because
+	// per-step cost calculations (which use max(request, usage)) are summed
+	// rather than computed in a single pass.
+	var step time.Duration
+	if query.EfficiencyParams != nil && query.EfficiencyParams.Step > 0 {
+		step = query.EfficiencyParams.Step
+	} else {
+		step = defaultEfficiencyStep(window.Duration())
+	}
+
+	if step > window.Duration() {
+		step = window.Duration()
+	}
+	if step <= 0 {
+		return nil, fmt.Errorf("invalid query: window has zero or negative duration")
+	}
+
+	accumulateBy := opencost.AccumulateOptionNone
+	if step < window.Duration() {
+		accumulateBy = opencost.AccumulateOptionAll
+	}
+
 	asr, err := s.costModel.QueryAllocation(
 		window,
 		step,
@@ -1072,7 +1112,7 @@ func (s *MCPServer) QueryEfficiency(query *OpenCostQueryRequest) (*EfficiencyRes
 		false, // includeProportionalAssetResourceCosts
 		false, // includeAggregatedMetadata
 		false, // sharedLoadBalancer
-		opencost.AccumulateOptionNone,
+		accumulateBy,
 		false, // shareIdle
 		filterString,
 	)

+ 44 - 0
pkg/mcp/server_test.go

@@ -920,11 +920,13 @@ func TestCloudCostQuery_NewFields(t *testing.T) {
 func TestEfficiencyQueryStruct(t *testing.T) {
 	bufferMultiplier := 1.4
 	query := EfficiencyQuery{
+		Step:                       5 * time.Minute,
 		Aggregate:                  "pod",
 		Filter:                     "namespace:production",
 		EfficiencyBufferMultiplier: &bufferMultiplier,
 	}
 
+	assert.Equal(t, 5*time.Minute, query.Step)
 	assert.Equal(t, "pod", query.Aggregate)
 	assert.Equal(t, "namespace:production", query.Filter)
 	assert.NotNil(t, query.EfficiencyBufferMultiplier)
@@ -934,6 +936,7 @@ func TestEfficiencyQueryStruct(t *testing.T) {
 func TestEfficiencyQueryDefaultValues(t *testing.T) {
 	query := EfficiencyQuery{}
 
+	assert.Equal(t, time.Duration(0), query.Step)
 	assert.Empty(t, query.Aggregate)
 	assert.Empty(t, query.Filter)
 	assert.Nil(t, query.EfficiencyBufferMultiplier)
@@ -1365,6 +1368,47 @@ func TestEfficiencyQueryType(t *testing.T) {
 	assert.Equal(t, QueryType("efficiency"), EfficiencyQueryType)
 }
 
+func TestDefaultEfficiencyStep(t *testing.T) {
+	tests := []struct {
+		name     string
+		window   time.Duration
+		expected time.Duration
+	}{
+		{"30d window uses 1d step", 30 * 24 * time.Hour, 24 * time.Hour},
+		{"90d window uses 1d step", 90 * 24 * time.Hour, 24 * time.Hour},
+		{"7d window uses 6h step", 7 * 24 * time.Hour, 6 * time.Hour},
+		{"14d window uses 6h step", 14 * 24 * time.Hour, 6 * time.Hour},
+		{"1d window uses 1h step", 24 * time.Hour, time.Hour},
+		{"3d window uses 1h step", 3 * 24 * time.Hour, time.Hour},
+		{"12h window returns full window", 12 * time.Hour, 12 * time.Hour},
+		{"1h window returns full window", time.Hour, time.Hour},
+		{"30m window returns full window", 30 * time.Minute, 30 * time.Minute},
+		{"exactly at 7d boundary uses 6h step", 7 * 24 * time.Hour, 6 * time.Hour},
+		{"just under 7d uses 1h step", 7*24*time.Hour - time.Minute, time.Hour},
+		{"just under 1d uses full window", 24*time.Hour - time.Minute, 24*time.Hour - time.Minute},
+		{"zero window returns zero", 0, 0},
+		{"negative window returns negative", -time.Hour, -time.Hour},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			assert.Equal(t, tt.expected, defaultEfficiencyStep(tt.window))
+		})
+	}
+}
+
+func TestEfficiencyQueryRequest_StepField(t *testing.T) {
+	req := &OpenCostQueryRequest{
+		QueryType: EfficiencyQueryType,
+		Window:    "7d",
+		EfficiencyParams: &EfficiencyQuery{
+			Step:      6 * time.Hour,
+			Aggregate: "pod",
+		},
+	}
+	assert.Equal(t, 6*time.Hour, req.EfficiencyParams.Step)
+	assert.Equal(t, "pod", req.EfficiencyParams.Aggregate)
+}
+
 // TestTransformCloudCostSetRange_NilPointerHandling verifies that nil pointer dereferences
 // are prevented in transformCloudCostSetRange for issue #3502
 func TestTransformCloudCostSetRange_NilPointerHandling(t *testing.T) {