Просмотр исходного кода

Add and Use GPU hours for nodes

Sean Holcomb 5 лет назад
Родитель
Сommit
fb7c928dfb
4 измененных файлов с 61 добавлено и 14 удалено
  1. 11 2
      pkg/kubecost/allocation.go
  2. 8 8
      pkg/kubecost/allocation_test.go
  3. 26 4
      pkg/kubecost/asset.go
  4. 16 0
      pkg/kubecost/asset_test.go

+ 11 - 2
pkg/kubecost/allocation.go

@@ -342,6 +342,14 @@ func (a *Allocation) RAMBytes() float64 {
 	return a.RAMByteHours / (a.Minutes() / 60.0)
 }
 
+// GPUs converts the Allocation's GPUHours into average GPUs
+func (a *Allocation) GPUs() float64 {
+	if a.Minutes() <= 0.0 {
+		return 0.0
+	}
+	return a.GPUHours / (a.Minutes() / 60.0)
+}
+
 // PVBytes converts the Allocation's PVByteHours into average PVBytes
 func (a *Allocation) PVBytes() float64 {
 	if a.Minutes() <= 0.0 {
@@ -366,6 +374,7 @@ func (a *Allocation) MarshalJSON() ([]byte, error) {
 	jsonEncodeFloat64(buffer, "cpuCost", a.CPUCost, ",")
 	jsonEncodeFloat64(buffer, "cpuAdjustment", a.CPUAdjustment, ",")
 	jsonEncodeFloat64(buffer, "cpuEfficiency", a.CPUEfficiency(), ",")
+	jsonEncodeFloat64(buffer, "gpuCount", a.GPUs(), ",")
 	jsonEncodeFloat64(buffer, "gpuHours", a.GPUHours, ",")
 	jsonEncodeFloat64(buffer, "gpuCost", a.GPUCost, ",")
 	jsonEncodeFloat64(buffer, "gpuAdjustment", a.GPUAdjustment, ",")
@@ -1568,8 +1577,8 @@ func (as *AllocationSet) ReconcileAllocations(assetSet *AssetSet) error {
 			log.Warningf("Missing Ram Byte Hours for node Provider ID: %s", providerId)
 		}
 		gpuUsageProportion := 0.0
-		if node.GPUCount != 0 && node.Minutes() != 0 {
-			gpuUsageProportion = a.GPUHours / (node.GPUCount * node.Minutes() / 60)
+		if node.GPUHours != 0 {
+			gpuUsageProportion = a.GPUHours / node.GPUHours
 		}
 		// No log for GPU because not all nodes have GPU
 

+ 8 - 8
pkg/kubecost/allocation_test.go

@@ -754,7 +754,7 @@ func generateAssetSets(start, end time.Time) []*AssetSet {
 	cluster1Nodes.adjustment = -10.00
 	cluster1Nodes.CPUCoreHours = 8
 	cluster1Nodes.RAMByteHours = 6
-	cluster1Nodes.GPUCount = 1
+	cluster1Nodes.GPUHours = 24
 
 	cluster2Node1 := NewNode("node1", "cluster2", "node1", start, end, NewWindow(&start, &end))
 	cluster2Node1.CPUCost = 20.0
@@ -762,7 +762,7 @@ func generateAssetSets(start, end time.Time) []*AssetSet {
 	cluster2Node1.GPUCost = 0.0
 	cluster2Node1.CPUCoreHours = 4
 	cluster2Node1.RAMByteHours = 3
-	cluster2Node1.GPUCount = 0
+	cluster2Node1.GPUHours = 0
 
 	cluster2Node2 := NewNode("node2", "cluster2", "node2", start, end, NewWindow(&start, &end))
 	cluster2Node2.CPUCost = 20.0
@@ -770,7 +770,7 @@ func generateAssetSets(start, end time.Time) []*AssetSet {
 	cluster2Node2.GPUCost = 0.0
 	cluster2Node2.CPUCoreHours = 3
 	cluster2Node2.RAMByteHours = 2
-	cluster2Node2.GPUCount = 0
+	cluster2Node2.GPUHours = 0
 
 	cluster2Node3 := NewNode("node3", "cluster2", "node3", start, end, NewWindow(&start, &end))
 	cluster2Node3.CPUCost = 10.0
@@ -778,7 +778,7 @@ func generateAssetSets(start, end time.Time) []*AssetSet {
 	cluster2Node3.GPUCost = 10.0
 	cluster2Node3.CPUCoreHours = 2
 	cluster2Node3.RAMByteHours = 2
-	cluster2Node3.GPUCount = 1
+	cluster2Node3.GPUHours = 24
 
 	cluster2Disk1 := NewDisk("disk1", "cluster2", "disk1", start, end, NewWindow(&start, &end))
 	cluster2Disk1.Cost = 5.0
@@ -820,7 +820,7 @@ func generateAssetSets(start, end time.Time) []*AssetSet {
 	cluster1Nodes.adjustment = 90.00
 	cluster1Nodes.CPUCoreHours = 8
 	cluster1Nodes.RAMByteHours = 6
-	cluster1Nodes.GPUCount = 1
+	cluster1Nodes.GPUHours = 24
 
 	cluster2Node1 = NewNode("node1", "cluster2", "node1", start, end, NewWindow(&start, &end))
 	cluster2Node1.CPUCost = 20.0
@@ -828,7 +828,7 @@ func generateAssetSets(start, end time.Time) []*AssetSet {
 	cluster2Node1.GPUCost = 0.0
 	cluster2Node1.CPUCoreHours = 4
 	cluster2Node1.RAMByteHours = 3
-	cluster2Node1.GPUCount = 0
+	cluster2Node1.GPUHours = 0
 
 	cluster2Node2 = NewNode("node2", "cluster2", "node2", start, end, NewWindow(&start, &end))
 	cluster2Node2.CPUCost = 20.0
@@ -836,7 +836,7 @@ func generateAssetSets(start, end time.Time) []*AssetSet {
 	cluster2Node2.GPUCost = 0.0
 	cluster2Node2.CPUCoreHours = 3
 	cluster2Node2.RAMByteHours = 2
-	cluster2Node2.GPUCount = 0
+	cluster2Node2.GPUHours = 0
 
 	cluster2Node3 = NewNode("node3", "cluster2", "node3", start, end, NewWindow(&start, &end))
 	cluster2Node3.CPUCost = 10.0
@@ -844,7 +844,7 @@ func generateAssetSets(start, end time.Time) []*AssetSet {
 	cluster2Node3.GPUCost = 10.0
 	cluster2Node3.CPUCoreHours = 2
 	cluster2Node3.RAMByteHours = 2
-	cluster2Node3.GPUCount = 1
+	cluster2Node3.GPUHours = 24
 
 	cluster2Disk1 = NewDisk("disk1", "cluster2", "disk1", start, end, NewWindow(&start, &end))
 	cluster2Disk1.Cost = 5.0

+ 26 - 4
pkg/kubecost/asset.go

@@ -1663,6 +1663,7 @@ type Node struct {
 	NodeType     string
 	CPUCoreHours float64
 	RAMByteHours float64
+	GPUHours     float64
 	CPUBreakdown *Breakdown
 	RAMBreakdown *Breakdown
 	CPUCost      float64
@@ -1889,6 +1890,7 @@ func (n *Node) add(that *Node) {
 
 	n.CPUCoreHours += that.CPUCoreHours
 	n.RAMByteHours += that.RAMByteHours
+	n.GPUHours += that.GPUHours
 
 	n.CPUCost += that.CPUCost
 	n.GPUCost += that.GPUCost
@@ -1912,6 +1914,7 @@ func (n *Node) Clone() Asset {
 		NodeType:     n.NodeType,
 		CPUCoreHours: n.CPUCoreHours,
 		RAMByteHours: n.RAMByteHours,
+		GPUHours:     n.GPUHours,
 		CPUBreakdown: n.CPUBreakdown.Clone(),
 		RAMBreakdown: n.RAMBreakdown.Clone(),
 		CPUCost:      n.CPUCost,
@@ -1960,6 +1963,9 @@ func (n *Node) Equal(a Asset) bool {
 	if n.RAMByteHours != that.RAMByteHours {
 		return false
 	}
+	if n.GPUHours != that.GPUHours {
+		return false
+	}
 	if !n.CPUBreakdown.Equal(that.CPUBreakdown) {
 		return false
 	}
@@ -2000,13 +2006,14 @@ func (n *Node) MarshalJSON() ([]byte, error) {
 	jsonEncodeFloat64(buffer, "ramBytes", n.RAMBytes(), ",")
 	jsonEncodeFloat64(buffer, "cpuCoreHours", n.CPUCoreHours, ",")
 	jsonEncodeFloat64(buffer, "ramByteHours", n.RAMByteHours, ",")
+	jsonEncodeFloat64(buffer, "GPUHours", n.GPUHours, ",")
 	jsonEncode(buffer, "cpuBreakdown", n.CPUBreakdown, ",")
 	jsonEncode(buffer, "ramBreakdown", n.RAMBreakdown, ",")
 	jsonEncodeFloat64(buffer, "preemptible", n.Preemptible, ",")
 	jsonEncodeFloat64(buffer, "discount", n.Discount, ",")
 	jsonEncodeFloat64(buffer, "cpuCost", n.CPUCost, ",")
 	jsonEncodeFloat64(buffer, "gpuCost", n.GPUCost, ",")
-	jsonEncodeFloat64(buffer, "gpuCount", n.GPUCount, ",")
+	jsonEncodeFloat64(buffer, "gpuCount", n.GPUs(), ",")
 	jsonEncodeFloat64(buffer, "ramCost", n.RAMCost, ",")
 	jsonEncodeFloat64(buffer, "adjustment", n.Adjustment(), ",")
 	jsonEncodeFloat64(buffer, "totalCost", n.TotalCost(), "")
@@ -2047,15 +2054,30 @@ func (n *Node) CPUCores() float64 {
 // and a 16GiB-RAM node running for the last 20 hours of the same 24-hour window
 // would produce:
 //   (12*10 + 16*20) / 24 = 18.333GiB RAM
-// However, any number of cores running for the full span of a window will
-// report the actual number of cores of the static node; e.g. the above
+// However, any number of bytes running for the full span of a window will
+// report the actual number of bytes of the static node; e.g. the above
 // scenario for one entire 24-hour window:
-//   (12*24 + 16*24) / 24 = (12 + 16) = 28 cores
+//   (12*24 + 16*24) / 24 = (12 + 16) = 28GiB RAM
 func (n *Node) RAMBytes() float64 {
 	// [b*hr]*([min/hr]*[1/min]) = [b*hr]/[hr] = b
 	return n.RAMByteHours * (60.0 / n.Minutes())
 }
 
+// GPUs returns the amount of GPUs belonging to the node. This could be
+// fractional because it's the number of gpu*hours divided by the number of
+// hours running; e.g. the sum of a 2 gpu node running for the first 10 hours
+// and a 1 gpu node running for the last 20 hours of the same 24-hour window
+// would produce:
+//   (2*10 + 1*20) / 24 = 1.667 GPUs
+// However, any number of GPUs running for the full span of a window will
+// report the actual number of GPUs of the static node; e.g. the above
+// scenario for one entire 24-hour window:
+//   (2*24 + 1*24) / 24 = (2 + 1) = 3 GPUs
+func (n *Node) GPUs() float64 {
+	// [b*hr]*([min/hr]*[1/min]) = [b*hr]/[hr] = b
+	return n.GPUHours * (60.0 / n.Minutes())
+}
+
 // LoadBalancer is an Asset representing a single load balancer in a cluster
 // TODO: add GB of ingress processed, numForwardingRules once we start recording those to prometheus metric
 type LoadBalancer struct {

+ 16 - 0
pkg/kubecost/asset_test.go

@@ -65,6 +65,7 @@ func generateAssetSet(start time.Time) *AssetSet {
 	node1.Discount = 0.5
 	node1.CPUCoreHours = 2.0 * hours
 	node1.RAMByteHours = 4.0 * gb * hours
+	node1.GPUHours = 1.0 * hours
 	node1.SetAdjustment(1.0)
 	node1.SetLabels(map[string]string{"test": "test"})
 
@@ -75,6 +76,7 @@ func generateAssetSet(start time.Time) *AssetSet {
 	node2.Discount = 0.5
 	node2.CPUCoreHours = 2.0 * hours
 	node2.RAMByteHours = 4.0 * gb * hours
+	node2.GPUHours = 0.0 * hours
 	node2.SetAdjustment(1.5)
 
 	node3 := NewNode("node3", "cluster1", "gcp-node3", *window.Clone().start, *window.Clone().end, window.Clone())
@@ -84,6 +86,7 @@ func generateAssetSet(start time.Time) *AssetSet {
 	node3.Discount = 0.5
 	node3.CPUCoreHours = 2.0 * hours
 	node3.RAMByteHours = 4.0 * gb * hours
+	node3.GPUHours = 2.0 * hours
 	node3.SetAdjustment(-0.5)
 
 	node4 := NewNode("node4", "cluster2", "gcp-node4", *window.Clone().start, *window.Clone().end, window.Clone())
@@ -93,6 +96,7 @@ func generateAssetSet(start time.Time) *AssetSet {
 	node4.Discount = 0.25
 	node4.CPUCoreHours = 4.0 * hours
 	node4.RAMByteHours = 12.0 * gb * hours
+	node4.GPUHours = 0.0 * hours
 	node4.SetAdjustment(-1.0)
 
 	node5 := NewNode("node5", "cluster3", "aws-node5", *window.Clone().start, *window.Clone().end, window.Clone())
@@ -102,6 +106,7 @@ func generateAssetSet(start time.Time) *AssetSet {
 	node5.Discount = 0.0
 	node5.CPUCoreHours = 8.0 * hours
 	node5.RAMByteHours = 24.0 * gb * hours
+	node5.GPUHours = 0.0 * hours
 	node5.SetAdjustment(2.0)
 
 	disk1 := NewDisk("disk1", "cluster1", "gcp-disk1", *window.Clone().start, *window.Clone().end, window.Clone())
@@ -481,6 +486,7 @@ func TestNode_Add(t *testing.T) {
 	node1 := NewNode("node1", "cluster1", "node1", *windows[0].start, *windows[0].end, windows[0])
 	node1.CPUCoreHours = 1.0 * hours
 	node1.RAMByteHours = 2.0 * gb * hours
+	node1.GPUHours = 0.0 * hours
 	node1.GPUCost = 0.0
 	node1.CPUCost = 8.0
 	node1.RAMCost = 4.0
@@ -502,6 +508,7 @@ func TestNode_Add(t *testing.T) {
 	node2 := NewNode("node2", "cluster1", "node2", *windows[0].start, *windows[0].end, windows[0])
 	node2.CPUCoreHours = 1.0 * hours
 	node2.RAMByteHours = 2.0 * gb * hours
+	node2.GPUHours = 0.0 * hours
 	node2.GPUCost = 0.0
 	node2.CPUCost = 3.0
 	node2.RAMCost = 1.0
@@ -566,6 +573,7 @@ func TestNode_Add(t *testing.T) {
 	node3 := NewNode("node3", "cluster1", "node3", *windows[0].start, *windows[0].end, windows[0])
 	node3.CPUCoreHours = 0 * hours
 	node3.RAMByteHours = 0 * hours
+	node3.GPUHours = 0.0 * hours
 	node3.GPUCost = 0
 	node3.CPUCost = 0.0
 	node3.RAMCost = 0.0
@@ -575,6 +583,7 @@ func TestNode_Add(t *testing.T) {
 	node4 := NewNode("node4", "cluster1", "node4", *windows[0].start, *windows[0].end, windows[0])
 	node4.CPUCoreHours = 0 * hours
 	node4.RAMByteHours = 0 * hours
+	node4.GPUHours = 0.0 * hours
 	node4.GPUCost = 0
 	node4.CPUCost = 0.0
 	node4.RAMCost = 0.0
@@ -595,6 +604,7 @@ func TestNode_Add(t *testing.T) {
 	nodeA1 := NewNode("nodeA1", "cluster1", "nodeA1", *windows[0].start, *windows[0].end, windows[0])
 	nodeA1.CPUCoreHours = 1.0 * hours
 	nodeA1.RAMByteHours = 2.0 * gb * hours
+	nodeA1.GPUHours = 0.0 * hours
 	nodeA1.GPUCost = 0.0
 	nodeA1.CPUCost = 8.0
 	nodeA1.RAMCost = 4.0
@@ -604,6 +614,7 @@ func TestNode_Add(t *testing.T) {
 	nodeA2 := NewNode("nodeA2", "cluster1", "nodeA2", *windows[1].start, *windows[1].end, windows[1])
 	nodeA2.CPUCoreHours = 1.0 * hours
 	nodeA2.RAMByteHours = 2.0 * gb * hours
+	nodeA2.GPUHours = 0.0 * hours
 	nodeA2.GPUCost = 0.0
 	nodeA2.CPUCost = 3.0
 	nodeA2.RAMCost = 1.0
@@ -637,6 +648,9 @@ func TestNode_Add(t *testing.T) {
 	if nodeAT.RAMBytes() != 2.0*gb {
 		t.Fatalf("Node.Add: expected %f; got %f", 2.0*gb, nodeAT.RAMBytes())
 	}
+	if nodeAT.GPUs() != 0.0 {
+		t.Fatalf("Node.Add: expected %f; got %f", 0.0, nodeAT.GPUs())
+	}
 
 	// Check that the original assets are unchanged
 	if !util.IsApproximately(nodeA1.TotalCost(), 10.0) {
@@ -664,8 +678,10 @@ func TestNode_MarshalJSON(t *testing.T) {
 	})
 	node.CPUCost = 9.0
 	node.RAMCost = 0.0
+	node.RAMCost = 21.0
 	node.CPUCoreHours = 123.0
 	node.RAMByteHours = 13323.0
+	node.GPUHours = 123.0
 	node.SetAdjustment(1.0)
 
 	_, err := json.Marshal(node)