Kaynağa Gözat

Fix variable naming in ConvertToGlueColumnFormat

Fix and test Glue formatting for external allocations
Niko Kovacevic 4 yıl önce
ebeveyn
işleme
59253d699a

+ 3 - 48
pkg/cloud/awsprovider.go

@@ -24,6 +24,7 @@ import (
 	"github.com/kubecost/cost-model/pkg/errors"
 	"github.com/kubecost/cost-model/pkg/log"
 	"github.com/kubecost/cost-model/pkg/util"
+	"github.com/kubecost/cost-model/pkg/util/cloudutil"
 	"github.com/kubecost/cost-model/pkg/util/json"
 
 	"github.com/aws/aws-sdk-go/aws"
@@ -1540,52 +1541,6 @@ func (a *AWS) GetDisks() ([]byte, error) {
 	})
 }
 
-// ConvertToGlueColumnFormat takes a string and runs through various regex
-// and string replacement statements to convert it to a format compatible
-// with AWS Glue and Athena column names.
-// Following guidance from AWS provided here ('Column Names' section):
-// https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/run-athena-sql.html
-// It returns a string containing the column name in proper column name format and length.
-func ConvertToGlueColumnFormat(column_name string) string {
-	log.Debugf("Converting string \"%s\" to proper AWS Glue column name.", column_name)
-
-	// An underscore is added in front of uppercase letters
-	capital_underscore := regexp.MustCompile(`[A-Z]`)
-	final := capital_underscore.ReplaceAllString(column_name, `_$0`)
-
-	// Any non-alphanumeric characters are replaced with an underscore
-	no_space_punc := regexp.MustCompile(`[\s]{1,}|[^A-Za-z0-9]`)
-	final = no_space_punc.ReplaceAllString(final, "_")
-
-	// Duplicate underscores are removed
-	no_dup_underscore := regexp.MustCompile(`_{2,}`)
-	final = no_dup_underscore.ReplaceAllString(final, "_")
-
-	// Any leading and trailing underscores are removed
-	no_front_end_underscore := regexp.MustCompile(`(^\_|\_$)`)
-	final = no_front_end_underscore.ReplaceAllString(final, "")
-
-	// Uppercase to lowercase
-	final = strings.ToLower(final)
-
-	// Longer column name than expected - remove _ left to right
-	allowed_col_len := 128
-	undersc_to_remove := len(final) - allowed_col_len
-	if undersc_to_remove > 0 {
-		final = strings.Replace(final, "_", "", undersc_to_remove)
-	}
-
-	// If removing all of the underscores still didn't
-	// make the column name < 128 characters, trim it!
-	if len(final) > allowed_col_len {
-		final = final[:allowed_col_len]
-	}
-
-	log.Debugf("Column name being returned: \"%s\". Length: \"%d\".", final, len(final))
-
-	return final
-}
-
 func generateAWSGroupBy(lastIdx int) string {
 	sequence := []string{}
 	for i := 1; i < lastIdx+1; i++ {
@@ -1955,7 +1910,7 @@ func (a *AWS) ExternalAllocations(start string, end string, aggregators []string
 	formattedAggregators := []string{}
 	for _, agg := range aggregators {
 		aggregator_column_name := "resource_tags_user_" + agg
-		aggregator_column_name = ConvertToGlueColumnFormat(aggregator_column_name)
+		aggregator_column_name = cloudutil.ConvertToGlueColumnFormat(aggregator_column_name)
 		formattedAggregators = append(formattedAggregators, aggregator_column_name)
 	}
 	aggregatorNames := strings.Join(formattedAggregators, ",")
@@ -1963,7 +1918,7 @@ func (a *AWS) ExternalAllocations(start string, end string, aggregators []string
 	aggregatorOr = aggregatorOr + " <> ''"
 
 	filter_column_name := "resource_tags_user_" + filterType
-	filter_column_name = ConvertToGlueColumnFormat(filter_column_name)
+	filter_column_name = cloudutil.ConvertToGlueColumnFormat(filter_column_name)
 
 	var query string
 	var lastIdx int

+ 10 - 1
pkg/kubecost/config.go

@@ -3,6 +3,8 @@ package kubecost
 import (
 	"fmt"
 	"strings"
+
+	"github.com/kubecost/cost-model/pkg/util/cloudutil"
 )
 
 // LabelConfig is a port of type AnalyzerConfig. We need to be more thoughtful
@@ -218,7 +220,14 @@ func (lc *LabelConfig) GetExternalAllocationName(labels map[string]string, aggre
 	// The relevant label is not present in the set of labels provided.
 	labelValue, ok := labels[labelName]
 	if !ok {
-		return ""
+		// Convert the label name to a format compatible with AWS Glue and
+		// Athena column naming and check again. If not found after that, then
+		// consider the label not present.
+		labelName = cloudutil.ConvertToGlueColumnFormat(labelName)
+		labelValue, ok = labels[labelName]
+		if !ok {
+			return ""
+		}
 	}
 
 	// When aggregating by some label (i.e. not by a Kubernetes concept),

+ 14 - 1
pkg/kubecost/config_test.go

@@ -1,6 +1,10 @@
 package kubecost
 
-import "testing"
+import (
+	"testing"
+
+	"github.com/kubecost/cost-model/pkg/util/cloudutil"
+)
 
 func TestLabelConfig_Map(t *testing.T) {
 	var m map[string]string
@@ -33,10 +37,14 @@ func TestLabelConfig_Map(t *testing.T) {
 }
 
 func TestLabelConfig_GetExternalAllocationName(t *testing.T) {
+	// Make sure that AWS's Glue/Athena column formatting is supported
+	glueFormattedLabel := cloudutil.ConvertToGlueColumnFormat("Non__GlueFormattedLabel")
+
 	labels := map[string]string{
 		"kubens":                      "kubecost-staging",
 		"env":                         "env1",
 		"app":                         "app1",
+		glueFormattedLabel:            "glue",
 		"kubernetes_cluster":          "cluster-one",
 		"kubernetes_namespace":        "kubecost",
 		"kubernetes_controller":       "kubecost-controller",
@@ -58,6 +66,7 @@ func TestLabelConfig_GetExternalAllocationName(t *testing.T) {
 	}{
 		{"label:env", "env=env1"},
 		{"label:app", "app=app1"},
+		{"label:Non__GlueFormattedLabel", "non_glue_formatted_label=glue"},
 		{"cluster", "cluster-one"},
 		{"namespace", "kubecost"},
 		{"controller", "kubecost-controller"},
@@ -92,12 +101,16 @@ func TestLabelConfig_GetExternalAllocationName(t *testing.T) {
 
 	// Change the external label for namespace and confirm it still works
 	lc.NamespaceExternalLabel = "kubens"
+	lc.PodExternalLabel = "Non__GlueFormattedLabel"
+
+	// TODO how is e.g. OwnerExternalLabel supposed to work?
 
 	testCases = []struct {
 		aggBy    string
 		expected string
 	}{
 		{"namespace", "kubecost-staging"},
+		{"pod", "glue"},
 	}
 	for _, tc := range testCases {
 		actual := lc.GetExternalAllocationName(labels, tc.aggBy)

+ 54 - 0
pkg/util/cloudutil/aws.go

@@ -0,0 +1,54 @@
+package cloudutil
+
+import (
+	"regexp"
+	"strings"
+
+	"github.com/kubecost/cost-model/pkg/log"
+)
+
+// ConvertToGlueColumnFormat rewrites columnName as an AWS Glue/Athena column
+// name: an underscore is inserted before each uppercase ASCII letter, every
+// character outside [A-Za-z0-9] becomes an underscore, underscore runs are
+// collapsed, one leading/trailing underscore is dropped, and the result is
+// lowercased and limited to 128 bytes. Follows the 'Column Names' section of
+// https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/run-athena-sql.html
+func ConvertToGlueColumnFormat(columnName string) string {
+	log.Debugf("Converting string \"%s\" to proper AWS Glue column name.", columnName)
+
+	// Insert an underscore before every uppercase ASCII letter ($0 is the match)
+	capitalUnderscore := regexp.MustCompile(`[A-Z]`)
+	final := capitalUnderscore.ReplaceAllString(columnName, `_$0`)
+
+	// Replace each whitespace run or other non-alphanumeric character with "_"
+	noSpacePunc := regexp.MustCompile(`[\s]{1,}|[^A-Za-z0-9]`)
+	final = noSpacePunc.ReplaceAllString(final, "_")
+
+	// Collapse runs of two or more underscores into a single underscore
+	noDupUnderscore := regexp.MustCompile(`_{2,}`)
+	final = noDupUnderscore.ReplaceAllString(final, "_")
+
+	// Strip the underscore at the start and/or end of the name, if any
+	noFrontUnderscore := regexp.MustCompile(`(^\_|\_$)`)
+	final = noFrontUnderscore.ReplaceAllString(final, "")
+
+	// Lowercase the whole name
+	final = strings.ToLower(final)
+
+	// Over the limit: drop underscores left to right, at most one per excess byte
+	allowedColLen := 128
+	underscoreToRemove := len(final) - allowedColLen
+	if underscoreToRemove > 0 {
+		final = strings.Replace(final, "_", "", underscoreToRemove)
+	}
+
+	// If removing underscores still left the name longer than the limit,
+	// truncate it to the first allowedColLen bytes.
+	if len(final) > allowedColLen {
+		final = final[:allowedColLen]
+	}
+
+	log.Debugf("Column name being returned: \"%s\". Length: \"%d\".", final, len(final))
+
+	return final
+}