aws.go 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. package cloudutil
  2. import (
  3. "regexp"
  4. "strings"
  5. "github.com/kubecost/cost-model/pkg/log"
  6. )
  7. // ConvertToGlueColumnFormat takes a string and runs through various regex
  8. // and string replacement statements to convert it to a format compatible
  9. // with AWS Glue and Athena column names.
  10. // Following guidance from AWS provided here ('Column Names' section):
  11. // https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/run-athena-sql.html
  12. // It returns a string containing the column name in proper column name format and length.
  13. func ConvertToGlueColumnFormat(columnName string) string {
  14. log.Debugf("Converting string \"%s\" to proper AWS Glue column name.", columnName)
  15. // An underscore is added in front of uppercase letters
  16. capitalUnderscore := regexp.MustCompile(`[A-Z]`)
  17. final := capitalUnderscore.ReplaceAllString(columnName, `_$0`)
  18. // Any non-alphanumeric characters are replaced with an underscore
  19. noSpacePunc := regexp.MustCompile(`[\s]{1,}|[^A-Za-z0-9]`)
  20. final = noSpacePunc.ReplaceAllString(final, "_")
  21. // Duplicate underscores are removed
  22. noDupUnderscore := regexp.MustCompile(`_{2,}`)
  23. final = noDupUnderscore.ReplaceAllString(final, "_")
  24. // Any leading and trailing underscores are removed
  25. noFrontUnderscore := regexp.MustCompile(`(^\_|\_$)`)
  26. final = noFrontUnderscore.ReplaceAllString(final, "")
  27. // Uppercase to lowercase
  28. final = strings.ToLower(final)
  29. // Longer column name than expected - remove _ left to right
  30. allowedColLen := 128
  31. underscoreToRemove := len(final) - allowedColLen
  32. if underscoreToRemove > 0 {
  33. final = strings.Replace(final, "_", "", underscoreToRemove)
  34. }
  35. // If removing all of the underscores still didn't
  36. // make the column name < 128 characters, trim it!
  37. if len(final) > allowedColLen {
  38. final = final[:allowedColLen]
  39. }
  40. log.Debugf("Column name being returned: \"%s\". Length: \"%d\".", final, len(final))
  41. return final
  42. }