aws.go 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. package cloudutil
  2. import (
  3. "regexp"
  4. "strings"
  5. "github.com/kubecost/cost-model/pkg/log"
  6. )
  7. var capitalUnderscore = regexp.MustCompile(`[A-Z]`)
  8. var noSpacePunc = regexp.MustCompile(`[\s]{1,}|[^A-Za-z0-9]`)
  9. var noDupUnderscore = regexp.MustCompile(`_{2,}`)
  10. var noFrontUnderscore = regexp.MustCompile(`(^\_|\_$)`)
  11. // ConvertToGlueColumnFormat takes a string and runs through various regex
  12. // and string replacement statements to convert it to a format compatible
  13. // with AWS Glue and Athena column names.
  14. // Following guidance from AWS provided here ('Column Names' section):
  15. // https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/run-athena-sql.html
  16. // It returns a string containing the column name in proper column name format and length.
  17. func ConvertToGlueColumnFormat(columnName string) string {
  18. log.Debugf("Converting string \"%s\" to proper AWS Glue column name.", columnName)
  19. // An underscore is added in front of uppercase letters
  20. final := capitalUnderscore.ReplaceAllString(columnName, `_$0`)
  21. // Any non-alphanumeric characters are replaced with an underscore
  22. final = noSpacePunc.ReplaceAllString(final, "_")
  23. // Duplicate underscores are removed
  24. final = noDupUnderscore.ReplaceAllString(final, "_")
  25. // Any leading and trailing underscores are removed
  26. final = noFrontUnderscore.ReplaceAllString(final, "")
  27. // Uppercase to lowercase
  28. final = strings.ToLower(final)
  29. // Longer column name than expected - remove _ left to right
  30. allowedColLen := 128
  31. underscoreToRemove := len(final) - allowedColLen
  32. if underscoreToRemove > 0 {
  33. final = strings.Replace(final, "_", "", underscoreToRemove)
  34. }
  35. // If removing all of the underscores still didn't
  36. // make the column name < 128 characters, trim it!
  37. if len(final) > allowedColLen {
  38. final = final[:allowedColLen]
  39. }
  40. log.Debugf("Column name being returned: \"%s\". Length: \"%d\".", final, len(final))
  41. return final
  42. }