aws.go 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152
  1. package cloudutil
  2. import (
  3. "strings"
  4. "unicode"
  5. )
  6. // ConvertToGlueColumnFormat takes a string and runs through various regex
  7. // and string replacement statements to convert it to a format compatible
  8. // with AWS Glue and Athena column names.
  9. // Following guidance from AWS provided here ('Column Names' section):
  10. // https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/run-athena-sql.html
  11. // It returns a string containing the column name in proper column name format and length.
  12. func ConvertToGlueColumnFormat(columnName string) string {
  13. var sb strings.Builder
  14. var prev rune
  15. for i, r := range columnName {
  16. if unicode.IsUpper(r) && prev != '_' && i != 0 {
  17. sb.WriteRune('_')
  18. }
  19. if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
  20. if prev != '_' && i != 0 && i != (len(columnName)-1) {
  21. sb.WriteRune('_')
  22. }
  23. prev = '_'
  24. continue
  25. }
  26. if r == '_' {
  27. if prev == '_' || i == 0 || i == len(columnName)-1 {
  28. prev = '_'
  29. continue
  30. }
  31. }
  32. sb.WriteRune(unicode.ToLower(r))
  33. prev = r
  34. }
  35. final := sb.String()
  36. // Longer column name than expected - remove _ left to right
  37. allowedColLen := 128
  38. underscoreToRemove := len(final) - allowedColLen
  39. if underscoreToRemove > 0 {
  40. final = strings.Replace(final, "_", "", underscoreToRemove)
  41. }
  42. // If removing all of the underscores still didn't
  43. // make the column name < 128 characters, trim it!
  44. if len(final) > allowedColLen {
  45. final = final[:allowedColLen]
  46. }
  47. return final
  48. }