aws.go 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. package cloudutil
  2. import (
  3. "strings"
  4. "unicode"
  5. )
  6. // ConvertToGlueColumnFormat takes a string and runs through various regex
  7. // and string replacement statements to convert it to a format compatible
  8. // with AWS Glue and Athena column names.
  9. // Following guidance from AWS provided here ('Column Names' section):
  10. // https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/run-athena-sql.html
  11. // It returns a string containing the column name in proper column name format and length.
  12. func ConvertToGlueColumnFormat(columnName string) string {
  13. var sb strings.Builder
  14. var prev rune
  15. for i, r := range columnName {
  16. if unicode.IsUpper(r) && prev != '_' && i != 0 {
  17. sb.WriteRune('_')
  18. }
  19. if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
  20. if prev != '_' && i != 0 && i != (len(columnName)-1) {
  21. sb.WriteRune('_')
  22. }
  23. prev = '_'
  24. continue
  25. }
  26. if r == '_' {
  27. if prev == '_' || i == 0 || i == len(columnName)-1 {
  28. prev = '_'
  29. continue
  30. }
  31. }
  32. sb.WriteRune(unicode.ToLower(r))
  33. prev = r
  34. }
  35. final := sb.String()
  36. if prev == '_' { // string any trailing '_'
  37. final = final[:len(final)-1]
  38. }
  39. // Longer column name than expected - remove _ left to right
  40. allowedColLen := 128
  41. underscoreToRemove := len(final) - allowedColLen
  42. if underscoreToRemove > 0 {
  43. final = strings.Replace(final, "_", "", underscoreToRemove)
  44. }
  45. // If removing all of the underscores still didn't
  46. // make the column name < 128 characters, trim it!
  47. if len(final) > allowedColLen {
  48. final = final[:allowedColLen]
  49. }
  50. return final
  51. }