plural_rules.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. package flect
  2. import "fmt"
  3. var pluralRules = []rule{}
  4. // AddPlural adds a rule that will replace the given suffix with the replacement suffix.
  5. // The name is confusing. This function will be deprecated in the next release.
  6. func AddPlural(suffix string, repl string) {
  7. InsertPluralRule(suffix, repl)
  8. }
  9. // InsertPluralRule inserts a rule that will replace the given suffix with
  10. // the repl(acement) at the begining of the list of the pluralize rules.
  11. func InsertPluralRule(suffix, repl string) {
  12. pluralMoot.Lock()
  13. defer pluralMoot.Unlock()
  14. pluralRules = append([]rule{{
  15. suffix: suffix,
  16. fn: simpleRuleFunc(suffix, repl),
  17. }}, pluralRules...)
  18. pluralRules = append([]rule{{
  19. suffix: repl,
  20. fn: noop,
  21. }}, pluralRules...)
  22. }
  // word is one entry of the dictionary table: a singular form, its plural and
  // an optional alternative plural, plus flags that control how the entry is
  // registered by the init functions in this file.
  type word struct {
  	// singular and plural are the two forms of the word; alternative is an
  	// optional second plural that is mapped back to singular only.
  	singular, plural, alternative string

  	unidirectional bool // plural to singular is not possible (or bad)
  	uncountable    bool // an empty plural is filled in with the singular form
  	exact          bool // exact-match only: no compound-word suffix rule is built
  }
  31. // dictionary is the main table for singularize and pluralize.
  32. // All words in the dictionary will be added to singleToPlural, pluralToSingle
  33. // and singlePluralAssertions by init() functions.
  34. var dictionary = []word{
  35. // identicals https://en.wikipedia.org/wiki/English_plurals#Nouns_with_identical_singular_and_plural
  36. {singular: "aircraft", plural: "aircraft"},
  37. {singular: "beef", plural: "beef", alternative: "beefs"},
  38. {singular: "bison", plural: "bison"},
  39. {singular: "blues", plural: "blues", unidirectional: true},
  40. {singular: "chassis", plural: "chassis"},
  41. {singular: "deer", plural: "deer"},
  42. {singular: "fish", plural: "fish", alternative: "fishes"},
  43. {singular: "moose", plural: "moose"},
  44. {singular: "police", plural: "police"},
  45. {singular: "salmon", plural: "salmon", alternative: "salmons"},
  46. {singular: "series", plural: "series"},
  47. {singular: "sheep", plural: "sheep"},
  48. {singular: "shrimp", plural: "shrimp", alternative: "shrimps"},
  49. {singular: "species", plural: "species"},
  50. {singular: "swine", plural: "swine", alternative: "swines"},
  51. {singular: "trout", plural: "trout", alternative: "trouts"},
  52. {singular: "tuna", plural: "tuna", alternative: "tunas"},
  53. {singular: "you", plural: "you"},
  54. // -en https://en.wikipedia.org/wiki/English_plurals#Plurals_in_-(e)n
  55. {singular: "child", plural: "children"},
  56. {singular: "ox", plural: "oxen", exact: true},
  57. // apophonic https://en.wikipedia.org/wiki/English_plurals#Apophonic_plurals
  58. {singular: "foot", plural: "feet"},
  59. {singular: "goose", plural: "geese"},
  60. {singular: "man", plural: "men"},
  61. {singular: "human", plural: "humans"}, // not humen
  62. {singular: "louse", plural: "lice", exact: true},
  63. {singular: "mouse", plural: "mice"},
  64. {singular: "tooth", plural: "teeth"},
  65. {singular: "woman", plural: "women"},
  66. // misc https://en.wikipedia.org/wiki/English_plurals#Miscellaneous_irregular_plurals
  67. {singular: "die", plural: "dice", exact: true},
  68. {singular: "person", plural: "people"},
  69. // Words from French that end in -u add an x; in addition to eau to eaux rule
  70. {singular: "adieu", plural: "adieux", alternative: "adieus"},
  71. {singular: "fabliau", plural: "fabliaux"},
  72. {singular: "bureau", plural: "bureaus", alternative: "bureaux"}, // popular
  73. // Words from Greek that end in -on change -on to -a; in addition to hedron rule
  74. {singular: "criterion", plural: "criteria"},
  75. {singular: "ganglion", plural: "ganglia", alternative: "ganglions"},
  76. {singular: "lexicon", plural: "lexica", alternative: "lexicons"},
  77. {singular: "mitochondrion", plural: "mitochondria", alternative: "mitochondrions"},
  78. {singular: "noumenon", plural: "noumena"},
  79. {singular: "phenomenon", plural: "phenomena"},
  80. {singular: "taxon", plural: "taxa"},
  81. // Words from Latin that end in -um change -um to -a; in addition to some rules
  82. {singular: "media", plural: "media"}, // popular case: media -> media
  83. {singular: "medium", plural: "media", alternative: "mediums", unidirectional: true},
  84. {singular: "stadium", plural: "stadiums", alternative: "stadia"},
  85. {singular: "aquarium", plural: "aquaria", alternative: "aquariums"},
  86. {singular: "auditorium", plural: "auditoria", alternative: "auditoriums"},
  87. {singular: "symposium", plural: "symposia", alternative: "symposiums"},
  88. {singular: "curriculum", plural: "curriculums", alternative: "curricula"}, // ulum
  89. {singular: "quota", plural: "quotas"},
  90. // Words from Latin that end in -us change -us to -i or -era
  91. {singular: "alumnus", plural: "alumni", alternative: "alumnuses"}, // -i
  92. {singular: "bacillus", plural: "bacilli"},
  93. {singular: "cactus", plural: "cacti", alternative: "cactuses"},
  94. {singular: "coccus", plural: "cocci"},
  95. {singular: "focus", plural: "foci", alternative: "focuses"},
  96. {singular: "locus", plural: "loci", alternative: "locuses"},
  97. {singular: "nucleus", plural: "nuclei", alternative: "nucleuses"},
  98. {singular: "octopus", plural: "octupuses", alternative: "octopi"},
  99. {singular: "radius", plural: "radii", alternative: "radiuses"},
  100. {singular: "syllabus", plural: "syllabi"},
  101. {singular: "corpus", plural: "corpora", alternative: "corpuses"}, // -ra
  102. {singular: "genus", plural: "genera"},
  103. // Words from Latin that end in -a change -a to -ae
  104. {singular: "alumna", plural: "alumnae"},
  105. {singular: "vertebra", plural: "vertebrae"},
  106. {singular: "differentia", plural: "differentiae"}, // -tia
  107. {singular: "minutia", plural: "minutiae"},
  108. {singular: "vita", plural: "vitae"}, // -ita
  109. {singular: "larva", plural: "larvae"}, // -va
  110. {singular: "postcava", plural: "postcavae"},
  111. {singular: "praecava", plural: "praecavae"},
  112. {singular: "uva", plural: "uvae"},
  113. // Words from Latin that end in -ex change -ex to -ices
  114. {singular: "apex", plural: "apices", alternative: "apexes"},
  115. {singular: "codex", plural: "codices", alternative: "codexes"},
  116. {singular: "index", plural: "indices", alternative: "indexes"},
  117. {singular: "latex", plural: "latices", alternative: "latexes"},
  118. {singular: "vertex", plural: "vertices", alternative: "vertexes"},
  119. {singular: "vortex", plural: "vortices", alternative: "vortexes"},
  120. // Words from Latin that end in -ix change -ix to -ices (eg, matrix becomes matrices)
  121. {singular: "appendix", plural: "appendices", alternative: "appendixes"},
  122. {singular: "radix", plural: "radices", alternative: "radixes"},
  123. {singular: "helix", plural: "helices", alternative: "helixes"},
  124. // Words from Latin that end in -is change -is to -es
  125. {singular: "axis", plural: "axes", exact: true},
  126. {singular: "crisis", plural: "crises"},
  127. {singular: "ellipsis", plural: "ellipses", unidirectional: true}, // ellipse
  128. {singular: "genesis", plural: "geneses"},
  129. {singular: "oasis", plural: "oases"},
  130. {singular: "thesis", plural: "theses"},
  131. {singular: "testis", plural: "testes"},
  132. {singular: "base", plural: "bases"}, // popular case
  133. {singular: "basis", plural: "bases", unidirectional: true},
  134. {singular: "alias", plural: "aliases", exact: true}, // no alia, no aliasis
  135. {singular: "vedalia", plural: "vedalias"}, // no vedalium, no vedaliases
  136. // Words that end in -ch, -o, -s, -sh, -x, -z (can be conflict with the others)
  137. {singular: "use", plural: "uses", exact: true}, // us vs use
  138. {singular: "abuse", plural: "abuses"},
  139. {singular: "cause", plural: "causes"},
  140. {singular: "clause", plural: "clauses"},
  141. {singular: "cruse", plural: "cruses"},
  142. {singular: "excuse", plural: "excuses"},
  143. {singular: "fuse", plural: "fuses"},
  144. {singular: "house", plural: "houses"},
  145. {singular: "misuse", plural: "misuses"},
  146. {singular: "muse", plural: "muses"},
  147. {singular: "pause", plural: "pauses"},
  148. {singular: "ache", plural: "aches"},
  149. {singular: "topaz", plural: "topazes"},
  150. {singular: "buffalo", plural: "buffaloes", alternative: "buffalos"},
  151. {singular: "potato", plural: "potatoes"},
  152. {singular: "tomato", plural: "tomatoes"},
  153. // uncountables
  154. {singular: "equipment", uncountable: true},
  155. {singular: "information", uncountable: true},
  156. {singular: "jeans", uncountable: true},
  157. {singular: "money", uncountable: true},
  158. {singular: "news", uncountable: true},
  159. {singular: "rice", uncountable: true},
  160. // exceptions: -f to -ves, not -fe
  161. {singular: "dwarf", plural: "dwarfs", alternative: "dwarves"},
  162. {singular: "hoof", plural: "hoofs", alternative: "hooves"},
  163. {singular: "thief", plural: "thieves"},
  164. // exceptions: instead of -f(e) to -ves
  165. {singular: "chive", plural: "chives"},
  166. {singular: "hive", plural: "hives"},
  167. {singular: "move", plural: "moves"},
  168. // exceptions: instead of -y to -ies
  169. {singular: "movie", plural: "movies"},
  170. {singular: "cookie", plural: "cookies"},
  171. // exceptions: instead of -um to -a
  172. {singular: "pretorium", plural: "pretoriums"},
  173. {singular: "agenda", plural: "agendas"}, // instead of plural of agendum
  174. // exceptions: instead of -um to -a (chemical element names)
  175. // Words from Latin that end in -a change -a to -ae
  176. {singular: "formula", plural: "formulas", alternative: "formulae"}, // also -um/-a
  177. // exceptions: instead of -o to -oes
  178. {singular: "shoe", plural: "shoes"},
  179. {singular: "toe", plural: "toes", exact: true},
  180. {singular: "graffiti", plural: "graffiti"},
  181. // abbreviations
  182. {singular: "ID", plural: "IDs", exact: true},
  183. }
  var (
  	// singleToPlural is the highest priority map for Pluralize().
  	// singularToPluralSuffixList is used to build pluralRules for suffixes
  	// and compound words.
  	singleToPlural = make(map[string]string)

  	// pluralToSingle is the highest priority map for Singularize().
  	// singularToPluralSuffixList is used to build singularRules for suffixes
  	// and compound words.
  	//
  	// NOTE: This map should not be built as a reverse map of singleToPlural
  	// since there are words that have the same plural.
  	pluralToSingle = make(map[string]string)
  )
  194. // build singleToPlural and pluralToSingle with dictionary
  195. func init() {
  196. for _, wd := range dictionary {
  197. if singleToPlural[wd.singular] != "" {
  198. panic(fmt.Errorf("map singleToPlural already has an entry for %s", wd.singular))
  199. }
  200. if wd.uncountable && wd.plural == "" {
  201. wd.plural = wd.singular
  202. }
  203. if wd.plural == "" {
  204. panic(fmt.Errorf("plural for %s is not provided", wd.singular))
  205. }
  206. singleToPlural[wd.singular] = wd.plural
  207. if !wd.unidirectional {
  208. if pluralToSingle[wd.plural] != "" {
  209. panic(fmt.Errorf("map pluralToSingle already has an entry for %s", wd.plural))
  210. }
  211. pluralToSingle[wd.plural] = wd.singular
  212. if wd.alternative != "" {
  213. if pluralToSingle[wd.alternative] != "" {
  214. panic(fmt.Errorf("map pluralToSingle already has an entry for %s", wd.alternative))
  215. }
  216. pluralToSingle[wd.alternative] = wd.singular
  217. }
  218. }
  219. }
  220. }
  // singularToPluralSuffix pairs a singular suffix with its plural suffix.
  // The field order (singular first, plural second) matters because
  // singularToPluralSuffixList is written with positional literals.
  type singularToPluralSuffix struct {
  	singular, plural string
  }
  225. // singularToPluralSuffixList is a list of "bidirectional" suffix rules for
  226. // the irregular plurals follow such rules.
  227. //
  228. // NOTE: IMPORTANT! The order of items in this list is the rule priority, not
  229. // alphabet order. The first match will be used to inflect.
  230. var singularToPluralSuffixList = []singularToPluralSuffix{
  231. // https://en.wiktionary.org/wiki/Appendix:English_irregular_nouns#Rules
  232. // Words that end in -f or -fe change -f or -fe to -ves
  233. {"tive", "tives"}, // exception
  234. {"eaf", "eaves"},
  235. {"oaf", "oaves"},
  236. {"afe", "aves"},
  237. {"arf", "arves"},
  238. {"rfe", "rves"},
  239. {"rf", "rves"},
  240. {"lf", "lves"},
  241. {"fe", "ves"}, // previously '[a-eg-km-z]fe' TODO: regex support
  242. // Words that end in -y preceded by a consonant change -y to -ies
  243. {"ay", "ays"},
  244. {"ey", "eys"},
  245. {"oy", "oys"},
  246. {"quy", "quies"},
  247. {"uy", "uys"},
  248. {"y", "ies"}, // '[^aeiou]y'
  249. // Words from French that end in -u add an x (eg, château becomes châteaux)
  250. {"eau", "eaux"}, // it seems like 'eau' is the most popular form of this rule
  251. // Words from Latin that end in -a change -a to -ae; before -on to -a and -um to -a
  252. {"bula", "bulae"},
  253. {"dula", "bulae"},
  254. {"lula", "bulae"},
  255. {"nula", "bulae"},
  256. {"vula", "bulae"},
  257. // Words from Greek that end in -on change -on to -a (eg, polyhedron becomes polyhedra)
  258. // https://en.wiktionary.org/wiki/Category:English_irregular_plurals_ending_in_"-a"
  259. {"hedron", "hedra"},
  260. // Words from Latin that end in -um change -um to -a (eg, minimum becomes minima)
  261. // https://en.wiktionary.org/wiki/Category:English_irregular_plurals_ending_in_"-a"
  262. {"ium", "ia"}, // some exceptions especially chemical element names
  263. {"seum", "seums"},
  264. {"eum", "ea"},
  265. {"oum", "oa"},
  266. {"stracum", "straca"},
  267. {"dum", "da"},
  268. {"elum", "ela"},
  269. {"ilum", "ila"},
  270. {"olum", "ola"},
  271. {"ulum", "ula"},
  272. {"llum", "lla"},
  273. {"ylum", "yla"},
  274. {"imum", "ima"},
  275. {"ernum", "erna"},
  276. {"gnum", "gna"},
  277. {"brum", "bra"},
  278. {"crum", "cra"},
  279. {"terum", "tera"},
  280. {"serum", "sera"},
  281. {"trum", "tra"},
  282. {"antum", "anta"},
  283. {"atum", "ata"},
  284. {"entum", "enta"},
  285. {"etum", "eta"},
  286. {"itum", "ita"},
  287. {"otum", "ota"},
  288. {"utum", "uta"},
  289. {"ctum", "cta"},
  290. {"ovum", "ova"},
  291. // Words from Latin that end in -us change -us to -i or -era
  292. // not easy to make a simple rule. just add them all to the dictionary
  293. // Words from Latin that end in -ex change -ex to -ices (eg, vortex becomes vortices)
  294. // Words from Latin that end in -ix change -ix to -ices (eg, matrix becomes matrices)
  295. // for example, -dix, -dex, and -dice will have the same plural form so
  296. // making a simple rule is not possible for them
  297. {"trix", "trices"}, // ignore a few words end in trice
  298. // Words from Latin that end in -is change -is to -es (eg, thesis becomes theses)
  299. // -sis and -se has the same plural -ses so making a rule is not easy too.
  300. {"iasis", "iases"},
  301. {"mesis", "meses"},
  302. {"kinesis", "kineses"},
  303. {"resis", "reses"},
  304. {"gnosis", "gnoses"}, // e.g. diagnosis
  305. {"opsis", "opses"}, // e.g. synopsis
  306. {"ysis", "yses"}, // e.g. analysis
  307. // Words that end in -ch, -o, -s, -sh, -x, -z
  308. {"ouse", "ouses"},
  309. {"lause", "lauses"},
  310. {"us", "uses"}, // use/uses is in the dictionary
  311. {"ch", "ches"},
  312. {"io", "ios"},
  313. {"sh", "shes"},
  314. {"ss", "sses"},
  315. {"ez", "ezzes"},
  316. {"iz", "izzes"},
  317. {"tz", "tzes"},
  318. {"zz", "zzes"},
  319. {"ano", "anos"},
  320. {"lo", "los"},
  321. {"to", "tos"},
  322. {"oo", "oos"},
  323. {"o", "oes"},
  324. {"x", "xes"},
  325. // for abbreviations
  326. {"S", "Ses"},
  327. // excluded rules: seems rare
  328. // Words from Hebrew that add -im or -ot (eg, cherub becomes cherubim)
  329. // - cherub (cherubs or cherubim), seraph (seraphs or seraphim)
  330. // Words from Greek that end in -ma change -ma to -mata
  331. // - The most of words end in -ma are in this category but it looks like
  332. // just adding -s is more popular.
  333. // Words from Latin that end in -nx change -nx to -nges
  334. // - The most of words end in -nx are in this category but it looks like
  335. // just adding -es is more popular. (sphinxes)
  336. // excluded rules: don't care at least for now:
  337. // Words that end in -ful that add an s after the -ful
  338. // Words that end in -s or -ese denoting a national of a particular country
  339. // Symbols or letters, which often add -'s
  340. }
  341. func init() {
  342. for i := len(singularToPluralSuffixList) - 1; i >= 0; i-- {
  343. InsertPluralRule(singularToPluralSuffixList[i].singular, singularToPluralSuffixList[i].plural)
  344. InsertSingularRule(singularToPluralSuffixList[i].plural, singularToPluralSuffixList[i].singular)
  345. }
  346. // build pluralRule and singularRule with dictionary for compound words
  347. for _, wd := range dictionary {
  348. if wd.exact {
  349. continue
  350. }
  351. if wd.uncountable && wd.plural == "" {
  352. wd.plural = wd.singular
  353. }
  354. InsertPluralRule(wd.singular, wd.plural)
  355. if !wd.unidirectional {
  356. InsertSingularRule(wd.plural, wd.singular)
  357. if wd.alternative != "" {
  358. InsertSingularRule(wd.alternative, wd.singular)
  359. }
  360. }
  361. }
  362. }