| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417 |
- package flect
- import "fmt"
- var pluralRules = []rule{}
- // AddPlural adds a rule that will replace the given suffix with the replacement suffix.
- // The name is confusing. This function will be deprecated in the next release.
- func AddPlural(suffix string, repl string) {
- InsertPluralRule(suffix, repl)
- }
- // InsertPluralRule inserts a rule that will replace the given suffix with
- // the repl(acement) at the begining of the list of the pluralize rules.
- func InsertPluralRule(suffix, repl string) {
- pluralMoot.Lock()
- defer pluralMoot.Unlock()
- pluralRules = append([]rule{{
- suffix: suffix,
- fn: simpleRuleFunc(suffix, repl),
- }}, pluralRules...)
- pluralRules = append([]rule{{
- suffix: repl,
- fn: noop,
- }}, pluralRules...)
- }
// word is one entry of the dictionary of irregular or otherwise special
// nouns.
type word struct {
	// singular is the singular form and the key used for pluralizing.
	singular string
	// plural is the preferred plural form. It may be left empty only when
	// uncountable is set; the init() functions then reuse singular.
	plural string
	// alternative is an optional second plural form. It is only registered
	// for the plural-to-singular direction.
	alternative string
	// unidirectional marks entries for which plural to singular is not
	// possible (or bad); such entries are skipped when the reverse mappings
	// are built.
	unidirectional bool
	// uncountable marks words whose plural equals the singular.
	uncountable bool
	// exact keeps the entry out of the suffix rules built for compound
	// words, so it only takes part in the whole-word maps.
	exact bool
}

// dictionary is the main table for singularize and pluralize.
// All words in the dictionary will be added to singleToPlural, pluralToSingle
// and singlePluralAssertions by init() functions.
var dictionary = []word{
	// identicals https://en.wikipedia.org/wiki/English_plurals#Nouns_with_identical_singular_and_plural
	{singular: "aircraft", plural: "aircraft"},
	{singular: "beef", plural: "beef", alternative: "beefs"},
	{singular: "bison", plural: "bison"},
	{singular: "blues", plural: "blues", unidirectional: true},
	{singular: "chassis", plural: "chassis"},
	{singular: "deer", plural: "deer"},
	{singular: "fish", plural: "fish", alternative: "fishes"},
	{singular: "moose", plural: "moose"},
	{singular: "police", plural: "police"},
	{singular: "salmon", plural: "salmon", alternative: "salmons"},
	{singular: "series", plural: "series"},
	{singular: "sheep", plural: "sheep"},
	{singular: "shrimp", plural: "shrimp", alternative: "shrimps"},
	{singular: "species", plural: "species"},
	{singular: "swine", plural: "swine", alternative: "swines"},
	{singular: "trout", plural: "trout", alternative: "trouts"},
	{singular: "tuna", plural: "tuna", alternative: "tunas"},
	{singular: "you", plural: "you"},
	// -en https://en.wikipedia.org/wiki/English_plurals#Plurals_in_-(e)n
	{singular: "child", plural: "children"},
	{singular: "ox", plural: "oxen", exact: true},
	// apophonic https://en.wikipedia.org/wiki/English_plurals#Apophonic_plurals
	{singular: "foot", plural: "feet"},
	{singular: "goose", plural: "geese"},
	{singular: "man", plural: "men"},
	{singular: "human", plural: "humans"}, // not humen
	{singular: "louse", plural: "lice", exact: true},
	{singular: "mouse", plural: "mice"},
	{singular: "tooth", plural: "teeth"},
	{singular: "woman", plural: "women"},
	// misc https://en.wikipedia.org/wiki/English_plurals#Miscellaneous_irregular_plurals
	{singular: "die", plural: "dice", exact: true},
	{singular: "person", plural: "people"},

	// Words from French that end in -u add an x; in addition to eau to eaux rule
	{singular: "adieu", plural: "adieux", alternative: "adieus"},
	{singular: "fabliau", plural: "fabliaux"},
	{singular: "bureau", plural: "bureaus", alternative: "bureaux"}, // popular

	// Words from Greek that end in -on change -on to -a; in addition to hedron rule
	{singular: "criterion", plural: "criteria"},
	{singular: "ganglion", plural: "ganglia", alternative: "ganglions"},
	{singular: "lexicon", plural: "lexica", alternative: "lexicons"},
	{singular: "mitochondrion", plural: "mitochondria", alternative: "mitochondrions"},
	{singular: "noumenon", plural: "noumena"},
	{singular: "phenomenon", plural: "phenomena"},
	{singular: "taxon", plural: "taxa"},

	// Words from Latin that end in -um change -um to -a; in addition to some rules
	{singular: "media", plural: "media"}, // popular case: media -> media
	{singular: "medium", plural: "media", alternative: "mediums", unidirectional: true},
	{singular: "stadium", plural: "stadiums", alternative: "stadia"},
	{singular: "aquarium", plural: "aquaria", alternative: "aquariums"},
	{singular: "auditorium", plural: "auditoria", alternative: "auditoriums"},
	{singular: "symposium", plural: "symposia", alternative: "symposiums"},
	{singular: "curriculum", plural: "curriculums", alternative: "curricula"}, // ulum
	{singular: "quota", plural: "quotas"},

	// Words from Latin that end in -us change -us to -i or -era
	{singular: "alumnus", plural: "alumni", alternative: "alumnuses"}, // -i
	{singular: "bacillus", plural: "bacilli"},
	{singular: "cactus", plural: "cacti", alternative: "cactuses"},
	{singular: "coccus", plural: "cocci"},
	{singular: "focus", plural: "foci", alternative: "focuses"},
	{singular: "locus", plural: "loci", alternative: "locuses"},
	{singular: "nucleus", plural: "nuclei", alternative: "nucleuses"},
	{singular: "octopus", plural: "octopuses", alternative: "octopi"},
	{singular: "radius", plural: "radii", alternative: "radiuses"},
	{singular: "syllabus", plural: "syllabi"},
	{singular: "corpus", plural: "corpora", alternative: "corpuses"}, // -ra
	{singular: "genus", plural: "genera"},

	// Words from Latin that end in -a change -a to -ae
	{singular: "alumna", plural: "alumnae"},
	{singular: "vertebra", plural: "vertebrae"},
	{singular: "differentia", plural: "differentiae"}, // -tia
	{singular: "minutia", plural: "minutiae"},
	{singular: "vita", plural: "vitae"},   // -ita
	{singular: "larva", plural: "larvae"}, // -va
	{singular: "postcava", plural: "postcavae"},
	{singular: "praecava", plural: "praecavae"},
	{singular: "uva", plural: "uvae"},

	// Words from Latin that end in -ex change -ex to -ices
	{singular: "apex", plural: "apices", alternative: "apexes"},
	{singular: "codex", plural: "codices", alternative: "codexes"},
	{singular: "index", plural: "indices", alternative: "indexes"},
	{singular: "latex", plural: "latices", alternative: "latexes"},
	{singular: "vertex", plural: "vertices", alternative: "vertexes"},
	{singular: "vortex", plural: "vortices", alternative: "vortexes"},

	// Words from Latin that end in -ix change -ix to -ices (eg, matrix becomes matrices)
	{singular: "appendix", plural: "appendices", alternative: "appendixes"},
	{singular: "radix", plural: "radices", alternative: "radixes"},
	{singular: "helix", plural: "helices", alternative: "helixes"},

	// Words from Latin that end in -is change -is to -es
	{singular: "axis", plural: "axes", exact: true},
	{singular: "crisis", plural: "crises"},
	{singular: "ellipsis", plural: "ellipses", unidirectional: true}, // ellipse
	{singular: "genesis", plural: "geneses"},
	{singular: "oasis", plural: "oases"},
	{singular: "thesis", plural: "theses"},
	{singular: "testis", plural: "testes"},
	{singular: "base", plural: "bases"}, // popular case
	{singular: "basis", plural: "bases", unidirectional: true},

	{singular: "alias", plural: "aliases", exact: true}, // no alia, no aliasis
	{singular: "vedalia", plural: "vedalias"},           // no vedalium, no vedaliases

	// Words that end in -ch, -o, -s, -sh, -x, -z (can be conflict with the others)
	{singular: "use", plural: "uses", exact: true}, // us vs use
	{singular: "abuse", plural: "abuses"},
	{singular: "cause", plural: "causes"},
	{singular: "clause", plural: "clauses"},
	{singular: "cruse", plural: "cruses"},
	{singular: "excuse", plural: "excuses"},
	{singular: "fuse", plural: "fuses"},
	{singular: "house", plural: "houses"},
	{singular: "misuse", plural: "misuses"},
	{singular: "muse", plural: "muses"},
	{singular: "pause", plural: "pauses"},
	{singular: "ache", plural: "aches"},
	{singular: "topaz", plural: "topazes"},
	{singular: "buffalo", plural: "buffaloes", alternative: "buffalos"},
	{singular: "potato", plural: "potatoes"},
	{singular: "tomato", plural: "tomatoes"},

	// uncountables
	{singular: "equipment", uncountable: true},
	{singular: "information", uncountable: true},
	{singular: "jeans", uncountable: true},
	{singular: "money", uncountable: true},
	{singular: "news", uncountable: true},
	{singular: "rice", uncountable: true},

	// exceptions: -f to -ves, not -fe
	{singular: "dwarf", plural: "dwarfs", alternative: "dwarves"},
	{singular: "hoof", plural: "hoofs", alternative: "hooves"},
	{singular: "thief", plural: "thieves"},
	// exceptions: instead of -f(e) to -ves
	{singular: "chive", plural: "chives"},
	{singular: "hive", plural: "hives"},
	{singular: "move", plural: "moves"},

	// exceptions: instead of -y to -ies
	{singular: "movie", plural: "movies"},
	{singular: "cookie", plural: "cookies"},

	// exceptions: instead of -um to -a
	{singular: "pretorium", plural: "pretoriums"},
	{singular: "agenda", plural: "agendas"}, // instead of plural of agendum
	// exceptions: instead of -um to -a (chemical element names)

	// Words from Latin that end in -a change -a to -ae
	{singular: "formula", plural: "formulas", alternative: "formulae"}, // also -um/-a

	// exceptions: instead of -o to -oes
	{singular: "shoe", plural: "shoes"},
	{singular: "toe", plural: "toes", exact: true},
	{singular: "graffiti", plural: "graffiti"},

	// abbreviations
	{singular: "ID", plural: "IDs", exact: true},
}
var (
	// singleToPlural is the highest priority map for Pluralize().
	// singularToPluralSuffixList is used to build pluralRules for suffixes
	// and compound words.
	singleToPlural = make(map[string]string)

	// pluralToSingle is the highest priority map for Singularize().
	// singularToPluralSuffixList is used to build singularRules for suffixes
	// and compound words.
	pluralToSingle = make(map[string]string)
)
- // NOTE: This map should not be built as reverse map of singleToPlural since
- // there are words that has the same plurals.
- // build singleToPlural and pluralToSingle with dictionary
- func init() {
- for _, wd := range dictionary {
- if singleToPlural[wd.singular] != "" {
- panic(fmt.Errorf("map singleToPlural already has an entry for %s", wd.singular))
- }
- if wd.uncountable && wd.plural == "" {
- wd.plural = wd.singular
- }
- if wd.plural == "" {
- panic(fmt.Errorf("plural for %s is not provided", wd.singular))
- }
- singleToPlural[wd.singular] = wd.plural
- if !wd.unidirectional {
- if pluralToSingle[wd.plural] != "" {
- panic(fmt.Errorf("map pluralToSingle already has an entry for %s", wd.plural))
- }
- pluralToSingle[wd.plural] = wd.singular
- if wd.alternative != "" {
- if pluralToSingle[wd.alternative] != "" {
- panic(fmt.Errorf("map pluralToSingle already has an entry for %s", wd.alternative))
- }
- pluralToSingle[wd.alternative] = wd.singular
- }
- }
- }
- }
// singularToPluralSuffix pairs a singular suffix with its plural suffix.
type singularToPluralSuffix struct {
	singular string
	plural   string
}

// singularToPluralSuffixList is a list of "bidirectional" suffix rules for
// the irregular plurals that follow such rules.
//
// NOTE: IMPORTANT! The order of items in this list is the rule priority, not
// alphabet order. The first match will be used to inflect.
var singularToPluralSuffixList = []singularToPluralSuffix{
	// https://en.wiktionary.org/wiki/Appendix:English_irregular_nouns#Rules

	// Words that end in -f or -fe change -f or -fe to -ves
	{"tive", "tives"}, // exception
	{"eaf", "eaves"},
	{"oaf", "oaves"},
	{"afe", "aves"},
	{"arf", "arves"},
	{"rfe", "rves"},
	{"rf", "rves"},
	{"lf", "lves"},
	{"fe", "ves"}, // previously '[a-eg-km-z]fe' TODO: regex support

	// Words that end in -y preceded by a consonant change -y to -ies
	{"ay", "ays"},
	{"ey", "eys"},
	{"oy", "oys"},
	{"quy", "quies"},
	{"uy", "uys"},
	{"y", "ies"}, // '[^aeiou]y'

	// Words from French that end in -u add an x (eg, château becomes châteaux)
	{"eau", "eaux"}, // it seems like 'eau' is the most popular form of this rule

	// Words from Latin that end in -a change -a to -ae; before -on to -a and -um to -a
	// Each suffix keeps its own leading consonant in the plural.
	{"bula", "bulae"},
	{"dula", "dulae"},
	{"lula", "lulae"},
	{"nula", "nulae"},
	{"vula", "vulae"},

	// Words from Greek that end in -on change -on to -a (eg, polyhedron becomes polyhedra)
	// https://en.wiktionary.org/wiki/Category:English_irregular_plurals_ending_in_"-a"
	{"hedron", "hedra"},

	// Words from Latin that end in -um change -um to -a (eg, minimum becomes minima)
	// https://en.wiktionary.org/wiki/Category:English_irregular_plurals_ending_in_"-a"
	{"ium", "ia"}, // some exceptions especially chemical element names
	{"seum", "seums"},
	{"eum", "ea"},
	{"oum", "oa"},
	{"stracum", "straca"},
	{"dum", "da"},
	{"elum", "ela"},
	{"ilum", "ila"},
	{"olum", "ola"},
	{"ulum", "ula"},
	{"llum", "lla"},
	{"ylum", "yla"},
	{"imum", "ima"},
	{"ernum", "erna"},
	{"gnum", "gna"},
	{"brum", "bra"},
	{"crum", "cra"},
	{"terum", "tera"},
	{"serum", "sera"},
	{"trum", "tra"},
	{"antum", "anta"},
	{"atum", "ata"},
	{"entum", "enta"},
	{"etum", "eta"},
	{"itum", "ita"},
	{"otum", "ota"},
	{"utum", "uta"},
	{"ctum", "cta"},
	{"ovum", "ova"},

	// Words from Latin that end in -us change -us to -i or -era
	// not easy to make a simple rule. just add them all to the dictionary

	// Words from Latin that end in -ex change -ex to -ices (eg, vortex becomes vortices)
	// Words from Latin that end in -ix change -ix to -ices (eg, matrix becomes matrices)
	//    for example, -dix, -dex, and -dice will have the same plural form so
	//    making a simple rule is not possible for them
	{"trix", "trices"}, // ignore a few words that end in trice

	// Words from Latin that end in -is change -is to -es (eg, thesis becomes theses)
	// -sis and -se have the same plural -ses so making a rule is not easy either.
	{"iasis", "iases"},
	{"mesis", "meses"},
	{"kinesis", "kineses"},
	{"resis", "reses"},
	{"gnosis", "gnoses"}, // e.g. diagnosis
	{"opsis", "opses"},   // e.g. synopsis
	{"ysis", "yses"},     // e.g. analysis

	// Words that end in -ch, -o, -s, -sh, -x, -z
	{"ouse", "ouses"},
	{"lause", "lauses"},
	{"us", "uses"}, // use/uses is in the dictionary

	{"ch", "ches"},
	{"io", "ios"},
	{"sh", "shes"},
	{"ss", "sses"},
	{"ez", "ezzes"},
	{"iz", "izzes"},
	{"tz", "tzes"},
	{"zz", "zzes"},
	{"ano", "anos"},
	{"lo", "los"},
	{"to", "tos"},
	{"oo", "oos"},
	{"o", "oes"},
	{"x", "xes"},

	// for abbreviations
	{"S", "Ses"},

	// excluded rules: seems rare
	// Words from Hebrew that add -im or -ot (eg, cherub becomes cherubim)
	//  - cherub (cherubs or cherubim), seraph (seraphs or seraphim)
	// Words from Greek that end in -ma change -ma to -mata
	//  - Most words ending in -ma are in this category but it looks like
	//    just adding -s is more popular.
	// Words from Latin that end in -nx change -nx to -nges
	//  - Most words ending in -nx are in this category but it looks like
	//    just adding -es is more popular. (sphinxes)

	// excluded rules: don't care at least for now:
	// Words that end in -ful that add an s after the -ful
	// Words that end in -s or -ese denoting a national of a particular country
	// Symbols or letters, which often add -'s
}
- func init() {
- for i := len(singularToPluralSuffixList) - 1; i >= 0; i-- {
- InsertPluralRule(singularToPluralSuffixList[i].singular, singularToPluralSuffixList[i].plural)
- InsertSingularRule(singularToPluralSuffixList[i].plural, singularToPluralSuffixList[i].singular)
- }
- // build pluralRule and singularRule with dictionary for compound words
- for _, wd := range dictionary {
- if wd.exact {
- continue
- }
- if wd.uncountable && wd.plural == "" {
- wd.plural = wd.singular
- }
- InsertPluralRule(wd.singular, wd.plural)
- if !wd.unidirectional {
- InsertSingularRule(wd.plural, wd.singular)
- if wd.alternative != "" {
- InsertSingularRule(wd.alternative, wd.singular)
- }
- }
- }
- }
|