| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508 |
- // Copyright 2015 go-swagger maintainers
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package swag
- import (
- "bytes"
- "sync"
- "unicode"
- "unicode/utf8"
- )
// splitter holds the configuration used to break a name into lexems.
//
// The three initialism slices are indexed in parallel: entry i of each
// slice describes the same initialism.
type splitter struct {
	initialisms              []string
	initialismsRunes         [][]rune
	initialismsUpperCased    [][]rune // initialisms cached in their trimmed, upper-cased version
	postSplitInitialismCheck bool
}

// splitterOption tunes a splitter in place.
type splitterOption func(*splitter)

// initialismMatch tracks one candidate initialism while a name is scanned.
//
// body holds the runes of the initialism being matched, start and end are
// rune positions in the scanned name, and complete tells whether the whole
// body has been matched.
type initialismMatch struct {
	body     []rune
	start    int
	end      int
	complete bool
}

// initialismMatches is a collection of candidate matches.
type initialismMatches []initialismMatch
// Memory pools of temporary objects.
//
// These are used to recycle temporarily allocated objects
// and relieve the GC from undue pressure.

// matchesPool recycles slices of initialism matches.
type matchesPool struct{ *sync.Pool }

// buffersPool recycles byte buffers.
type buffersPool struct{ *sync.Pool }

// lexemsPool recycles slices of name lexems.
type lexemsPool struct{ *sync.Pool }

// splittersPool recycles splitters.
type splittersPool struct{ *sync.Pool }
- var (
- // poolOfMatches holds temporary slices for recycling during the initialism match process
- poolOfMatches = matchesPool{
- Pool: &sync.Pool{
- New: func() any {
- s := make(initialismMatches, 0, maxAllocMatches)
- return &s
- },
- },
- }
- poolOfBuffers = buffersPool{
- Pool: &sync.Pool{
- New: func() any {
- return new(bytes.Buffer)
- },
- },
- }
- poolOfLexems = lexemsPool{
- Pool: &sync.Pool{
- New: func() any {
- s := make([]nameLexem, 0, maxAllocMatches)
- return &s
- },
- },
- }
- poolOfSplitters = splittersPool{
- Pool: &sync.Pool{
- New: func() any {
- s := newSplitter()
- return &s
- },
- },
- }
- )
// nameReplaceTable maps a special character to the word that stands for it.
//
// The boolean result tells whether r is a known special character.
// Dashes and underscores are known, but are replaced by an empty string.
func nameReplaceTable(r rune) (string, bool) {
	var word string

	switch r {
	case '-', '_':
		// known separators: no word to substitute
	case '@':
		word = "At "
	case '&':
		word = "And "
	case '|':
		word = "Pipe "
	case '$':
		word = "Dollar "
	case '!':
		word = "Bang "
	default:
		return "", false
	}

	return word, true
}
- // split calls the splitter.
- //
- // Use newSplitter for more control and options
- func split(str string) []string {
- s := poolOfSplitters.BorrowSplitter()
- lexems := s.split(str)
- result := make([]string, 0, len(*lexems))
- for _, lexem := range *lexems {
- result = append(result, lexem.GetOriginal())
- }
- poolOfLexems.RedeemLexems(lexems)
- poolOfSplitters.RedeemSplitter(s)
- return result
- }
- func newSplitter(options ...splitterOption) splitter {
- s := splitter{
- postSplitInitialismCheck: false,
- initialisms: initialisms,
- initialismsRunes: initialismsRunes,
- initialismsUpperCased: initialismsUpperCased,
- }
- for _, option := range options {
- option(&s)
- }
- return s
- }
- // withPostSplitInitialismCheck allows to catch initialisms after main split process
- func withPostSplitInitialismCheck(s *splitter) {
- s.postSplitInitialismCheck = true
- }
- func (p matchesPool) BorrowMatches() *initialismMatches {
- s := p.Get().(*initialismMatches)
- *s = (*s)[:0] // reset slice, keep allocated capacity
- return s
- }
- func (p buffersPool) BorrowBuffer(size int) *bytes.Buffer {
- s := p.Get().(*bytes.Buffer)
- s.Reset()
- if s.Cap() < size {
- s.Grow(size)
- }
- return s
- }
- func (p lexemsPool) BorrowLexems() *[]nameLexem {
- s := p.Get().(*[]nameLexem)
- *s = (*s)[:0] // reset slice, keep allocated capacity
- return s
- }
- func (p splittersPool) BorrowSplitter(options ...splitterOption) *splitter {
- s := p.Get().(*splitter)
- s.postSplitInitialismCheck = false // reset options
- for _, apply := range options {
- apply(s)
- }
- return s
- }
- func (p matchesPool) RedeemMatches(s *initialismMatches) {
- p.Put(s)
- }
- func (p buffersPool) RedeemBuffer(s *bytes.Buffer) {
- p.Put(s)
- }
- func (p lexemsPool) RedeemLexems(s *[]nameLexem) {
- p.Put(s)
- }
- func (p splittersPool) RedeemSplitter(s *splitter) {
- p.Put(s)
- }
- func (m initialismMatch) isZero() bool {
- return m.start == 0 && m.end == 0
- }
- func (s splitter) split(name string) *[]nameLexem {
- nameRunes := []rune(name)
- matches := s.gatherInitialismMatches(nameRunes)
- if matches == nil {
- return poolOfLexems.BorrowLexems()
- }
- return s.mapMatchesToNameLexems(nameRunes, matches)
- }
- func (s splitter) gatherInitialismMatches(nameRunes []rune) *initialismMatches {
- var matches *initialismMatches
- for currentRunePosition, currentRune := range nameRunes {
- // recycle these allocations as we loop over runes
- // with such recycling, only 2 slices should be allocated per call
- // instead of o(n).
- newMatches := poolOfMatches.BorrowMatches()
- // check current initialism matches
- if matches != nil { // skip first iteration
- for _, match := range *matches {
- if keepCompleteMatch := match.complete; keepCompleteMatch {
- *newMatches = append(*newMatches, match)
- continue
- }
- // drop failed match
- currentMatchRune := match.body[currentRunePosition-match.start]
- if currentMatchRune != currentRune {
- continue
- }
- // try to complete ongoing match
- if currentRunePosition-match.start == len(match.body)-1 {
- // we are close; the next step is to check the symbol ahead
- // if it is a small letter, then it is not the end of match
- // but beginning of the next word
- if currentRunePosition < len(nameRunes)-1 {
- nextRune := nameRunes[currentRunePosition+1]
- if newWord := unicode.IsLower(nextRune); newWord {
- // oh ok, it was the start of a new word
- continue
- }
- }
- match.complete = true
- match.end = currentRunePosition
- }
- *newMatches = append(*newMatches, match)
- }
- }
- // check for new initialism matches
- for i := range s.initialisms {
- initialismRunes := s.initialismsRunes[i]
- if initialismRunes[0] == currentRune {
- *newMatches = append(*newMatches, initialismMatch{
- start: currentRunePosition,
- body: initialismRunes,
- complete: false,
- })
- }
- }
- if matches != nil {
- poolOfMatches.RedeemMatches(matches)
- }
- matches = newMatches
- }
- // up to the caller to redeem this last slice
- return matches
- }
- func (s splitter) mapMatchesToNameLexems(nameRunes []rune, matches *initialismMatches) *[]nameLexem {
- nameLexems := poolOfLexems.BorrowLexems()
- var lastAcceptedMatch initialismMatch
- for _, match := range *matches {
- if !match.complete {
- continue
- }
- if firstMatch := lastAcceptedMatch.isZero(); firstMatch {
- s.appendBrokenDownCasualString(nameLexems, nameRunes[:match.start])
- *nameLexems = append(*nameLexems, s.breakInitialism(string(match.body)))
- lastAcceptedMatch = match
- continue
- }
- if overlappedMatch := match.start <= lastAcceptedMatch.end; overlappedMatch {
- continue
- }
- middle := nameRunes[lastAcceptedMatch.end+1 : match.start]
- s.appendBrokenDownCasualString(nameLexems, middle)
- *nameLexems = append(*nameLexems, s.breakInitialism(string(match.body)))
- lastAcceptedMatch = match
- }
- // we have not found any accepted matches
- if lastAcceptedMatch.isZero() {
- *nameLexems = (*nameLexems)[:0]
- s.appendBrokenDownCasualString(nameLexems, nameRunes)
- } else if lastAcceptedMatch.end+1 != len(nameRunes) {
- rest := nameRunes[lastAcceptedMatch.end+1:]
- s.appendBrokenDownCasualString(nameLexems, rest)
- }
- poolOfMatches.RedeemMatches(matches)
- return nameLexems
- }
- func (s splitter) breakInitialism(original string) nameLexem {
- return newInitialismNameLexem(original, original)
- }
- func (s splitter) appendBrokenDownCasualString(segments *[]nameLexem, str []rune) {
- currentSegment := poolOfBuffers.BorrowBuffer(len(str)) // unlike strings.Builder, bytes.Buffer initial storage can reused
- defer func() {
- poolOfBuffers.RedeemBuffer(currentSegment)
- }()
- addCasualNameLexem := func(original string) {
- *segments = append(*segments, newCasualNameLexem(original))
- }
- addInitialismNameLexem := func(original, match string) {
- *segments = append(*segments, newInitialismNameLexem(original, match))
- }
- var addNameLexem func(string)
- if s.postSplitInitialismCheck {
- addNameLexem = func(original string) {
- for i := range s.initialisms {
- if isEqualFoldIgnoreSpace(s.initialismsUpperCased[i], original) {
- addInitialismNameLexem(original, s.initialisms[i])
- return
- }
- }
- addCasualNameLexem(original)
- }
- } else {
- addNameLexem = addCasualNameLexem
- }
- for _, rn := range str {
- if replace, found := nameReplaceTable(rn); found {
- if currentSegment.Len() > 0 {
- addNameLexem(currentSegment.String())
- currentSegment.Reset()
- }
- if replace != "" {
- addNameLexem(replace)
- }
- continue
- }
- if !unicode.In(rn, unicode.L, unicode.M, unicode.N, unicode.Pc) {
- if currentSegment.Len() > 0 {
- addNameLexem(currentSegment.String())
- currentSegment.Reset()
- }
- continue
- }
- if unicode.IsUpper(rn) {
- if currentSegment.Len() > 0 {
- addNameLexem(currentSegment.String())
- }
- currentSegment.Reset()
- }
- currentSegment.WriteRune(rn)
- }
- if currentSegment.Len() > 0 {
- addNameLexem(currentSegment.String())
- }
- }
// isEqualFoldIgnoreSpace is the same as strings.EqualFold, but
// it ignores leading and trailing blank spaces in the compared
// string.
//
// base is assumed to be composed of upper-cased runes, and be already
// trimmed.
//
// Blanks are the ASCII space and tab, plus any non-ASCII rune for which
// unicode.IsSpace holds. Other ASCII control characters, such as a line feed,
// are not considered blank.
//
// The string is indexed and decoded in place, so no conversion to a slice of
// bytes is needed.
//
// This code is heavily inspired from strings.EqualFold.
func isEqualFoldIgnoreSpace(base []rune, str string) bool {
	i := indexNonBlank(str, 0)
	if i >= len(str) {
		// str is empty or only made of blanks
		return len(base) == 0
	}

	var matched int
	for _, baseRune := range base {
		if i >= len(str) {
			break
		}

		if c := str[i]; c < utf8.RuneSelf {
			// single byte rune case (ASCII)
			if baseRune >= utf8.RuneSelf {
				return false
			}

			baseChar := byte(baseRune)
			if c != baseChar &&
				!('a' <= c && c <= 'z' && c-'a'+'A' == baseChar) {
				return false
			}

			matched++
			i++

			continue
		}

		// unicode case
		r, size := utf8.DecodeRuneInString(str[i:])
		if unicode.ToUpper(r) != baseRune {
			return false
		}

		matched++
		i += size
	}

	if matched != len(base) {
		// str ran out before base was entirely matched
		return false
	}

	// all passed: now we should only have blanks
	return indexNonBlank(str, i) == len(str)
}

// indexNonBlank returns the index of the first byte of str, at or after from,
// which does not start a blank, or len(str) when only blanks remain.
func indexNonBlank(str string, from int) int {
	i := from
	for i < len(str) {
		if c := str[i]; c < utf8.RuneSelf {
			// fast path for ASCII
			if c != ' ' && c != '\t' {
				break
			}

			i++

			continue
		}

		// unicode case
		r, size := utf8.DecodeRuneInString(str[i:])
		if !unicode.IsSpace(r) {
			break
		}

		i += size
	}

	return i
}
|