docstring.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. // +build codegen
  2. package api
  3. import (
  4. "bytes"
  5. "encoding/json"
  6. "fmt"
  7. "html"
  8. "os"
  9. "regexp"
  10. "strings"
  11. xhtml "golang.org/x/net/html"
  12. )
  13. type apiDocumentation struct {
  14. *API
  15. Operations map[string]string
  16. Service string
  17. Shapes map[string]shapeDocumentation
  18. }
  19. type shapeDocumentation struct {
  20. Base string
  21. Refs map[string]string
  22. }
  23. // AttachDocs attaches documentation from a JSON filename.
  24. func (a *API) AttachDocs(filename string) {
  25. d := apiDocumentation{API: a}
  26. f, err := os.Open(filename)
  27. defer f.Close()
  28. if err != nil {
  29. panic(err)
  30. }
  31. err = json.NewDecoder(f).Decode(&d)
  32. if err != nil {
  33. panic(err)
  34. }
  35. d.setup()
  36. }
  37. func (d *apiDocumentation) setup() {
  38. d.API.Documentation = docstring(d.Service)
  39. for opName, doc := range d.Operations {
  40. if _, ok := d.API.Operations[opName]; !ok {
  41. panic(fmt.Sprintf("%s, doc op %q not found in API op set",
  42. d.API.name, opName),
  43. )
  44. }
  45. d.API.Operations[opName].Documentation = docstring(doc)
  46. }
  47. for shape, info := range d.Shapes {
  48. if sh := d.API.Shapes[shape]; sh != nil {
  49. sh.Documentation = docstring(info.Base)
  50. }
  51. for ref, doc := range info.Refs {
  52. if doc == "" {
  53. continue
  54. }
  55. parts := strings.Split(ref, "$")
  56. if len(parts) != 2 {
  57. fmt.Fprintf(os.Stderr, "Shape Doc %s has unexpected reference format, %q\n", shape, ref)
  58. continue
  59. }
  60. if sh := d.API.Shapes[parts[0]]; sh != nil {
  61. if m := sh.MemberRefs[parts[1]]; m != nil {
  62. m.Documentation = docstring(doc)
  63. }
  64. }
  65. }
  66. }
  67. }
  68. var reNewline = regexp.MustCompile(`\r?\n`)
  69. var reMultiSpace = regexp.MustCompile(`\s+`)
  70. var reComments = regexp.MustCompile(`<!--.*?-->`)
  71. var reFullnameBlock = regexp.MustCompile(`<fullname>(.+?)<\/fullname>`)
  72. var reFullname = regexp.MustCompile(`<fullname>(.*?)</fullname>`)
  73. var reExamples = regexp.MustCompile(`<examples?>.+?<\/examples?>`)
  74. var reEndNL = regexp.MustCompile(`\n+$`)
  75. // docstring rewrites a string to insert godocs formatting.
  76. func docstring(doc string) string {
  77. doc = strings.TrimSpace(doc)
  78. if doc == "" {
  79. return ""
  80. }
  81. doc = reNewline.ReplaceAllString(doc, "")
  82. doc = reMultiSpace.ReplaceAllString(doc, " ")
  83. doc = reComments.ReplaceAllString(doc, "")
  84. var fullname string
  85. parts := reFullnameBlock.FindStringSubmatch(doc)
  86. if len(parts) > 1 {
  87. fullname = parts[1]
  88. }
  89. // Remove full name block from doc string
  90. doc = reFullname.ReplaceAllString(doc, "")
  91. doc = reExamples.ReplaceAllString(doc, "")
  92. doc = generateDoc(doc)
  93. doc = reEndNL.ReplaceAllString(doc, "")
  94. doc = html.UnescapeString(doc)
  95. // Replace doc with full name if doc is empty.
  96. doc = strings.TrimSpace(doc)
  97. if len(doc) == 0 {
  98. doc = fullname
  99. }
  100. return commentify(doc)
  101. }
  102. const (
  103. indent = " "
  104. )
  105. // style is what we want to prefix a string with.
  106. // For instance, <li>Foo</li><li>Bar</li>, will generate
  107. // * Foo
  108. // * Bar
  109. var style = map[string]string{
  110. "ul": indent + "* ",
  111. "li": indent + "* ",
  112. "code": indent,
  113. "pre": indent,
  114. }
  115. // commentify converts a string to a Go comment
  116. func commentify(doc string) string {
  117. if len(doc) == 0 {
  118. return ""
  119. }
  120. lines := strings.Split(doc, "\n")
  121. out := make([]string, 0, len(lines))
  122. for i := 0; i < len(lines); i++ {
  123. line := lines[i]
  124. if i > 0 && line == "" && lines[i-1] == "" {
  125. continue
  126. }
  127. out = append(out, line)
  128. }
  129. if len(out) > 0 {
  130. out[0] = "// " + out[0]
  131. return strings.Join(out, "\n// ")
  132. }
  133. return ""
  134. }
  135. // wrap returns a rewritten version of text to have line breaks
  136. // at approximately length characters. Line breaks will only be
  137. // inserted into whitespace.
  138. func wrap(text string, length int, isIndented bool) string {
  139. var buf bytes.Buffer
  140. var last rune
  141. var lastNL bool
  142. var col int
  143. for _, c := range text {
  144. switch c {
  145. case '\r': // ignore this
  146. continue // and also don't track `last`
  147. case '\n': // ignore this too, but reset col
  148. if col >= length || last == '\n' {
  149. buf.WriteString("\n")
  150. }
  151. buf.WriteString("\n")
  152. col = 0
  153. case ' ', '\t': // opportunity to split
  154. if col >= length {
  155. buf.WriteByte('\n')
  156. col = 0
  157. if isIndented {
  158. buf.WriteString(indent)
  159. col += 3
  160. }
  161. } else {
  162. // We only want to write a leading space if the col is greater than zero.
  163. // This will provide the proper spacing for documentation.
  164. buf.WriteRune(c)
  165. col++ // count column
  166. }
  167. default:
  168. buf.WriteRune(c)
  169. col++
  170. }
  171. lastNL = c == '\n'
  172. _ = lastNL
  173. last = c
  174. }
  175. return buf.String()
  176. }
  177. type tagInfo struct {
  178. tag string
  179. key string
  180. val string
  181. txt string
  182. raw string
  183. closingTag bool
  184. }
  185. // generateDoc will generate the proper doc string for html encoded or plain text doc entries.
  186. func generateDoc(htmlSrc string) string {
  187. tokenizer := xhtml.NewTokenizer(strings.NewReader(htmlSrc))
  188. tokens := buildTokenArray(tokenizer)
  189. scopes := findScopes(tokens)
  190. return walk(scopes)
  191. }
  192. func buildTokenArray(tokenizer *xhtml.Tokenizer) []tagInfo {
  193. tokens := []tagInfo{}
  194. for tt := tokenizer.Next(); tt != xhtml.ErrorToken; tt = tokenizer.Next() {
  195. switch tt {
  196. case xhtml.TextToken:
  197. txt := string(tokenizer.Text())
  198. if len(tokens) == 0 {
  199. info := tagInfo{
  200. raw: txt,
  201. }
  202. tokens = append(tokens, info)
  203. }
  204. tn, _ := tokenizer.TagName()
  205. key, val, _ := tokenizer.TagAttr()
  206. info := tagInfo{
  207. tag: string(tn),
  208. key: string(key),
  209. val: string(val),
  210. txt: txt,
  211. }
  212. tokens = append(tokens, info)
  213. case xhtml.StartTagToken:
  214. tn, _ := tokenizer.TagName()
  215. key, val, _ := tokenizer.TagAttr()
  216. info := tagInfo{
  217. tag: string(tn),
  218. key: string(key),
  219. val: string(val),
  220. }
  221. tokens = append(tokens, info)
  222. case xhtml.SelfClosingTagToken, xhtml.EndTagToken:
  223. tn, _ := tokenizer.TagName()
  224. key, val, _ := tokenizer.TagAttr()
  225. info := tagInfo{
  226. tag: string(tn),
  227. key: string(key),
  228. val: string(val),
  229. closingTag: true,
  230. }
  231. tokens = append(tokens, info)
  232. }
  233. }
  234. return tokens
  235. }
  236. // walk is used to traverse each scoped block. These scoped
  237. // blocks will act as blocked text where we do most of our
  238. // text manipulation.
  239. func walk(scopes [][]tagInfo) string {
  240. doc := ""
  241. // Documentation will be chunked by scopes.
  242. // Meaning, for each scope will be divided by one or more newlines.
  243. for _, scope := range scopes {
  244. indentStr, isIndented := priorityIndentation(scope)
  245. block := ""
  246. href := ""
  247. after := false
  248. level := 0
  249. lastTag := ""
  250. for _, token := range scope {
  251. if token.closingTag {
  252. endl := closeTag(token, level)
  253. block += endl
  254. level--
  255. lastTag = ""
  256. } else if token.txt == "" {
  257. if token.val != "" {
  258. href, after = formatText(token, "")
  259. }
  260. if level == 1 && isIndented {
  261. block += indentStr
  262. }
  263. level++
  264. lastTag = token.tag
  265. } else {
  266. if token.txt != " " {
  267. str, _ := formatText(token, lastTag)
  268. block += str
  269. if after {
  270. block += href
  271. after = false
  272. }
  273. } else {
  274. fmt.Println(token.tag)
  275. str, _ := formatText(tagInfo{}, lastTag)
  276. block += str
  277. }
  278. }
  279. }
  280. if !isIndented {
  281. block = strings.TrimPrefix(block, " ")
  282. }
  283. block = wrap(block, 72, isIndented)
  284. doc += block
  285. }
  286. return doc
  287. }
  288. // closeTag will divide up the blocks of documentation to be formated properly.
  289. func closeTag(token tagInfo, level int) string {
  290. switch token.tag {
  291. case "pre", "li", "div":
  292. return "\n"
  293. case "p", "h1", "h2", "h3", "h4", "h5", "h6":
  294. return "\n\n"
  295. case "code":
  296. // indented code is only at the 0th level.
  297. if level == 0 {
  298. return "\n"
  299. }
  300. }
  301. return ""
  302. }
  303. // formatText will format any sort of text based off of a tag. It will also return
  304. // a boolean to add the string after the text token.
  305. func formatText(token tagInfo, lastTag string) (string, bool) {
  306. switch token.tag {
  307. case "a":
  308. if token.val != "" {
  309. return fmt.Sprintf(" (%s)", token.val), true
  310. }
  311. }
  312. // We don't care about a single space nor no text.
  313. if len(token.txt) == 0 || token.txt == " " {
  314. return "", false
  315. }
  316. // Here we want to indent code blocks that are newlines
  317. if lastTag == "code" {
  318. // Greater than one, because we don't care about newlines in the beginning
  319. block := ""
  320. if lines := strings.Split(token.txt, "\n"); len(lines) > 1 {
  321. for _, line := range lines {
  322. block += indent + line
  323. }
  324. block += "\n"
  325. return block, false
  326. }
  327. }
  328. return token.txt, false
  329. }
  330. // This is a parser to check what type of indention is needed.
  331. func priorityIndentation(blocks []tagInfo) (string, bool) {
  332. if len(blocks) == 0 {
  333. return "", false
  334. }
  335. v, ok := style[blocks[0].tag]
  336. return v, ok
  337. }
  338. // Divides into scopes based off levels.
  339. // For instance,
  340. // <p>Testing<code>123</code></p><ul><li>Foo</li></ul>
  341. // This has 2 scopes, the <p> and <ul>
  342. func findScopes(tokens []tagInfo) [][]tagInfo {
  343. level := 0
  344. scope := []tagInfo{}
  345. scopes := [][]tagInfo{}
  346. for _, token := range tokens {
  347. // we will clear empty tagged tokens from the array
  348. txt := strings.TrimSpace(token.txt)
  349. tag := strings.TrimSpace(token.tag)
  350. if len(txt) == 0 && len(tag) == 0 {
  351. continue
  352. }
  353. scope = append(scope, token)
  354. // If it is a closing tag then we check what level
  355. // we are on. If it is 0, then that means we have found a
  356. // scoped block.
  357. if token.closingTag {
  358. level--
  359. if level == 0 {
  360. scopes = append(scopes, scope)
  361. scope = []tagInfo{}
  362. }
  363. // Check opening tags and increment the level
  364. } else if token.txt == "" {
  365. level++
  366. }
  367. }
  368. // In this case, we did not run into a closing tag. This would mean
  369. // we have plaintext for documentation.
  370. if len(scopes) == 0 {
  371. scopes = append(scopes, scope)
  372. }
  373. return scopes
  374. }