report_slices.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. // Copyright 2019, The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE.md file.
  4. package cmp
  5. import (
  6. "bytes"
  7. "fmt"
  8. "reflect"
  9. "strconv"
  10. "strings"
  11. "unicode"
  12. "unicode/utf8"
  13. "github.com/google/go-cmp/cmp/internal/diff"
  14. )
  15. // CanFormatDiffSlice reports whether we support custom formatting for nodes
  16. // that are slices of primitive kinds or strings.
  17. func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
  18. switch {
  19. case opts.DiffMode != diffUnknown:
  20. return false // Must be formatting in diff mode
  21. case v.NumDiff == 0:
  22. return false // No differences detected
  23. case !v.ValueX.IsValid() || !v.ValueY.IsValid():
  24. return false // Both values must be valid
  25. case v.Type.Kind() == reflect.Slice && (v.ValueX.Len() == 0 || v.ValueY.Len() == 0):
  26. return false // Both slice values have to be non-empty
  27. case v.NumIgnored > 0:
  28. return false // Some ignore option was used
  29. case v.NumTransformed > 0:
  30. return false // Some transform option was used
  31. case v.NumCompared > 1:
  32. return false // More than one comparison was used
  33. case v.NumCompared == 1 && v.Type.Name() != "":
  34. // The need for cmp to check applicability of options on every element
  35. // in a slice is a significant performance detriment for large []byte.
  36. // The workaround is to specify Comparer(bytes.Equal),
  37. // which enables cmp to compare []byte more efficiently.
  38. // If they differ, we still want to provide batched diffing.
  39. // The logic disallows named types since they tend to have their own
  40. // String method, with nicer formatting than what this provides.
  41. return false
  42. }
  43. switch t := v.Type; t.Kind() {
  44. case reflect.String:
  45. case reflect.Array, reflect.Slice:
  46. // Only slices of primitive types have specialized handling.
  47. switch t.Elem().Kind() {
  48. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
  49. reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
  50. reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
  51. default:
  52. return false
  53. }
  54. // If a sufficient number of elements already differ,
  55. // use specialized formatting even if length requirement is not met.
  56. if v.NumDiff > v.NumSame {
  57. return true
  58. }
  59. default:
  60. return false
  61. }
  62. // Use specialized string diffing for longer slices or strings.
  63. const minLength = 64
  64. return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength
  65. }
  66. // FormatDiffSlice prints a diff for the slices (or strings) represented by v.
  67. // This provides custom-tailored logic to make printing of differences in
  68. // textual strings and slices of primitive kinds more readable.
  69. func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
  70. assert(opts.DiffMode == diffUnknown)
  71. t, vx, vy := v.Type, v.ValueX, v.ValueY
  72. // Auto-detect the type of the data.
  73. var isLinedText, isText, isBinary bool
  74. var sx, sy string
  75. switch {
  76. case t.Kind() == reflect.String:
  77. sx, sy = vx.String(), vy.String()
  78. isText = true // Initial estimate, verify later
  79. case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
  80. sx, sy = string(vx.Bytes()), string(vy.Bytes())
  81. isBinary = true // Initial estimate, verify later
  82. case t.Kind() == reflect.Array:
  83. // Arrays need to be addressable for slice operations to work.
  84. vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
  85. vx2.Set(vx)
  86. vy2.Set(vy)
  87. vx, vy = vx2, vy2
  88. }
  89. if isText || isBinary {
  90. var numLines, lastLineIdx, maxLineLen int
  91. isBinary = !utf8.ValidString(sx) || !utf8.ValidString(sy)
  92. for i, r := range sx + sy {
  93. if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
  94. isBinary = true
  95. break
  96. }
  97. if r == '\n' {
  98. if maxLineLen < i-lastLineIdx {
  99. maxLineLen = i - lastLineIdx
  100. }
  101. lastLineIdx = i + 1
  102. numLines++
  103. }
  104. }
  105. isText = !isBinary
  106. isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
  107. }
  108. // Format the string into printable records.
  109. var list textList
  110. var delim string
  111. switch {
  112. // If the text appears to be multi-lined text,
  113. // then perform differencing across individual lines.
  114. case isLinedText:
  115. ssx := strings.Split(sx, "\n")
  116. ssy := strings.Split(sy, "\n")
  117. list = opts.formatDiffSlice(
  118. reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
  119. func(v reflect.Value, d diffMode) textRecord {
  120. s := formatString(v.Index(0).String())
  121. return textRecord{Diff: d, Value: textLine(s)}
  122. },
  123. )
  124. delim = "\n"
  125. // If possible, use a custom triple-quote (""") syntax for printing
  126. // differences in a string literal. This format is more readable,
  127. // but has edge-cases where differences are visually indistinguishable.
  128. // This format is avoided under the following conditions:
  129. // • A line starts with `"""`
  130. // • A line starts with "..."
  131. // • A line contains non-printable characters
  132. // • Adjacent different lines differ only by whitespace
  133. //
  134. // For example:
  135. // """
  136. // ... // 3 identical lines
  137. // foo
  138. // bar
  139. // - baz
  140. // + BAZ
  141. // """
  142. isTripleQuoted := true
  143. prevRemoveLines := map[string]bool{}
  144. prevInsertLines := map[string]bool{}
  145. var list2 textList
  146. list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
  147. for _, r := range list {
  148. if !r.Value.Equal(textEllipsis) {
  149. line, _ := strconv.Unquote(string(r.Value.(textLine)))
  150. line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support
  151. normLine := strings.Map(func(r rune) rune {
  152. if unicode.IsSpace(r) {
  153. return -1 // drop whitespace to avoid visually indistinguishable output
  154. }
  155. return r
  156. }, line)
  157. isPrintable := func(r rune) bool {
  158. return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable
  159. }
  160. isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == ""
  161. switch r.Diff {
  162. case diffRemoved:
  163. isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine]
  164. prevRemoveLines[normLine] = true
  165. case diffInserted:
  166. isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine]
  167. prevInsertLines[normLine] = true
  168. }
  169. if !isTripleQuoted {
  170. break
  171. }
  172. r.Value = textLine(line)
  173. r.ElideComma = true
  174. }
  175. if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group
  176. prevRemoveLines = map[string]bool{}
  177. prevInsertLines = map[string]bool{}
  178. }
  179. list2 = append(list2, r)
  180. }
  181. if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 {
  182. list2 = list2[:len(list2)-1] // elide single empty line at the end
  183. }
  184. list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
  185. if isTripleQuoted {
  186. var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"}
  187. switch t.Kind() {
  188. case reflect.String:
  189. if t != reflect.TypeOf(string("")) {
  190. out = opts.FormatType(t, out)
  191. }
  192. case reflect.Slice:
  193. // Always emit type for slices since the triple-quote syntax
  194. // looks like a string (not a slice).
  195. opts = opts.WithTypeMode(emitType)
  196. out = opts.FormatType(t, out)
  197. }
  198. return out
  199. }
  200. // If the text appears to be single-lined text,
  201. // then perform differencing in approximately fixed-sized chunks.
  202. // The output is printed as quoted strings.
  203. case isText:
  204. list = opts.formatDiffSlice(
  205. reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
  206. func(v reflect.Value, d diffMode) textRecord {
  207. s := formatString(v.String())
  208. return textRecord{Diff: d, Value: textLine(s)}
  209. },
  210. )
  211. delim = ""
  212. // If the text appears to be binary data,
  213. // then perform differencing in approximately fixed-sized chunks.
  214. // The output is inspired by hexdump.
  215. case isBinary:
  216. list = opts.formatDiffSlice(
  217. reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
  218. func(v reflect.Value, d diffMode) textRecord {
  219. var ss []string
  220. for i := 0; i < v.Len(); i++ {
  221. ss = append(ss, formatHex(v.Index(i).Uint()))
  222. }
  223. s := strings.Join(ss, ", ")
  224. comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
  225. return textRecord{Diff: d, Value: textLine(s), Comment: comment}
  226. },
  227. )
  228. // For all other slices of primitive types,
  229. // then perform differencing in approximately fixed-sized chunks.
  230. // The size of each chunk depends on the width of the element kind.
  231. default:
  232. var chunkSize int
  233. if t.Elem().Kind() == reflect.Bool {
  234. chunkSize = 16
  235. } else {
  236. switch t.Elem().Bits() {
  237. case 8:
  238. chunkSize = 16
  239. case 16:
  240. chunkSize = 12
  241. case 32:
  242. chunkSize = 8
  243. default:
  244. chunkSize = 8
  245. }
  246. }
  247. list = opts.formatDiffSlice(
  248. vx, vy, chunkSize, t.Elem().Kind().String(),
  249. func(v reflect.Value, d diffMode) textRecord {
  250. var ss []string
  251. for i := 0; i < v.Len(); i++ {
  252. switch t.Elem().Kind() {
  253. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  254. ss = append(ss, fmt.Sprint(v.Index(i).Int()))
  255. case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64:
  256. ss = append(ss, fmt.Sprint(v.Index(i).Uint()))
  257. case reflect.Uint8, reflect.Uintptr:
  258. ss = append(ss, formatHex(v.Index(i).Uint()))
  259. case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
  260. ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
  261. }
  262. }
  263. s := strings.Join(ss, ", ")
  264. return textRecord{Diff: d, Value: textLine(s)}
  265. },
  266. )
  267. }
  268. // Wrap the output with appropriate type information.
  269. var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}
  270. if !isText {
  271. // The "{...}" byte-sequence literal is not valid Go syntax for strings.
  272. // Emit the type for extra clarity (e.g. "string{...}").
  273. if t.Kind() == reflect.String {
  274. opts = opts.WithTypeMode(emitType)
  275. }
  276. return opts.FormatType(t, out)
  277. }
  278. switch t.Kind() {
  279. case reflect.String:
  280. out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
  281. if t != reflect.TypeOf(string("")) {
  282. out = opts.FormatType(t, out)
  283. }
  284. case reflect.Slice:
  285. out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
  286. if t != reflect.TypeOf([]byte(nil)) {
  287. out = opts.FormatType(t, out)
  288. }
  289. }
  290. return out
  291. }
  292. // formatASCII formats s as an ASCII string.
  293. // This is useful for printing binary strings in a semi-legible way.
  294. func formatASCII(s string) string {
  295. b := bytes.Repeat([]byte{'.'}, len(s))
  296. for i := 0; i < len(s); i++ {
  297. if ' ' <= s[i] && s[i] <= '~' {
  298. b[i] = s[i]
  299. }
  300. }
  301. return string(b)
  302. }
  303. func (opts formatOptions) formatDiffSlice(
  304. vx, vy reflect.Value, chunkSize int, name string,
  305. makeRec func(reflect.Value, diffMode) textRecord,
  306. ) (list textList) {
  307. es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
  308. return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
  309. })
  310. appendChunks := func(v reflect.Value, d diffMode) int {
  311. n0 := v.Len()
  312. for v.Len() > 0 {
  313. n := chunkSize
  314. if n > v.Len() {
  315. n = v.Len()
  316. }
  317. list = append(list, makeRec(v.Slice(0, n), d))
  318. v = v.Slice(n, v.Len())
  319. }
  320. return n0 - v.Len()
  321. }
  322. var numDiffs int
  323. maxLen := -1
  324. if opts.LimitVerbosity {
  325. maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc...
  326. opts.VerbosityLevel--
  327. }
  328. groups := coalesceAdjacentEdits(name, es)
  329. groups = coalesceInterveningIdentical(groups, chunkSize/4)
  330. maxGroup := diffStats{Name: name}
  331. for i, ds := range groups {
  332. if maxLen >= 0 && numDiffs >= maxLen {
  333. maxGroup = maxGroup.Append(ds)
  334. continue
  335. }
  336. // Print equal.
  337. if ds.NumDiff() == 0 {
  338. // Compute the number of leading and trailing equal bytes to print.
  339. var numLo, numHi int
  340. numEqual := ds.NumIgnored + ds.NumIdentical
  341. for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
  342. numLo++
  343. }
  344. for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
  345. numHi++
  346. }
  347. if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
  348. numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
  349. }
  350. // Print the equal bytes.
  351. appendChunks(vx.Slice(0, numLo), diffIdentical)
  352. if numEqual > numLo+numHi {
  353. ds.NumIdentical -= numLo + numHi
  354. list.AppendEllipsis(ds)
  355. }
  356. appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
  357. vx = vx.Slice(numEqual, vx.Len())
  358. vy = vy.Slice(numEqual, vy.Len())
  359. continue
  360. }
  361. // Print unequal.
  362. len0 := len(list)
  363. nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
  364. vx = vx.Slice(nx, vx.Len())
  365. ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
  366. vy = vy.Slice(ny, vy.Len())
  367. numDiffs += len(list) - len0
  368. }
  369. if maxGroup.IsZero() {
  370. assert(vx.Len() == 0 && vy.Len() == 0)
  371. } else {
  372. list.AppendEllipsis(maxGroup)
  373. }
  374. return list
  375. }
  376. // coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
  377. // equal or unequal counts.
  378. func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
  379. var prevCase int // Arbitrary index into which case last occurred
  380. lastStats := func(i int) *diffStats {
  381. if prevCase != i {
  382. groups = append(groups, diffStats{Name: name})
  383. prevCase = i
  384. }
  385. return &groups[len(groups)-1]
  386. }
  387. for _, e := range es {
  388. switch e {
  389. case diff.Identity:
  390. lastStats(1).NumIdentical++
  391. case diff.UniqueX:
  392. lastStats(2).NumRemoved++
  393. case diff.UniqueY:
  394. lastStats(2).NumInserted++
  395. case diff.Modified:
  396. lastStats(2).NumModified++
  397. }
  398. }
  399. return groups
  400. }
  401. // coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
  402. // equal groups into adjacent unequal groups that currently result in a
  403. // dual inserted/removed printout. This acts as a high-pass filter to smooth
  404. // out high-frequency changes within the windowSize.
  405. func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
  406. groups, groupsOrig := groups[:0], groups
  407. for i, ds := range groupsOrig {
  408. if len(groups) >= 2 && ds.NumDiff() > 0 {
  409. prev := &groups[len(groups)-2] // Unequal group
  410. curr := &groups[len(groups)-1] // Equal group
  411. next := &groupsOrig[i] // Unequal group
  412. hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
  413. hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
  414. if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
  415. *prev = prev.Append(*curr).Append(*next)
  416. groups = groups[:len(groups)-1] // Truncate off equal group
  417. continue
  418. }
  419. }
  420. groups = append(groups, ds)
  421. }
  422. return groups
  423. }