decoder.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. // Copyright 2021 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package pkgbits
  5. import (
  6. "encoding/binary"
  7. "fmt"
  8. "go/constant"
  9. "go/token"
  10. "math/big"
  11. "os"
  12. "runtime"
  13. "strings"
  14. )
  15. // A PkgDecoder provides methods for decoding a package's Unified IR
  16. // export data.
  17. type PkgDecoder struct {
  18. // version is the file format version.
  19. version uint32
  20. // sync indicates whether the file uses sync markers.
  21. sync bool
  22. // pkgPath is the package path for the package to be decoded.
  23. //
  24. // TODO(mdempsky): Remove; unneeded since CL 391014.
  25. pkgPath string
  26. // elemData is the full data payload of the encoded package.
  27. // Elements are densely and contiguously packed together.
  28. //
  29. // The last 8 bytes of elemData are the package fingerprint.
  30. elemData string
  31. // elemEnds stores the byte-offset end positions of element
  32. // bitstreams within elemData.
  33. //
  34. // For example, element I's bitstream data starts at elemEnds[I-1]
  35. // (or 0, if I==0) and ends at elemEnds[I].
  36. //
  37. // Note: elemEnds is indexed by absolute indices, not
  38. // section-relative indices.
  39. elemEnds []uint32
  40. // elemEndsEnds stores the index-offset end positions of relocation
  41. // sections within elemEnds.
  42. //
  43. // For example, section K's end positions start at elemEndsEnds[K-1]
  44. // (or 0, if K==0) and end at elemEndsEnds[K].
  45. elemEndsEnds [numRelocs]uint32
  46. }
  47. // PkgPath returns the package path for the package
  48. //
  49. // TODO(mdempsky): Remove; unneeded since CL 391014.
  50. func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath }
  51. // SyncMarkers reports whether pr uses sync markers.
  52. func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
  53. // NewPkgDecoder returns a PkgDecoder initialized to read the Unified
  54. // IR export data from input. pkgPath is the package path for the
  55. // compilation unit that produced the export data.
  56. //
  57. // TODO(mdempsky): Remove pkgPath parameter; unneeded since CL 391014.
  58. func NewPkgDecoder(pkgPath, input string) PkgDecoder {
  59. pr := PkgDecoder{
  60. pkgPath: pkgPath,
  61. }
  62. // TODO(mdempsky): Implement direct indexing of input string to
  63. // avoid copying the position information.
  64. r := strings.NewReader(input)
  65. assert(binary.Read(r, binary.LittleEndian, &pr.version) == nil)
  66. switch pr.version {
  67. default:
  68. panic(fmt.Errorf("unsupported version: %v", pr.version))
  69. case 0:
  70. // no flags
  71. case 1:
  72. var flags uint32
  73. assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
  74. pr.sync = flags&flagSyncMarkers != 0
  75. }
  76. assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
  77. pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1])
  78. assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
  79. pos, err := r.Seek(0, os.SEEK_CUR)
  80. assert(err == nil)
  81. pr.elemData = input[pos:]
  82. assert(len(pr.elemData)-8 == int(pr.elemEnds[len(pr.elemEnds)-1]))
  83. return pr
  84. }
  85. // NumElems returns the number of elements in section k.
  86. func (pr *PkgDecoder) NumElems(k RelocKind) int {
  87. count := int(pr.elemEndsEnds[k])
  88. if k > 0 {
  89. count -= int(pr.elemEndsEnds[k-1])
  90. }
  91. return count
  92. }
  93. // TotalElems returns the total number of elements across all sections.
  94. func (pr *PkgDecoder) TotalElems() int {
  95. return len(pr.elemEnds)
  96. }
  97. // Fingerprint returns the package fingerprint.
  98. func (pr *PkgDecoder) Fingerprint() [8]byte {
  99. var fp [8]byte
  100. copy(fp[:], pr.elemData[len(pr.elemData)-8:])
  101. return fp
  102. }
  103. // AbsIdx returns the absolute index for the given (section, index)
  104. // pair.
  105. func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int {
  106. absIdx := int(idx)
  107. if k > 0 {
  108. absIdx += int(pr.elemEndsEnds[k-1])
  109. }
  110. if absIdx >= int(pr.elemEndsEnds[k]) {
  111. errorf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
  112. }
  113. return absIdx
  114. }
  115. // DataIdx returns the raw element bitstream for the given (section,
  116. // index) pair.
  117. func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string {
  118. absIdx := pr.AbsIdx(k, idx)
  119. var start uint32
  120. if absIdx > 0 {
  121. start = pr.elemEnds[absIdx-1]
  122. }
  123. end := pr.elemEnds[absIdx]
  124. return pr.elemData[start:end]
  125. }
  126. // StringIdx returns the string value for the given string index.
  127. func (pr *PkgDecoder) StringIdx(idx Index) string {
  128. return pr.DataIdx(RelocString, idx)
  129. }
  130. // NewDecoder returns a Decoder for the given (section, index) pair,
  131. // and decodes the given SyncMarker from the element bitstream.
  132. func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
  133. r := pr.NewDecoderRaw(k, idx)
  134. r.Sync(marker)
  135. return r
  136. }
  137. // NewDecoderRaw returns a Decoder for the given (section, index) pair.
  138. //
  139. // Most callers should use NewDecoder instead.
  140. func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder {
  141. r := Decoder{
  142. common: pr,
  143. k: k,
  144. Idx: idx,
  145. }
  146. // TODO(mdempsky) r.data.Reset(...) after #44505 is resolved.
  147. r.Data = *strings.NewReader(pr.DataIdx(k, idx))
  148. r.Sync(SyncRelocs)
  149. r.Relocs = make([]RelocEnt, r.Len())
  150. for i := range r.Relocs {
  151. r.Sync(SyncReloc)
  152. r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
  153. }
  154. return r
  155. }
  156. // A Decoder provides methods for decoding an individual element's
  157. // bitstream data.
  158. type Decoder struct {
  159. common *PkgDecoder
  160. Relocs []RelocEnt
  161. Data strings.Reader
  162. k RelocKind
  163. Idx Index
  164. }
  165. func (r *Decoder) checkErr(err error) {
  166. if err != nil {
  167. errorf("unexpected decoding error: %w", err)
  168. }
  169. }
  170. func (r *Decoder) rawUvarint() uint64 {
  171. x, err := binary.ReadUvarint(&r.Data)
  172. r.checkErr(err)
  173. return x
  174. }
  175. func (r *Decoder) rawVarint() int64 {
  176. ux := r.rawUvarint()
  177. // Zig-zag decode.
  178. x := int64(ux >> 1)
  179. if ux&1 != 0 {
  180. x = ^x
  181. }
  182. return x
  183. }
  184. func (r *Decoder) rawReloc(k RelocKind, idx int) Index {
  185. e := r.Relocs[idx]
  186. assert(e.Kind == k)
  187. return e.Idx
  188. }
  189. // Sync decodes a sync marker from the element bitstream and asserts
  190. // that it matches the expected marker.
  191. //
  192. // If r.common.sync is false, then Sync is a no-op.
  193. func (r *Decoder) Sync(mWant SyncMarker) {
  194. if !r.common.sync {
  195. return
  196. }
  197. pos, _ := r.Data.Seek(0, os.SEEK_CUR) // TODO(mdempsky): io.SeekCurrent after #44505 is resolved
  198. mHave := SyncMarker(r.rawUvarint())
  199. writerPCs := make([]int, r.rawUvarint())
  200. for i := range writerPCs {
  201. writerPCs[i] = int(r.rawUvarint())
  202. }
  203. if mHave == mWant {
  204. return
  205. }
  206. // There's some tension here between printing:
  207. //
  208. // (1) full file paths that tools can recognize (e.g., so emacs
  209. // hyperlinks the "file:line" text for easy navigation), or
  210. //
  211. // (2) short file paths that are easier for humans to read (e.g., by
  212. // omitting redundant or irrelevant details, so it's easier to
  213. // focus on the useful bits that remain).
  214. //
  215. // The current formatting favors the former, as it seems more
  216. // helpful in practice. But perhaps the formatting could be improved
  217. // to better address both concerns. For example, use relative file
  218. // paths if they would be shorter, or rewrite file paths to contain
  219. // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
  220. // to reliably expand that again.
  221. fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
  222. fmt.Printf("\nfound %v, written at:\n", mHave)
  223. if len(writerPCs) == 0 {
  224. fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
  225. }
  226. for _, pc := range writerPCs {
  227. fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc)))
  228. }
  229. fmt.Printf("\nexpected %v, reading at:\n", mWant)
  230. var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
  231. n := runtime.Callers(2, readerPCs[:])
  232. for _, pc := range fmtFrames(readerPCs[:n]...) {
  233. fmt.Printf("\t%s\n", pc)
  234. }
  235. // We already printed a stack trace for the reader, so now we can
  236. // simply exit. Printing a second one with panic or base.Fatalf
  237. // would just be noise.
  238. os.Exit(1)
  239. }
  240. // Bool decodes and returns a bool value from the element bitstream.
  241. func (r *Decoder) Bool() bool {
  242. r.Sync(SyncBool)
  243. x, err := r.Data.ReadByte()
  244. r.checkErr(err)
  245. assert(x < 2)
  246. return x != 0
  247. }
  248. // Int64 decodes and returns an int64 value from the element bitstream.
  249. func (r *Decoder) Int64() int64 {
  250. r.Sync(SyncInt64)
  251. return r.rawVarint()
  252. }
  253. // Int64 decodes and returns a uint64 value from the element bitstream.
  254. func (r *Decoder) Uint64() uint64 {
  255. r.Sync(SyncUint64)
  256. return r.rawUvarint()
  257. }
  258. // Len decodes and returns a non-negative int value from the element bitstream.
  259. func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
  260. // Int decodes and returns an int value from the element bitstream.
  261. func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
  262. // Uint decodes and returns a uint value from the element bitstream.
  263. func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
  264. // Code decodes a Code value from the element bitstream and returns
  265. // its ordinal value. It's the caller's responsibility to convert the
  266. // result to an appropriate Code type.
  267. //
  268. // TODO(mdempsky): Ideally this method would have signature "Code[T
  269. // Code] T" instead, but we don't allow generic methods and the
  270. // compiler can't depend on generics yet anyway.
  271. func (r *Decoder) Code(mark SyncMarker) int {
  272. r.Sync(mark)
  273. return r.Len()
  274. }
  275. // Reloc decodes a relocation of expected section k from the element
  276. // bitstream and returns an index to the referenced element.
  277. func (r *Decoder) Reloc(k RelocKind) Index {
  278. r.Sync(SyncUseReloc)
  279. return r.rawReloc(k, r.Len())
  280. }
  281. // String decodes and returns a string value from the element
  282. // bitstream.
  283. func (r *Decoder) String() string {
  284. r.Sync(SyncString)
  285. return r.common.StringIdx(r.Reloc(RelocString))
  286. }
  287. // Strings decodes and returns a variable-length slice of strings from
  288. // the element bitstream.
  289. func (r *Decoder) Strings() []string {
  290. res := make([]string, r.Len())
  291. for i := range res {
  292. res[i] = r.String()
  293. }
  294. return res
  295. }
  296. // Value decodes and returns a constant.Value from the element
  297. // bitstream.
  298. func (r *Decoder) Value() constant.Value {
  299. r.Sync(SyncValue)
  300. isComplex := r.Bool()
  301. val := r.scalar()
  302. if isComplex {
  303. val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
  304. }
  305. return val
  306. }
  307. func (r *Decoder) scalar() constant.Value {
  308. switch tag := CodeVal(r.Code(SyncVal)); tag {
  309. default:
  310. panic(fmt.Errorf("unexpected scalar tag: %v", tag))
  311. case ValBool:
  312. return constant.MakeBool(r.Bool())
  313. case ValString:
  314. return constant.MakeString(r.String())
  315. case ValInt64:
  316. return constant.MakeInt64(r.Int64())
  317. case ValBigInt:
  318. return constant.Make(r.bigInt())
  319. case ValBigRat:
  320. num := r.bigInt()
  321. denom := r.bigInt()
  322. return constant.Make(new(big.Rat).SetFrac(num, denom))
  323. case ValBigFloat:
  324. return constant.Make(r.bigFloat())
  325. }
  326. }
  327. func (r *Decoder) bigInt() *big.Int {
  328. v := new(big.Int).SetBytes([]byte(r.String()))
  329. if r.Bool() {
  330. v.Neg(v)
  331. }
  332. return v
  333. }
  334. func (r *Decoder) bigFloat() *big.Float {
  335. v := new(big.Float).SetPrec(512)
  336. assert(v.UnmarshalText([]byte(r.String())) == nil)
  337. return v
  338. }
  339. // @@@ Helpers
  340. // TODO(mdempsky): These should probably be removed. I think they're a
  341. // smell that the export data format is not yet quite right.
  342. // PeekPkgPath returns the package path for the specified package
  343. // index.
  344. func (pr *PkgDecoder) PeekPkgPath(idx Index) string {
  345. r := pr.NewDecoder(RelocPkg, idx, SyncPkgDef)
  346. path := r.String()
  347. if path == "" {
  348. path = pr.pkgPath
  349. }
  350. return path
  351. }
  352. // PeekObj returns the package path, object name, and CodeObj for the
  353. // specified object index.
  354. func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) {
  355. r := pr.NewDecoder(RelocName, idx, SyncObject1)
  356. r.Sync(SyncSym)
  357. r.Sync(SyncPkg)
  358. path := pr.PeekPkgPath(r.Reloc(RelocPkg))
  359. name := r.String()
  360. assert(name != "")
  361. tag := CodeObj(r.Code(SyncCodeObj))
  362. return path, name, tag
  363. }