encoder.go 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. // Copyright 2021 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package pkgbits
  5. import (
  6. "bytes"
  7. "crypto/md5"
  8. "encoding/binary"
  9. "go/constant"
  10. "io"
  11. "math/big"
  12. "runtime"
  13. )
  // currentVersion is the version number written at the start of the
  // encoded export data.
  //
  //   - v0: initial prototype
  //   - v1: adds the flags uint32 word
  const currentVersion = uint32(1)
  20. // A PkgEncoder provides methods for encoding a package's Unified IR
  21. // export data.
  22. type PkgEncoder struct {
  23. // elems holds the bitstream for previously encoded elements.
  24. elems [numRelocs][]string
  25. // stringsIdx maps previously encoded strings to their index within
  26. // the RelocString section, to allow deduplication. That is,
  27. // elems[RelocString][stringsIdx[s]] == s (if present).
  28. stringsIdx map[string]Index
  29. // syncFrames is the number of frames to write at each sync
  30. // marker. A negative value means sync markers are omitted.
  31. syncFrames int
  32. }
  33. // SyncMarkers reports whether pw uses sync markers.
  34. func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
  35. // NewPkgEncoder returns an initialized PkgEncoder.
  36. //
  37. // syncFrames is the number of caller frames that should be serialized
  38. // at Sync points. Serializing additional frames results in larger
  39. // export data files, but can help diagnosing desync errors in
  40. // higher-level Unified IR reader/writer code. If syncFrames is
  41. // negative, then sync markers are omitted entirely.
  42. func NewPkgEncoder(syncFrames int) PkgEncoder {
  43. return PkgEncoder{
  44. stringsIdx: make(map[string]Index),
  45. syncFrames: syncFrames,
  46. }
  47. }
  48. // DumpTo writes the package's encoded data to out0 and returns the
  49. // package fingerprint.
  50. func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
  51. h := md5.New()
  52. out := io.MultiWriter(out0, h)
  53. writeUint32 := func(x uint32) {
  54. assert(binary.Write(out, binary.LittleEndian, x) == nil)
  55. }
  56. writeUint32(currentVersion)
  57. var flags uint32
  58. if pw.SyncMarkers() {
  59. flags |= flagSyncMarkers
  60. }
  61. writeUint32(flags)
  62. // Write elemEndsEnds.
  63. var sum uint32
  64. for _, elems := range &pw.elems {
  65. sum += uint32(len(elems))
  66. writeUint32(sum)
  67. }
  68. // Write elemEnds.
  69. sum = 0
  70. for _, elems := range &pw.elems {
  71. for _, elem := range elems {
  72. sum += uint32(len(elem))
  73. writeUint32(sum)
  74. }
  75. }
  76. // Write elemData.
  77. for _, elems := range &pw.elems {
  78. for _, elem := range elems {
  79. _, err := io.WriteString(out, elem)
  80. assert(err == nil)
  81. }
  82. }
  83. // Write fingerprint.
  84. copy(fingerprint[:], h.Sum(nil))
  85. _, err := out0.Write(fingerprint[:])
  86. assert(err == nil)
  87. return
  88. }
  89. // StringIdx adds a string value to the strings section, if not
  90. // already present, and returns its index.
  91. func (pw *PkgEncoder) StringIdx(s string) Index {
  92. if idx, ok := pw.stringsIdx[s]; ok {
  93. assert(pw.elems[RelocString][idx] == s)
  94. return idx
  95. }
  96. idx := Index(len(pw.elems[RelocString]))
  97. pw.elems[RelocString] = append(pw.elems[RelocString], s)
  98. pw.stringsIdx[s] = idx
  99. return idx
  100. }
  101. // NewEncoder returns an Encoder for a new element within the given
  102. // section, and encodes the given SyncMarker as the start of the
  103. // element bitstream.
  104. func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
  105. e := pw.NewEncoderRaw(k)
  106. e.Sync(marker)
  107. return e
  108. }
  109. // NewEncoderRaw returns an Encoder for a new element within the given
  110. // section.
  111. //
  112. // Most callers should use NewEncoder instead.
  113. func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
  114. idx := Index(len(pw.elems[k]))
  115. pw.elems[k] = append(pw.elems[k], "") // placeholder
  116. return Encoder{
  117. p: pw,
  118. k: k,
  119. Idx: idx,
  120. }
  121. }
  122. // An Encoder provides methods for encoding an individual element's
  123. // bitstream data.
  124. type Encoder struct {
  125. p *PkgEncoder
  126. Relocs []RelocEnt
  127. Data bytes.Buffer // accumulated element bitstream data
  128. encodingRelocHeader bool
  129. k RelocKind
  130. Idx Index // index within relocation section
  131. }
  132. // Flush finalizes the element's bitstream and returns its Index.
  133. func (w *Encoder) Flush() Index {
  134. var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved
  135. // Backup the data so we write the relocations at the front.
  136. var tmp bytes.Buffer
  137. io.Copy(&tmp, &w.Data)
  138. // TODO(mdempsky): Consider writing these out separately so they're
  139. // easier to strip, along with function bodies, so that we can prune
  140. // down to just the data that's relevant to go/types.
  141. if w.encodingRelocHeader {
  142. panic("encodingRelocHeader already true; recursive flush?")
  143. }
  144. w.encodingRelocHeader = true
  145. w.Sync(SyncRelocs)
  146. w.Len(len(w.Relocs))
  147. for _, rEnt := range w.Relocs {
  148. w.Sync(SyncReloc)
  149. w.Len(int(rEnt.Kind))
  150. w.Len(int(rEnt.Idx))
  151. }
  152. io.Copy(&sb, &w.Data)
  153. io.Copy(&sb, &tmp)
  154. w.p.elems[w.k][w.Idx] = sb.String()
  155. return w.Idx
  156. }
  157. func (w *Encoder) checkErr(err error) {
  158. if err != nil {
  159. errorf("unexpected encoding error: %v", err)
  160. }
  161. }
  162. func (w *Encoder) rawUvarint(x uint64) {
  163. var buf [binary.MaxVarintLen64]byte
  164. n := binary.PutUvarint(buf[:], x)
  165. _, err := w.Data.Write(buf[:n])
  166. w.checkErr(err)
  167. }
  168. func (w *Encoder) rawVarint(x int64) {
  169. // Zig-zag encode.
  170. ux := uint64(x) << 1
  171. if x < 0 {
  172. ux = ^ux
  173. }
  174. w.rawUvarint(ux)
  175. }
  176. func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
  177. // TODO(mdempsky): Use map for lookup; this takes quadratic time.
  178. for i, rEnt := range w.Relocs {
  179. if rEnt.Kind == r && rEnt.Idx == idx {
  180. return i
  181. }
  182. }
  183. i := len(w.Relocs)
  184. w.Relocs = append(w.Relocs, RelocEnt{r, idx})
  185. return i
  186. }
  187. func (w *Encoder) Sync(m SyncMarker) {
  188. if !w.p.SyncMarkers() {
  189. return
  190. }
  191. // Writing out stack frame string references requires working
  192. // relocations, but writing out the relocations themselves involves
  193. // sync markers. To prevent infinite recursion, we simply trim the
  194. // stack frame for sync markers within the relocation header.
  195. var frames []string
  196. if !w.encodingRelocHeader && w.p.syncFrames > 0 {
  197. pcs := make([]uintptr, w.p.syncFrames)
  198. n := runtime.Callers(2, pcs)
  199. frames = fmtFrames(pcs[:n]...)
  200. }
  201. // TODO(mdempsky): Save space by writing out stack frames as a
  202. // linked list so we can share common stack frames.
  203. w.rawUvarint(uint64(m))
  204. w.rawUvarint(uint64(len(frames)))
  205. for _, frame := range frames {
  206. w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
  207. }
  208. }
  209. // Bool encodes and writes a bool value into the element bitstream,
  210. // and then returns the bool value.
  211. //
  212. // For simple, 2-alternative encodings, the idiomatic way to call Bool
  213. // is something like:
  214. //
  215. // if w.Bool(x != 0) {
  216. // // alternative #1
  217. // } else {
  218. // // alternative #2
  219. // }
  220. //
  221. // For multi-alternative encodings, use Code instead.
  222. func (w *Encoder) Bool(b bool) bool {
  223. w.Sync(SyncBool)
  224. var x byte
  225. if b {
  226. x = 1
  227. }
  228. err := w.Data.WriteByte(x)
  229. w.checkErr(err)
  230. return b
  231. }
  232. // Int64 encodes and writes an int64 value into the element bitstream.
  233. func (w *Encoder) Int64(x int64) {
  234. w.Sync(SyncInt64)
  235. w.rawVarint(x)
  236. }
  237. // Uint64 encodes and writes a uint64 value into the element bitstream.
  238. func (w *Encoder) Uint64(x uint64) {
  239. w.Sync(SyncUint64)
  240. w.rawUvarint(x)
  241. }
  242. // Len encodes and writes a non-negative int value into the element bitstream.
  243. func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
  244. // Int encodes and writes an int value into the element bitstream.
  245. func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
  246. // Len encodes and writes a uint value into the element bitstream.
  247. func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
  248. // Reloc encodes and writes a relocation for the given (section,
  249. // index) pair into the element bitstream.
  250. //
  251. // Note: Only the index is formally written into the element
  252. // bitstream, so bitstream decoders must know from context which
  253. // section an encoded relocation refers to.
  254. func (w *Encoder) Reloc(r RelocKind, idx Index) {
  255. w.Sync(SyncUseReloc)
  256. w.Len(w.rawReloc(r, idx))
  257. }
  258. // Code encodes and writes a Code value into the element bitstream.
  259. func (w *Encoder) Code(c Code) {
  260. w.Sync(c.Marker())
  261. w.Len(c.Value())
  262. }
  263. // String encodes and writes a string value into the element
  264. // bitstream.
  265. //
  266. // Internally, strings are deduplicated by adding them to the strings
  267. // section (if not already present), and then writing a relocation
  268. // into the element bitstream.
  269. func (w *Encoder) String(s string) {
  270. w.Sync(SyncString)
  271. w.Reloc(RelocString, w.p.StringIdx(s))
  272. }
  273. // Strings encodes and writes a variable-length slice of strings into
  274. // the element bitstream.
  275. func (w *Encoder) Strings(ss []string) {
  276. w.Len(len(ss))
  277. for _, s := range ss {
  278. w.String(s)
  279. }
  280. }
  281. // Value encodes and writes a constant.Value into the element
  282. // bitstream.
  283. func (w *Encoder) Value(val constant.Value) {
  284. w.Sync(SyncValue)
  285. if w.Bool(val.Kind() == constant.Complex) {
  286. w.scalar(constant.Real(val))
  287. w.scalar(constant.Imag(val))
  288. } else {
  289. w.scalar(val)
  290. }
  291. }
  292. func (w *Encoder) scalar(val constant.Value) {
  293. switch v := constant.Val(val).(type) {
  294. default:
  295. errorf("unhandled %v (%v)", val, val.Kind())
  296. case bool:
  297. w.Code(ValBool)
  298. w.Bool(v)
  299. case string:
  300. w.Code(ValString)
  301. w.String(v)
  302. case int64:
  303. w.Code(ValInt64)
  304. w.Int64(v)
  305. case *big.Int:
  306. w.Code(ValBigInt)
  307. w.bigInt(v)
  308. case *big.Rat:
  309. w.Code(ValBigRat)
  310. w.bigInt(v.Num())
  311. w.bigInt(v.Denom())
  312. case *big.Float:
  313. w.Code(ValBigFloat)
  314. w.bigFloat(v)
  315. }
  316. }
  317. func (w *Encoder) bigInt(v *big.Int) {
  318. b := v.Bytes()
  319. w.String(string(b)) // TODO: More efficient encoding.
  320. w.Bool(v.Sign() < 0)
  321. }
  322. func (w *Encoder) bigFloat(v *big.Float) {
  323. b := v.Append(nil, 'p', -1)
  324. w.String(string(b)) // TODO: More efficient encoding.
  325. }