2
0

streamreader.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. package azure
import (
	"bytes"
	"context"
	"errors"
	"io"

	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
)
// defaultBlockSize is the size of each downloaded block (8MB). One block is
// prefetched ahead of the reader so download overlaps with consumption.
const defaultBlockSize = int(8 * 1024 * 1024) // 8MB

// StreamReader is a double buffered streaming reader for Azure Blob Storage.
type StreamReader struct {
	client    *azblob.Client  // client used to issue download requests
	container string          // container holding the blob
	blobName  string          // name of the blob being streamed
	block     *bytes.Buffer   // currently buffered block being consumed by Read
	next      *streamingBlock // prefetched block downloading in the background
	position  int64           // absolute read offset into the blob
	size      int64           // total blob size, fetched at construction
}
  19. // NewStreamReader creates a new streaming reader for the specified blob.
  20. func NewStreamReader(client *azblob.Client, container string, blobName string) (*StreamReader, error) {
  21. sar := &StreamReader{
  22. client: client,
  23. container: container,
  24. blobName: blobName,
  25. block: nil,
  26. next: nil,
  27. position: 0,
  28. size: 0,
  29. }
  30. // get the size of the blob
  31. blobClient := client.ServiceClient().NewContainerClient(container).NewBlobClient(blobName)
  32. gr, err := blobClient.GetProperties(context.Background(), nil)
  33. if err != nil {
  34. return nil, err
  35. }
  36. sar.size = *gr.ContentLength
  37. return sar, nil
  38. }
  39. // See io.Reader.Read
  40. func (r *StreamReader) Read(p []byte) (n int, err error) {
  41. if r.position >= r.size {
  42. return 0, io.EOF
  43. }
  44. // fetch the blocks on demand
  45. if r.block == nil || r.block.Len() == 0 {
  46. err := r.nextBlock()
  47. if err != nil {
  48. return 0, err
  49. }
  50. }
  51. // block.Next() constrains the bytes read even if len(p) is larger
  52. // than the rest of the block
  53. copied := copy(p, r.block.Next(len(p)))
  54. r.position += int64(copied)
  55. return copied, nil
  56. }
  57. // nextBlock fetches the next block of data from the blob and starts the download of
  58. // the next block in the background.
  59. func (r *StreamReader) nextBlock() error {
  60. // if we don't have a block, we need to fetch the first block, and start fetching
  61. // the next block in the background
  62. if r.block == nil {
  63. current := newStreamBlock(
  64. r.client,
  65. r.container,
  66. r.blobName,
  67. nil,
  68. r.position,
  69. int64(defaultBlockSize),
  70. r.size,
  71. )
  72. // explicitly wait here for the first block
  73. err := current.Wait()
  74. if err != nil {
  75. return err
  76. }
  77. // set the current block and start the next block download
  78. r.block = current.buffer
  79. // if the block size capacity was reduced to a value different than the default block size,
  80. // we can assume there is no more data beyond this block, so we don't need to start the next block
  81. if current.capacity != int64(defaultBlockSize) {
  82. return nil
  83. }
  84. // start next block stream
  85. r.next = newStreamBlock(
  86. r.client,
  87. r.container,
  88. r.blobName,
  89. nil,
  90. r.position+current.capacity,
  91. int64(defaultBlockSize),
  92. r.size,
  93. )
  94. return nil
  95. }
  96. // we have a block and a next block, so we need to wait for the next block to finish
  97. // buffering, then we can swap current and next buffers
  98. err := r.next.Wait()
  99. if err != nil {
  100. return err
  101. }
  102. // save the current buffer to re-use in the next block and set the current to the next block
  103. currentBuffer := r.block
  104. r.block = r.next.buffer
  105. if r.next.capacity != int64(defaultBlockSize) {
  106. return nil
  107. }
  108. // start next block stream
  109. r.next = newStreamBlock(
  110. r.client,
  111. r.container,
  112. r.blobName,
  113. currentBuffer, // recycle the old current buffer as the next buffer
  114. r.position+int64(defaultBlockSize),
  115. int64(defaultBlockSize),
  116. r.size,
  117. )
  118. return nil
  119. }
// streamingBlock is a buffered block of data that runs in a separate goroutine
// to allow the next block to download while the current block is being read.
type streamingBlock struct {
	client    *azblob.Client // client used for the ranged download request
	container string         // container holding the blob
	blob      string         // blob name
	done      chan struct{}  // closed when the download finishes or fails
	buffer    *bytes.Buffer  // destination for the downloaded bytes
	err       error          // download error, if any; read only after done closes
	start     int64          // absolute byte offset of this block in the blob
	capacity  int64          // number of bytes this block downloads (clamped at EOF)
}
  132. // newStreamBlock creates a new buffered block of data the down the specific
  133. // range of the blob. While the block download runs in a separate goroutine,
  134. // we will never attempt to access the passed buffer until after the Wait()
  135. // returns. This just ensures that we will never attempt to swap buffers
  136. // mid-download.
  137. func newStreamBlock(
  138. client *azblob.Client,
  139. container string,
  140. blob string,
  141. buffer *bytes.Buffer,
  142. start int64,
  143. capacity int64,
  144. max int64,
  145. ) *streamingBlock {
  146. sb := &streamingBlock{
  147. client: client,
  148. container: container,
  149. blob: blob,
  150. done: make(chan struct{}),
  151. buffer: buffer,
  152. start: start,
  153. capacity: capacity,
  154. }
  155. // determine if we need to reallocate a new block buffer or if we can re-use the existing storage
  156. blockSize := capacity
  157. if start+blockSize > max {
  158. blockSize = max - start
  159. }
  160. // if the provided buffer is nil or the blockSize is different than the provided capacity, we need to reallocate
  161. // reallocation will likely happen once at the end of the stream
  162. if sb.buffer == nil || blockSize != capacity {
  163. sb.buffer = bytes.NewBuffer(make([]byte, 0, blockSize))
  164. sb.capacity = blockSize
  165. } else {
  166. sb.buffer.Reset()
  167. }
  168. // start a goroutine to fetch the block of data, close the done channel when the block
  169. // is fetched or an error occurs
  170. go func(block *streamingBlock) {
  171. ctx := context.Background()
  172. opts := azblob.DownloadStreamOptions{
  173. Range: azblob.HTTPRange{
  174. Offset: block.start,
  175. Count: block.capacity,
  176. },
  177. }
  178. resp, err := block.client.DownloadStream(ctx, block.container, block.blob, &opts)
  179. if err != nil {
  180. block.err = err
  181. close(block.done)
  182. return
  183. }
  184. retryOpts := &azblob.RetryReaderOptions{
  185. MaxRetries: 3,
  186. }
  187. var body io.ReadCloser = resp.NewRetryReader(ctx, retryOpts)
  188. _, err = io.Copy(block.buffer, body)
  189. if err != nil {
  190. block.err = err
  191. close(block.done)
  192. return
  193. }
  194. err = body.Close()
  195. if err != nil {
  196. block.err = err
  197. close(block.done)
  198. return
  199. }
  200. close(block.done)
  201. }(sb)
  202. return sb
  203. }
// Wait blocks until the block is downloaded and returns any error that occurred.
// The close of the done channel establishes a happens-before edge, so reading
// sb.err (and sb.buffer) after Wait returns is race-free.
func (sb *streamingBlock) Wait() error {
	<-sb.done
	return sb.err
}