writer.go

package lz4

import (
	"encoding/binary"
	"fmt"
	"io"
	"runtime"

	"github.com/pierrec/lz4/internal/xxh32"
)

// zResult contains the results of compressing a block.
type zResult struct {
	size     uint32 // Block header
	data     []byte // Compressed data
	checksum uint32 // Data checksum
}

// Writer implements the LZ4 frame encoder.
type Writer struct {
	Header
	// Handler called when a block has been successfully written out.
	// It provides the number of bytes written.
	OnBlockDone func(size int)

	buf       [19]byte      // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes
	dst       io.Writer     // Destination.
	checksum  xxh32.XXHZero // Frame checksum.
	data      []byte        // Data to be compressed + buffer for compressed data.
	idx       int           // Index into data.
	hashtable [winSize]int  // Hash table used in CompressBlock().

	// For concurrency.
	c   chan chan zResult // Channel for block compression goroutines and writer goroutine.
	err error             // Any error encountered while writing to the underlying destination.
}

// NewWriter returns a new LZ4 frame encoder.
// No access to the underlying io.Writer is performed.
// The supplied Header is checked at the first Write.
// It is OK to change it before the first Write; after that it must not be
// changed until Reset() is called.
func NewWriter(dst io.Writer) *Writer {
	z := new(Writer)
	z.Reset(dst)
	return z
}
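// A minimal usage sketch, assuming caller code outside this package (the file
// names and the 256 KB block size are illustrative; error handling is elided):
//
//	f, _ := os.Open("data.txt")
//	out, _ := os.Create("data.txt.lz4")
//	zw := lz4.NewWriter(out)
//	zw.Header.BlockMaxSize = 256 << 10 // one of the valid sizes: 64 KB, 256 KB, 1 MB, 4 MB
//	_, _ = io.Copy(zw, f)
//	_ = zw.Close() // writes the end mark and the optional frame checksum
//	_ = out.Close()
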
// WithConcurrency sets the number of concurrent goroutines used for compression.
// A negative value sets the concurrency to GOMAXPROCS.
func (z *Writer) WithConcurrency(n int) *Writer {
	switch {
	case n == 0 || n == 1:
		z.c = nil
		return z
	case n < 0:
		n = runtime.GOMAXPROCS(0)
	}
	z.c = make(chan chan zResult, n)
	// Writer goroutine managing concurrent block compression goroutines.
	go func() {
		// Process next block compression item.
		for c := range z.c {
			// Read the next compressed block result.
			// Waiting here ensures that the blocks are output in the order they were sent.
			// The incoming channel is always closed as it indicates to the caller that
			// the block has been processed.
			res := <-c
			n := len(res.data)
			if n == 0 {
				// Notify the block compression routine that we are done with its result.
				// This is used when a sentinel block is sent to terminate the compression.
				close(c)
				return
			}
			// Write the block.
			if err := z.writeUint32(res.size); err != nil && z.err == nil {
				z.err = err
			}
			if _, err := z.dst.Write(res.data); err != nil && z.err == nil {
				z.err = err
			}
			if z.BlockChecksum {
				if err := z.writeUint32(res.checksum); err != nil && z.err == nil {
					z.err = err
				}
			}
			// It is now safe to release the buffer as it is no longer in use by any goroutine.
			putBuffer(cap(res.data), res.data)
			if h := z.OnBlockDone; h != nil {
				h(n)
			}
			close(c)
		}
	}()
	return z
}
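// A concurrency sketch, assuming the same caller setup as in the example above:
// blocks are compressed by up to GOMAXPROCS goroutines while the goroutine
// started in WithConcurrency preserves their output order.
//
//	zw := lz4.NewWriter(out).WithConcurrency(-1) // -1 means GOMAXPROCS workers
//	_, _ = io.Copy(zw, f)
//	_ = zw.Close()
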
// newBuffers instantiates a new buffer whose size matches the one in Header.
// The first half of the pooled buffer holds the data to be compressed; the
// remainder receives the compressed output (see compressBlock).
func (z *Writer) newBuffers() {
	bSize := z.Header.BlockMaxSize
	buf := getBuffer(bSize)
	z.data = buf[:bSize] // Uncompressed buffer is the first half.
}

// freeBuffers puts the writer's buffers back to the pool.
func (z *Writer) freeBuffers() {
	// Put the buffer back into the pool, if any.
	putBuffer(z.Header.BlockMaxSize, z.data)
	z.data = nil
}

// writeHeader builds and writes the header (magic+header) to the underlying io.Writer.
func (z *Writer) writeHeader() error {
	// Default to 4MB if BlockMaxSize is not set.
	if z.Header.BlockMaxSize == 0 {
		z.Header.BlockMaxSize = blockSize4M
	}
	// The only option that needs to be validated.
	bSize := z.Header.BlockMaxSize
	if !isValidBlockSize(z.Header.BlockMaxSize) {
		return fmt.Errorf("lz4: invalid block max size: %d", bSize)
	}
	// Allocate the compressed/uncompressed buffers.
	// The compressed buffer cannot exceed the uncompressed one.
	z.newBuffers()
	z.idx = 0

	// Size is optional.
	buf := z.buf[:]

	// Set the fixed size data: magic number, block max size and flags.
	binary.LittleEndian.PutUint32(buf[0:], frameMagic)
	flg := byte(Version << 6)
	flg |= 1 << 5 // No block dependency.
	if z.Header.BlockChecksum {
		flg |= 1 << 4
	}
	if z.Header.Size > 0 {
		flg |= 1 << 3
	}
	if !z.Header.NoChecksum {
		flg |= 1 << 2
	}
	buf[4] = flg
	buf[5] = blockSizeValueToIndex(z.Header.BlockMaxSize) << 4

	// Current buffer size: magic(4) + flags(1) + block max size (1).
	n := 6
	// Optional items.
	if z.Header.Size > 0 {
		binary.LittleEndian.PutUint64(buf[n:], z.Header.Size)
		n += 8
	}

	// The header checksum includes the flags, block max size and optional Size.
	buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF)
	z.checksum.Reset()

	// Header ready, write it out.
	if _, err := z.dst.Write(buf[0 : n+1]); err != nil {
		return err
	}
	z.Header.done = true
	if debugFlag {
		debug("wrote header %v", z.Header)
	}

	return nil
}
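// Worked example of the header bytes produced above, assuming default settings
// (4 MB blocks, frame checksum enabled, no content Size). The header is then
// 7 bytes long:
//
//	04 22 4D 18   magic number 0x184D2204, little-endian
//	64            FLG: version 01, block independence, content checksum
//	70            BD: block max size index 7 (4 MB)
//	xx            HC: byte((xxh32 of the FLG and BD bytes) >> 8)
//
// The final byte depends on the xxh32 value and is not spelled out here.
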
// Write compresses data from the supplied buffer into the underlying io.Writer.
// Write does not return until the data has been written.
func (z *Writer) Write(buf []byte) (int, error) {
	if !z.Header.done {
		if err := z.writeHeader(); err != nil {
			return 0, err
		}
	}
	if debugFlag {
		debug("input buffer len=%d index=%d", len(buf), z.idx)
	}

	zn := len(z.data)
	var n int
	for len(buf) > 0 {
		if z.idx == 0 && len(buf) >= zn {
			// Avoid a copy as there is enough data for a block.
			if err := z.compressBlock(buf[:zn]); err != nil {
				return n, err
			}
			n += zn
			buf = buf[zn:]
			continue
		}
		// Accumulate the data to be compressed.
		m := copy(z.data[z.idx:], buf)
		n += m
		z.idx += m
		buf = buf[m:]
		if debugFlag {
			debug("%d bytes copied to buf, current index %d", n, z.idx)
		}

		if z.idx < len(z.data) {
			// Buffer not filled.
			if debugFlag {
				debug("need more data for compression")
			}
			return n, nil
		}

		// Buffer full.
		if err := z.compressBlock(z.data); err != nil {
			return n, err
		}
		z.idx = 0
	}

	return n, nil
}
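// An illustrative progress hook, assuming caller code (the log output is an
// assumption, not part of this package): OnBlockDone fires once per block with
// the number of compressed bytes written for that block.
//
//	zw := lz4.NewWriter(out)
//	var total int
//	zw.OnBlockDone = func(size int) {
//		total += size
//		log.Printf("lz4: %d compressed bytes written so far", total)
//	}
//	_, _ = io.Copy(zw, f)
//	_ = zw.Close()
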
// compressBlock compresses a block.
func (z *Writer) compressBlock(data []byte) error {
	if !z.NoChecksum {
		_, _ = z.checksum.Write(data)
	}

	if z.c != nil {
		c := make(chan zResult)
		z.c <- c // Send now to guarantee order.
		// Get a buffer from the pool and copy the data over.
		block := getBuffer(z.Header.BlockMaxSize)[:len(data)]
		copy(block, data)
		go writerCompressBlock(c, z.Header, block)
		return nil
	}

	zdata := z.data[z.Header.BlockMaxSize:cap(z.data)]
	// The compressed block size cannot exceed the input's.
	var zn int

	if level := z.Header.CompressionLevel; level != 0 {
		zn, _ = CompressBlockHC(data, zdata, level)
	} else {
		zn, _ = CompressBlock(data, zdata, z.hashtable[:])
	}

	var bLen uint32
	if debugFlag {
		debug("block compression %d => %d", len(data), zn)
	}
	if zn > 0 && zn < len(data) {
		// Compressible and compressed size smaller than uncompressed: ok!
		bLen = uint32(zn)
		zdata = zdata[:zn]
	} else {
		// Uncompressed block.
		bLen = uint32(len(data)) | compressedBlockFlag
		zdata = data
	}
	if debugFlag {
		debug("block compression to be written len=%d data len=%d", bLen, len(zdata))
	}

	// Write the block.
	if err := z.writeUint32(bLen); err != nil {
		return err
	}
	written, err := z.dst.Write(zdata)
	if err != nil {
		return err
	}
	if h := z.OnBlockDone; h != nil {
		h(written)
	}

	if !z.BlockChecksum {
		if debugFlag {
			debug("current frame checksum %x", z.checksum.Sum32())
		}
		return nil
	}
	checksum := xxh32.ChecksumZero(zdata)
	if debugFlag {
		debug("block checksum %x", checksum)
		defer func() { debug("current frame checksum %x", z.checksum.Sum32()) }()
	}
	return z.writeUint32(checksum)
}
// Flush flushes any pending compressed data to the underlying writer.
// Flush does not return until the data has been written.
// If the underlying writer returns an error, Flush returns that error.
func (z *Writer) Flush() error {
	if debugFlag {
		debug("flush with index %d", z.idx)
	}
	if z.idx == 0 {
		return nil
	}

	data := getBuffer(z.Header.BlockMaxSize)[:len(z.data[:z.idx])]
	copy(data, z.data[:z.idx])

	z.idx = 0
	if z.c == nil {
		return z.compressBlock(data)
	}
	if !z.NoChecksum {
		_, _ = z.checksum.Write(data)
	}
	c := make(chan zResult)
	z.c <- c
	writerCompressBlock(c, z.Header, data)
	return nil
}
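// Illustrative use of Flush, assuming a caller that wants a block boundary at a
// record boundary (for example before handing the stream to a peer):
//
//	_, _ = zw.Write(record)
//	if err := zw.Flush(); err != nil { // emit the partially filled block now
//		// handle err
//	}
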
func (z *Writer) close() error {
	if z.c == nil {
		return nil
	}
	// Send a sentinel block (no data to compress) to terminate the writer main goroutine.
	c := make(chan zResult)
	z.c <- c
	c <- zResult{}
	// Wait for the main goroutine to complete.
	<-c
	// At this point the main goroutine has shut down or is about to return.
	z.c = nil
	return z.err
}

// Close closes the Writer, flushing any unwritten data to the underlying io.Writer,
// but does not close the underlying io.Writer.
func (z *Writer) Close() error {
	if !z.Header.done {
		if err := z.writeHeader(); err != nil {
			return err
		}
	}
	if err := z.Flush(); err != nil {
		return err
	}
	if err := z.close(); err != nil {
		return err
	}
	z.freeBuffers()

	if debugFlag {
		debug("writing last empty block")
	}
	if err := z.writeUint32(0); err != nil {
		return err
	}
	if z.NoChecksum {
		return nil
	}
	checksum := z.checksum.Sum32()
	if debugFlag {
		debug("stream checksum %x", checksum)
	}
	return z.writeUint32(checksum)
}
// Reset clears the state of the Writer z such that it is equivalent to its
// initial state from NewWriter, but instead writing to w.
// No access to the underlying io.Writer is performed.
func (z *Writer) Reset(w io.Writer) {
	n := cap(z.c)
	_ = z.close()
	z.freeBuffers()
	z.Header.Reset()
	z.dst = w
	z.checksum.Reset()
	z.idx = 0
	z.err = nil
	// Reset the hashtable to ensure deterministic output.
	for i := range z.hashtable {
		z.hashtable[i] = 0
	}
	z.WithConcurrency(n)
}
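// An illustrative reuse sketch, assuming caller code (files and readerFor are
// hypothetical): one Writer compresses several inputs in turn without
// reallocating its internal buffers.
//
//	zw := lz4.NewWriter(nil)
//	for _, name := range files {
//		out, _ := os.Create(name + ".lz4")
//		zw.Reset(out)
//		_, _ = io.Copy(zw, readerFor(name))
//		_ = zw.Close()
//		_ = out.Close()
//	}
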
// writeUint32 writes a uint32 to the underlying writer.
func (z *Writer) writeUint32(x uint32) error {
	buf := z.buf[:4]
	binary.LittleEndian.PutUint32(buf, x)
	_, err := z.dst.Write(buf)
	return err
}

// writerCompressBlock compresses data into a pooled buffer and writes its result
// out to the input channel.
func writerCompressBlock(c chan zResult, header Header, data []byte) {
	zdata := getBuffer(header.BlockMaxSize)
	// The compressed block size cannot exceed the input's.
	var zn int
	if level := header.CompressionLevel; level != 0 {
		zn, _ = CompressBlockHC(data, zdata, level)
	} else {
		var hashTable [winSize]int
		zn, _ = CompressBlock(data, zdata, hashTable[:])
	}
	var res zResult
	if zn > 0 && zn < len(data) {
		res.size = uint32(zn)
		res.data = zdata[:zn]
		// Release the uncompressed block since it is not used anymore.
		putBuffer(header.BlockMaxSize, data)
	} else {
		res.size = uint32(len(data)) | compressedBlockFlag
		res.data = data
		// Release the compressed block since it was not used.
		putBuffer(header.BlockMaxSize, zdata)
	}
	if header.BlockChecksum {
		res.checksum = xxh32.ChecksumZero(res.data)
	}
	c <- res
}