enc_base.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. package zstd
  2. import (
  3. "fmt"
  4. "math/bits"
  5. "github.com/klauspost/compress/zstd/internal/xxhash"
  6. )
const (
	// dictShardBits is the number of bits used to shard dictionary match
	// tables (2^dictShardBits shards). NOTE(review): no reference in this
	// file — presumably consumed by the dictionary-aware encoders; confirm.
	dictShardBits = 6
)
// fastBase holds the state shared by the encoder implementations:
// the history window, running checksum and the block being built.
type fastBase struct {
	// cur is the offset at the start of hist
	cur int32
	// maximum offset. Should be at least 2x block size.
	maxMatchOff int32
	// hist is the history window: previously added input that matches
	// may reference.
	hist []byte
	// crc is the running xxhash of all input (used for the frame checksum).
	crc *xxhash.Digest
	// tmp is scratch space used when serializing the checksum.
	tmp [8]byte
	// blk is the block currently being encoded.
	blk *blockEnc
	// lastDictID records the most recently applied dictionary ID.
	lastDictID uint32
	// lowMem trades speed for smaller buffer allocations.
	lowMem bool
}
// CRC returns the underlying CRC writer.
// Note: despite the name, this is the xxhash digest used for the
// zstd frame checksum, not a cyclic redundancy check.
func (e *fastBase) CRC() *xxhash.Digest {
	return e.crc
}
  26. // AppendCRC will append the CRC to the destination slice and return it.
  27. func (e *fastBase) AppendCRC(dst []byte) []byte {
  28. crc := e.crc.Sum(e.tmp[:0])
  29. dst = append(dst, crc[7], crc[6], crc[5], crc[4])
  30. return dst
  31. }
  32. // WindowSize returns the window size of the encoder,
  33. // or a window size small enough to contain the input size, if > 0.
  34. func (e *fastBase) WindowSize(size int64) int32 {
  35. if size > 0 && size < int64(e.maxMatchOff) {
  36. b := int32(1) << uint(bits.Len(uint(size)))
  37. // Keep minimum window.
  38. if b < 1024 {
  39. b = 1024
  40. }
  41. return b
  42. }
  43. return e.maxMatchOff
  44. }
// Block returns the current block.
func (e *fastBase) Block() *blockEnc {
	return e.blk
}
  49. func (e *fastBase) addBlock(src []byte) int32 {
  50. if debugAsserts && e.cur > bufferReset {
  51. panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
  52. }
  53. // check if we have space already
  54. if len(e.hist)+len(src) > cap(e.hist) {
  55. if cap(e.hist) == 0 {
  56. e.ensureHist(len(src))
  57. } else {
  58. if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) {
  59. panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff))
  60. }
  61. // Move down
  62. offset := int32(len(e.hist)) - e.maxMatchOff
  63. copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
  64. e.cur += offset
  65. e.hist = e.hist[:e.maxMatchOff]
  66. }
  67. }
  68. s := int32(len(e.hist))
  69. e.hist = append(e.hist, src...)
  70. return s
  71. }
  72. // ensureHist will ensure that history can keep at least this many bytes.
  73. func (e *fastBase) ensureHist(n int) {
  74. if cap(e.hist) >= n {
  75. return
  76. }
  77. l := e.maxMatchOff
  78. if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize {
  79. l += maxCompressedBlockSize
  80. } else {
  81. l += e.maxMatchOff
  82. }
  83. // Make it at least 1MB.
  84. if l < 1<<20 && !e.lowMem {
  85. l = 1 << 20
  86. }
  87. // Make it at least the requested size.
  88. if l < int32(n) {
  89. l = int32(n)
  90. }
  91. e.hist = make([]byte, 0, l)
  92. }
// UseBlock will replace the block with the provided one,
// but transfer recent offsets from the previous.
func (e *fastBase) UseBlock(enc *blockEnc) {
	enc.reset(e.blk)
	e.blk = enc
}
// matchlenNoHist returns the length of the common prefix of src[s:] and
// src[t:]. Unlike matchlen, it performs no debug validation of s and t.
func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
	// Extend the match to be as long as possible.
	return int32(matchLen(src[s:], src[t:]))
}
  103. func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
  104. if debugAsserts {
  105. if s < 0 {
  106. err := fmt.Sprintf("s (%d) < 0", s)
  107. panic(err)
  108. }
  109. if t < 0 {
  110. err := fmt.Sprintf("s (%d) < 0", s)
  111. panic(err)
  112. }
  113. if s-t > e.maxMatchOff {
  114. err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff)
  115. panic(err)
  116. }
  117. if len(src)-int(s) > maxCompressedBlockSize {
  118. panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
  119. }
  120. }
  121. // Extend the match to be as long as possible.
  122. return int32(matchLen(src[s:], src[t:]))
  123. }
// resetBase resets the block, CRC and history state so the encoder can be
// reused. If d is non-nil, its content is loaded into history; singleBlock
// limits the history allocation to dictionary content plus one block.
func (e *fastBase) resetBase(d *dict, singleBlock bool) {
	if e.blk == nil {
		e.blk = &blockEnc{lowMem: e.lowMem}
		e.blk.init()
	} else {
		e.blk.reset(nil)
	}
	e.blk.initNewEncode()
	if e.crc == nil {
		e.crc = xxhash.New()
	} else {
		e.crc.Reset()
	}
	if d != nil {
		// Temporarily force lowMem sizing for single-block compression so
		// ensureHist reserves only dict content + one block, then restore.
		low := e.lowMem
		if singleBlock {
			e.lowMem = true
		}
		e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
		e.lowMem = low
	}
	// We offset current position so everything will be out of reach.
	// If above reset line, history will be purged.
	// (Must happen before hist is truncated: the bump uses len(e.hist).)
	if e.cur < bufferReset {
		e.cur += e.maxMatchOff + int32(len(e.hist))
	}
	e.hist = e.hist[:0]
	if d != nil {
		// Set offsets (currently not used)
		for i, off := range d.offsets {
			e.blk.recentOffsets[i] = uint32(off)
			e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i]
		}
		// Transfer litenc.
		e.blk.dictLitEnc = d.litEnc
		e.hist = append(e.hist, d.content...)
	}
}
  162. }