seqdec.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. // Copyright 2019+ Klaus Post. All rights reserved.
  2. // License information can be found in the LICENSE file.
  3. // Based on work by Yann Collet, released under BSD License.
  4. package zstd
  5. import (
  6. "errors"
  7. "fmt"
  8. "io"
  9. )
  10. type seq struct {
  11. litLen uint32
  12. matchLen uint32
  13. offset uint32
  14. // Codes are stored here for the encoder
  15. // so they only have to be looked up once.
  16. llCode, mlCode, ofCode uint8
  17. }
  18. func (s seq) String() string {
  19. if s.offset <= 3 {
  20. if s.offset == 0 {
  21. return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset: INVALID (0)")
  22. }
  23. return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset, " (repeat)")
  24. }
  25. return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset-3, " (new)")
  26. }
  27. type seqCompMode uint8
  28. const (
  29. compModePredefined seqCompMode = iota
  30. compModeRLE
  31. compModeFSE
  32. compModeRepeat
  33. )
  34. type sequenceDec struct {
  35. // decoder keeps track of the current state and updates it from the bitstream.
  36. fse *fseDecoder
  37. state fseState
  38. repeat bool
  39. }
  40. // init the state of the decoder with input from stream.
  41. func (s *sequenceDec) init(br *bitReader) error {
  42. if s.fse == nil {
  43. return errors.New("sequence decoder not defined")
  44. }
  45. s.state.init(br, s.fse.actualTableLog, s.fse.dt[:1<<s.fse.actualTableLog])
  46. return nil
  47. }
  48. // sequenceDecs contains all 3 sequence decoders and their state.
  49. type sequenceDecs struct {
  50. litLengths sequenceDec
  51. offsets sequenceDec
  52. matchLengths sequenceDec
  53. prevOffset [3]int
  54. hist []byte
  55. dict []byte
  56. literals []byte
  57. out []byte
  58. windowSize int
  59. maxBits uint8
  60. }
  61. // initialize all 3 decoders from the stream input.
  62. func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []byte) error {
  63. if err := s.litLengths.init(br); err != nil {
  64. return errors.New("litLengths:" + err.Error())
  65. }
  66. if err := s.offsets.init(br); err != nil {
  67. return errors.New("offsets:" + err.Error())
  68. }
  69. if err := s.matchLengths.init(br); err != nil {
  70. return errors.New("matchLengths:" + err.Error())
  71. }
  72. s.literals = literals
  73. s.hist = hist.b
  74. s.prevOffset = hist.recentOffsets
  75. s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits
  76. s.windowSize = hist.windowSize
  77. s.out = out
  78. s.dict = nil
  79. if hist.dict != nil {
  80. s.dict = hist.dict.content
  81. }
  82. return nil
  83. }
  84. // decode sequences from the stream with the provided history.
  85. func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
  86. startSize := len(s.out)
  87. // Grab full sizes tables, to avoid bounds checks.
  88. llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
  89. llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
  90. for i := seqs - 1; i >= 0; i-- {
  91. if br.overread() {
  92. printf("reading sequence %d, exceeded available data\n", seqs-i)
  93. return io.ErrUnexpectedEOF
  94. }
  95. var ll, mo, ml int
  96. if br.off > 4+((maxOffsetBits+16+16)>>3) {
  97. // inlined function:
  98. // ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
  99. // Final will not read from stream.
  100. var llB, mlB, moB uint8
  101. ll, llB = llState.final()
  102. ml, mlB = mlState.final()
  103. mo, moB = ofState.final()
  104. // extra bits are stored in reverse order.
  105. br.fillFast()
  106. mo += br.getBits(moB)
  107. if s.maxBits > 32 {
  108. br.fillFast()
  109. }
  110. ml += br.getBits(mlB)
  111. ll += br.getBits(llB)
  112. if moB > 1 {
  113. s.prevOffset[2] = s.prevOffset[1]
  114. s.prevOffset[1] = s.prevOffset[0]
  115. s.prevOffset[0] = mo
  116. } else {
  117. // mo = s.adjustOffset(mo, ll, moB)
  118. // Inlined for rather big speedup
  119. if ll == 0 {
  120. // There is an exception though, when current sequence's literals_length = 0.
  121. // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
  122. // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
  123. mo++
  124. }
  125. if mo == 0 {
  126. mo = s.prevOffset[0]
  127. } else {
  128. var temp int
  129. if mo == 3 {
  130. temp = s.prevOffset[0] - 1
  131. } else {
  132. temp = s.prevOffset[mo]
  133. }
  134. if temp == 0 {
  135. // 0 is not valid; input is corrupted; force offset to 1
  136. println("temp was 0")
  137. temp = 1
  138. }
  139. if mo != 1 {
  140. s.prevOffset[2] = s.prevOffset[1]
  141. }
  142. s.prevOffset[1] = s.prevOffset[0]
  143. s.prevOffset[0] = temp
  144. mo = temp
  145. }
  146. }
  147. br.fillFast()
  148. } else {
  149. ll, mo, ml = s.next(br, llState, mlState, ofState)
  150. br.fill()
  151. }
  152. if debugSequences {
  153. println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
  154. }
  155. if ll > len(s.literals) {
  156. return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
  157. }
  158. size := ll + ml + len(s.out)
  159. if size-startSize > maxBlockSize {
  160. return fmt.Errorf("output (%d) bigger than max block size", size)
  161. }
  162. if size > cap(s.out) {
  163. // Not enough size, which can happen under high volume block streaming conditions
  164. // but could be if destination slice is too small for sync operations.
  165. // over-allocating here can create a large amount of GC pressure so we try to keep
  166. // it as contained as possible
  167. used := len(s.out) - startSize
  168. addBytes := 256 + ll + ml + used>>2
  169. // Clamp to max block size.
  170. if used+addBytes > maxBlockSize {
  171. addBytes = maxBlockSize - used
  172. }
  173. s.out = append(s.out, make([]byte, addBytes)...)
  174. s.out = s.out[:len(s.out)-addBytes]
  175. }
  176. if ml > maxMatchLen {
  177. return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
  178. }
  179. // Add literals
  180. s.out = append(s.out, s.literals[:ll]...)
  181. s.literals = s.literals[ll:]
  182. out := s.out
  183. if mo == 0 && ml > 0 {
  184. return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
  185. }
  186. if mo > len(s.out)+len(hist) || mo > s.windowSize {
  187. if len(s.dict) == 0 {
  188. return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
  189. }
  190. // we may be in dictionary.
  191. dictO := len(s.dict) - (mo - (len(s.out) + len(hist)))
  192. if dictO < 0 || dictO >= len(s.dict) {
  193. return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
  194. }
  195. end := dictO + ml
  196. if end > len(s.dict) {
  197. out = append(out, s.dict[dictO:]...)
  198. mo -= len(s.dict) - dictO
  199. ml -= len(s.dict) - dictO
  200. } else {
  201. out = append(out, s.dict[dictO:end]...)
  202. mo = 0
  203. ml = 0
  204. }
  205. }
  206. // Copy from history.
  207. // TODO: Blocks without history could be made to ignore this completely.
  208. if v := mo - len(s.out); v > 0 {
  209. // v is the start position in history from end.
  210. start := len(s.hist) - v
  211. if ml > v {
  212. // Some goes into current block.
  213. // Copy remainder of history
  214. out = append(out, s.hist[start:]...)
  215. mo -= v
  216. ml -= v
  217. } else {
  218. out = append(out, s.hist[start:start+ml]...)
  219. ml = 0
  220. }
  221. }
  222. // We must be in current buffer now
  223. if ml > 0 {
  224. start := len(s.out) - mo
  225. if ml <= len(s.out)-start {
  226. // No overlap
  227. out = append(out, s.out[start:start+ml]...)
  228. } else {
  229. // Overlapping copy
  230. // Extend destination slice and copy one byte at the time.
  231. out = out[:len(out)+ml]
  232. src := out[start : start+ml]
  233. // Destination is the space we just added.
  234. dst := out[len(out)-ml:]
  235. dst = dst[:len(src)]
  236. for i := range src {
  237. dst[i] = src[i]
  238. }
  239. }
  240. }
  241. s.out = out
  242. if i == 0 {
  243. // This is the last sequence, so we shouldn't update state.
  244. break
  245. }
  246. // Manually inlined, ~ 5-20% faster
  247. // Update all 3 states at once. Approx 20% faster.
  248. nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
  249. if nBits == 0 {
  250. llState = llTable[llState.newState()&maxTableMask]
  251. mlState = mlTable[mlState.newState()&maxTableMask]
  252. ofState = ofTable[ofState.newState()&maxTableMask]
  253. } else {
  254. bits := br.getBitsFast(nBits)
  255. lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
  256. llState = llTable[(llState.newState()+lowBits)&maxTableMask]
  257. lowBits = uint16(bits >> (ofState.nbBits() & 31))
  258. lowBits &= bitMask[mlState.nbBits()&15]
  259. mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
  260. lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
  261. ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
  262. }
  263. }
  264. // Add final literals
  265. s.out = append(s.out, s.literals...)
  266. return nil
  267. }
  268. // update states, at least 27 bits must be available.
  269. func (s *sequenceDecs) update(br *bitReader) {
  270. // Max 8 bits
  271. s.litLengths.state.next(br)
  272. // Max 9 bits
  273. s.matchLengths.state.next(br)
  274. // Max 8 bits
  275. s.offsets.state.next(br)
  276. }
  277. var bitMask [16]uint16
  278. func init() {
  279. for i := range bitMask[:] {
  280. bitMask[i] = uint16((1 << uint(i)) - 1)
  281. }
  282. }
  283. // update states, at least 27 bits must be available.
  284. func (s *sequenceDecs) updateAlt(br *bitReader) {
  285. // Update all 3 states at once. Approx 20% faster.
  286. a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
  287. nBits := a.nbBits() + b.nbBits() + c.nbBits()
  288. if nBits == 0 {
  289. s.litLengths.state.state = s.litLengths.state.dt[a.newState()]
  290. s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()]
  291. s.offsets.state.state = s.offsets.state.dt[c.newState()]
  292. return
  293. }
  294. bits := br.getBitsFast(nBits)
  295. lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
  296. s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]
  297. lowBits = uint16(bits >> (c.nbBits() & 31))
  298. lowBits &= bitMask[b.nbBits()&15]
  299. s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits]
  300. lowBits = uint16(bits) & bitMask[c.nbBits()&15]
  301. s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits]
  302. }
  303. // nextFast will return new states when there are at least 4 unused bytes left on the stream when done.
  304. func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
  305. // Final will not read from stream.
  306. ll, llB := llState.final()
  307. ml, mlB := mlState.final()
  308. mo, moB := ofState.final()
  309. // extra bits are stored in reverse order.
  310. br.fillFast()
  311. mo += br.getBits(moB)
  312. if s.maxBits > 32 {
  313. br.fillFast()
  314. }
  315. ml += br.getBits(mlB)
  316. ll += br.getBits(llB)
  317. if moB > 1 {
  318. s.prevOffset[2] = s.prevOffset[1]
  319. s.prevOffset[1] = s.prevOffset[0]
  320. s.prevOffset[0] = mo
  321. return
  322. }
  323. // mo = s.adjustOffset(mo, ll, moB)
  324. // Inlined for rather big speedup
  325. if ll == 0 {
  326. // There is an exception though, when current sequence's literals_length = 0.
  327. // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
  328. // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
  329. mo++
  330. }
  331. if mo == 0 {
  332. mo = s.prevOffset[0]
  333. return
  334. }
  335. var temp int
  336. if mo == 3 {
  337. temp = s.prevOffset[0] - 1
  338. } else {
  339. temp = s.prevOffset[mo]
  340. }
  341. if temp == 0 {
  342. // 0 is not valid; input is corrupted; force offset to 1
  343. println("temp was 0")
  344. temp = 1
  345. }
  346. if mo != 1 {
  347. s.prevOffset[2] = s.prevOffset[1]
  348. }
  349. s.prevOffset[1] = s.prevOffset[0]
  350. s.prevOffset[0] = temp
  351. mo = temp
  352. return
  353. }
  354. func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
  355. // Final will not read from stream.
  356. ll, llB := llState.final()
  357. ml, mlB := mlState.final()
  358. mo, moB := ofState.final()
  359. // extra bits are stored in reverse order.
  360. br.fill()
  361. if s.maxBits <= 32 {
  362. mo += br.getBits(moB)
  363. ml += br.getBits(mlB)
  364. ll += br.getBits(llB)
  365. } else {
  366. mo += br.getBits(moB)
  367. br.fill()
  368. // matchlength+literal length, max 32 bits
  369. ml += br.getBits(mlB)
  370. ll += br.getBits(llB)
  371. }
  372. mo = s.adjustOffset(mo, ll, moB)
  373. return
  374. }
  375. func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int {
  376. if offsetB > 1 {
  377. s.prevOffset[2] = s.prevOffset[1]
  378. s.prevOffset[1] = s.prevOffset[0]
  379. s.prevOffset[0] = offset
  380. return offset
  381. }
  382. if litLen == 0 {
  383. // There is an exception though, when current sequence's literals_length = 0.
  384. // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
  385. // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
  386. offset++
  387. }
  388. if offset == 0 {
  389. return s.prevOffset[0]
  390. }
  391. var temp int
  392. if offset == 3 {
  393. temp = s.prevOffset[0] - 1
  394. } else {
  395. temp = s.prevOffset[offset]
  396. }
  397. if temp == 0 {
  398. // 0 is not valid; input is corrupted; force offset to 1
  399. println("temp was 0")
  400. temp = 1
  401. }
  402. if offset != 1 {
  403. s.prevOffset[2] = s.prevOffset[1]
  404. }
  405. s.prevOffset[1] = s.prevOffset[0]
  406. s.prevOffset[0] = temp
  407. return temp
  408. }
  409. // mergeHistory will merge history.
  410. func (s *sequenceDecs) mergeHistory(hist *sequenceDecs) (*sequenceDecs, error) {
  411. for i := uint(0); i < 3; i++ {
  412. var sNew, sHist *sequenceDec
  413. switch i {
  414. default:
  415. // same as "case 0":
  416. sNew = &s.litLengths
  417. sHist = &hist.litLengths
  418. case 1:
  419. sNew = &s.offsets
  420. sHist = &hist.offsets
  421. case 2:
  422. sNew = &s.matchLengths
  423. sHist = &hist.matchLengths
  424. }
  425. if sNew.repeat {
  426. if sHist.fse == nil {
  427. return nil, fmt.Errorf("sequence stream %d, repeat requested, but no history", i)
  428. }
  429. continue
  430. }
  431. if sNew.fse == nil {
  432. return nil, fmt.Errorf("sequence stream %d, no fse found", i)
  433. }
  434. if sHist.fse != nil && !sHist.fse.preDefined {
  435. fseDecoderPool.Put(sHist.fse)
  436. }
  437. sHist.fse = sNew.fse
  438. }
  439. return hist, nil
  440. }