enc_fast.go

// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import (
	"fmt"
	"math"
	"math/bits"
)

const (
	tableBits        = 15                               // Bits used in the table
	tableSize        = 1 << tableBits                   // Size of the table
	tableShardCnt    = 1 << (tableBits - dictShardBits) // Number of shards in the table
	tableShardSize   = tableSize / tableShardCnt        // Size of an individual shard
	tableFastHashLen = 6
	tableMask        = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
	maxMatchLength   = 131074
)

type tableEntry struct {
	val    uint32
	offset int32
}
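
// fastEncoder finds matches with a single hash table. Each entry maps a hash
// of tableFastHashLen input bytes to the most recent position where that hash
// was seen (offset, stored relative to e.cur) and the 4 bytes found there
// (val), used as a cheap check before the full match length is measured.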
type fastEncoder struct {
	fastBase
	table [tableSize]tableEntry
}

type fastEncoderDict struct {
	fastEncoder
	dictTable       []tableEntry
	tableShardDirty [tableShardCnt]bool
	allDirty        bool
}
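
// Illustrative note (not part of this file): these encoders are not used
// directly. The public API selects one based on the configured level; the
// fast encoder roughly corresponds to the lowest compression setting, e.g.
//
//	enc, _ := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedFastest))
//	compressed := enc.EncodeAll(input, nil)
//	_ = enc.Close()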

// Encode mimics the functionality in zstd_fast.c
func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
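	// Table offsets are stored relative to e.cur. When e.cur gets close to
	// bufferReset we either clear the table (no history to keep) or shift all
	// stored offsets down, so e.cur can be rewound to e.maxMatchOff without
	// stale entries looking like valid matches.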
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])
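	// offset1 and offset2 track the two most recent match offsets, seeded
	// from the previous block and written back to blk.recentOffsets below.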

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
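			// repIndex points two bytes ahead of s using the previous offset;
			// comparing cv>>16 against the 4 bytes there checks for a repeat
			// match at s+2.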
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				var length int32
				// length = 4 + e.matchlen(s+6, repIndex+4, src)
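				// Manually inlined match length: compare 8 bytes at a time;
				// at the first difference, TrailingZeros64 of the XOR gives
				// how many of those bytes still matched.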
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
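			// No match found: skip ahead. The step grows with the length of
			// the current literal run, so long match-free stretches are
			// scanned progressively faster.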
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t
		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
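		// A hit means the next 4 bytes match at offset2, so a repeat sequence
		// with zero literals can be emitted without extending backwards.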
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			//l := 4 + e.matchlen(s+4, o2+4, src)
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if debugEncoder {
		if len(src) > maxBlockSize {
			panic("src too big")
		}
	}

	// Protect against e.cur wraparound.
	if e.cur >= bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches
		for {
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				// length := 4 + e.matchlen(s+6, repIndex+4, src)
				// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
				var length int32
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t
		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && t < 0 {
			panic(fmt.Sprintf("t (%d) < 0 ", t))
		}
		// Extend the 4-byte match as long as possible.
		//l := e.matchlenNoHist(s+4, t+4, src) + 4
		// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
			// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// We do not store history, so we must offset e.cur to avoid false matches for next user.
	if e.cur < bufferReset {
		e.cur += int32(len(src))
	}
}

// Encode will encode the content, with a dictionary if initialized for it.
func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
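	// For larger inputs, or when the table is already fully dirty, shard
	// tracking is not worth it: use the plain encoder and mark everything
	// dirty so Reset restores the whole table from dictTable.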
	if e.allDirty || len(src) > 32<<10 {
		e.fastEncoder.Encode(blk, src)
		e.allDirty = true
		return
	}
	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// By not using them for the first 3 matches
		canRepeat := len(blk.sequences) > 2
		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
			e.markShardDirty(nextHash2)

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				var length int32
				// length = 4 + e.matchlen(s+6, repIndex+4, src)
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals
				// and have to do special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debugEncoder {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t
		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			//l := 4 + e.matchlen(s+4, o2+4, src)
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hashLen(cv, hashLog, tableFastHashLen)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset the encoder state. The plain fastEncoder does not support
// dictionaries; passing one is a programming error and panics.
func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("fastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		if true {
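			// Populate dictTable by hashing the dictionary content. Each
			// iteration advances 3 bytes and inserts 3 hashes, so nearly
			// every position gets indexed. Offsets are stored shifted by
			// e.maxMatchOff, matching how the encoder addresses history.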
			end := e.maxMatchOff + int32(len(d.content)) - 8
			for i := e.maxMatchOff; i < end; i += 3 {
				const hashLog = tableBits

				cv := load6432(d.content, i-e.maxMatchOff)
				nextHash := hashLen(cv, hashLog, tableFastHashLen)      // 0 -> 5
				nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen)  // 1 -> 6
				nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
				e.dictTable[nextHash] = tableEntry{
					val:    uint32(cv),
					offset: i,
				}
				e.dictTable[nextHash1] = tableEntry{
					val:    uint32(cv >> 8),
					offset: i + 1,
				}
				e.dictTable[nextHash2] = tableEntry{
					val:    uint32(cv >> 16),
					offset: i + 2,
				}
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	e.cur = e.maxMatchOff
	dirtyShardCnt := 0
	if !e.allDirty {
		for i := range e.tableShardDirty {
			if e.tableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	const shardCnt = tableShardCnt
	const shardSize = tableShardSize
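	// If most shards are dirty, one full-table copy is cheaper than copying
	// shard by shard; otherwise refresh only the dirty shards from dictTable.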
	if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
		copy(e.table[:], e.dictTable)
		for i := range e.tableShardDirty {
			e.tableShardDirty[i] = false
		}
		e.allDirty = false
		return
	}
	for i := range e.tableShardDirty {
		if !e.tableShardDirty[i] {
			continue
		}

		copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
		e.tableShardDirty[i] = false
	}
	e.allDirty = false
}

func (e *fastEncoderDict) markAllShardsDirty() {
	e.allDirty = true
}
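
// markShardDirty maps a table index to its shard and flags it, so Reset
// knows which parts of the table must be restored from dictTable.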
func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
	e.tableShardDirty[entryNum/tableShardSize] = true
}