dict.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. package zstd
  2. import (
  3. "bytes"
  4. "encoding/binary"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "github.com/klauspost/compress/huff0"
  9. )
  10. type dict struct {
  11. id uint32
  12. litEnc *huff0.Scratch
  13. llDec, ofDec, mlDec sequenceDec
  14. //llEnc, ofEnc, mlEnc []*fseEncoder
  15. offsets [3]int
  16. content []byte
  17. }
  // dictMagic is the four-byte prefix that loadDict requires at the very
  // start of a dictionary.
  var dictMagic = [...]byte{0x37, 0xa4, 0x30, 0xec}
  19. // ID returns the dictionary id or 0 if d is nil.
  20. func (d *dict) ID() uint32 {
  21. if d == nil {
  22. return 0
  23. }
  24. return d.id
  25. }
  26. // DictContentSize returns the dictionary content size or 0 if d is nil.
  27. func (d *dict) DictContentSize() int {
  28. if d == nil {
  29. return 0
  30. }
  31. return len(d.content)
  32. }
  33. // Load a dictionary as described in
  34. // https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
  35. func loadDict(b []byte) (*dict, error) {
  36. // Check static field size.
  37. if len(b) <= 8+(3*4) {
  38. return nil, io.ErrUnexpectedEOF
  39. }
  40. d := dict{
  41. llDec: sequenceDec{fse: &fseDecoder{}},
  42. ofDec: sequenceDec{fse: &fseDecoder{}},
  43. mlDec: sequenceDec{fse: &fseDecoder{}},
  44. }
  45. if !bytes.Equal(b[:4], dictMagic[:]) {
  46. return nil, ErrMagicMismatch
  47. }
  48. d.id = binary.LittleEndian.Uint32(b[4:8])
  49. if d.id == 0 {
  50. return nil, errors.New("dictionaries cannot have ID 0")
  51. }
  52. // Read literal table
  53. var err error
  54. d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
  55. if err != nil {
  56. return nil, err
  57. }
  58. d.litEnc.Reuse = huff0.ReusePolicyMust
  59. br := byteReader{
  60. b: b,
  61. off: 0,
  62. }
  63. readDec := func(i tableIndex, dec *fseDecoder) error {
  64. if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil {
  65. return err
  66. }
  67. if br.overread() {
  68. return io.ErrUnexpectedEOF
  69. }
  70. err = dec.transform(symbolTableX[i])
  71. if err != nil {
  72. println("Transform table error:", err)
  73. return err
  74. }
  75. if debugDecoder || debugEncoder {
  76. println("Read table ok", "symbolLen:", dec.symbolLen)
  77. }
  78. // Set decoders as predefined so they aren't reused.
  79. dec.preDefined = true
  80. return nil
  81. }
  82. if err := readDec(tableOffsets, d.ofDec.fse); err != nil {
  83. return nil, err
  84. }
  85. if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil {
  86. return nil, err
  87. }
  88. if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil {
  89. return nil, err
  90. }
  91. if br.remain() < 12 {
  92. return nil, io.ErrUnexpectedEOF
  93. }
  94. d.offsets[0] = int(br.Uint32())
  95. br.advance(4)
  96. d.offsets[1] = int(br.Uint32())
  97. br.advance(4)
  98. d.offsets[2] = int(br.Uint32())
  99. br.advance(4)
  100. if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 {
  101. return nil, errors.New("invalid offset in dictionary")
  102. }
  103. d.content = make([]byte, br.remain())
  104. copy(d.content, br.unread())
  105. if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) {
  106. return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets)
  107. }
  108. return &d, nil
  109. }