Created
December 6, 2023 04:57
-
-
Save coxley/bd61afabb125df85c7af0b703a020dbf to your computer and use it in GitHub Desktop.
Read zstd dict
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bytes" | |
"encoding/binary" | |
"fmt" | |
) | |
// ZstdMagic is the 4-byte magic number that opens every Zstandard frame.
//
// The format defines the magic number as 0xFD2FB528; it is stored in
// little-endian byte order, which is why the bytes appear reversed here.
var ZstdMagic = []byte("\x28\xB5\x2F\xFD")
// ReadDictID returns a dictionary ID from the first zstd frame, if present | |
// | |
// If there are multiple frames in data, we will return only the first's | |
// dictionary ID. This saves us from scanning the entire payload, with a | |
// tradeoff of assuming incompatible frames aren't being passed together. | |
func ReadDictID(data []byte) (int32, error) { | |
if !bytes.Equal(data[:4], ZstdMagic) { | |
return 0, fmt.Errorf("data malformatted: doesn't start with %x", ZstdMagic) | |
} | |
// To get the Dictionary ID, we need to know the length of the window | |
// desccriptor and the ID itself. We can ignore FCS. | |
// | |
// Format ref: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#zstandard-frames | |
fhd := data[4] | |
wd := 1 | |
ss := (fhd >> 5) & 1 | |
if ss == 1 { | |
wd = 0 | |
} | |
start := 5 + wd | |
end := start | |
switch fhd & 3 { | |
case 0: | |
break | |
case 1: | |
end += 1 | |
case 2: | |
end += 2 | |
case 3: | |
end += 4 | |
} | |
if start == end { | |
return 0, nil | |
} | |
return int32(binary.LittleEndian.Uint32(data[start:end])), nil | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment