Skip to content

Instantly share code, notes, and snippets.

@klauspost
klauspost / gen.go
Last active March 16, 2022 18:24
zstd decomp - avo
package main
//go:generate go run gen.go -out seqdec_amd64-avo.s -stubs delme.go -pkg=zstd
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
#include "textflag.h"
#include "funcdata.h"
#include "go_asm.h"
#define mask $0x7ff // up to 11 bits, according to the spec
#define bufoff 256 // see decompress.go, we're using [4][256]byte
//func decompress4x_main_loop_bmi1(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
TEXT ·decompress4x_main_loop_bmi1(SB), NOSPLIT, $0
package operand
// CalcLogTern will calculate VPTERNLOGD/VPTERNLOGQ based on a function.
// Argument order is AT&T, a being *mm3, b being *mm2, c being *mm1 and destination.
// This can be used directly in VPTERNLOGD(a, b, c, CalcLogTern(...))
func CalcLogTern(fn func(a, b, c bool) bool) U8 {
var res U8
for ai, av := range []bool{false, true} {
for bi, bv := range []bool{false, true} {
for ci, cv := range []bool{false, true} {
2019/10/12 13:17:16 process finished successfully
2019/10/12 13:17:16 Running fuzzing with: ./fuzzer -print_final_stats=1 -exact_artifact_path=./artifact -error_exitcode=76 -max_total_time=3600 corpus seed -rss_limit_mb=1984
FUZZER: INFO: Seed: 1948056531
FUZZER: INFO: 65536 Extra Counters
FUZZER: INFO: 557 files found in corpus
FUZZER: INFO: 588 files found in seed
FUZZER: INFO: -max_len is not provided; libFuzzer will not generate inputs larger than 1048576 bytes
FUZZER: INFO: seed corpus: files: 1145 min: 1b max: 1048576b total: 113712584b rss: 776Mb
FUZZER: #256 pulse ft: 1565 corp: 92/921b lim: 4 exec/s: 85 rss: 776Mb
FUZZER: #512 pulse ft: 2480 corp: 215/5035b lim: 4 exec/s: 73 rss: 776Mb
@klauspost
klauspost / gist:f5df3a3522ac4bcb3bcde448872dffe6
Last active June 1, 2019 11:06
Compression memory test
func BenchmarkCompressAllocations(b *testing.B) {
payload := []byte(strings.Repeat("Tiny payload", 20))
for j := -2; j <= 9; j++ {
b.Run("level("+strconv.Itoa(j)+")", func(b *testing.B) {
b.Run("flate", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
w, err := flate.NewWriter(ioutil.Discard, j)
#
# Finds the rounded-down square root of the 8-bit value in 14.
# Uses self-modifying code to store result, so
# instruction 11 (LDI 0) and the two values (14, 15) need to be reset between runs.
#
LDA 14
SUB 15
JPC 11
STA 14

Keybase proof

I hereby claim:

  • I am klauspost on github.
  • I am klauspost (https://keybase.io/klauspost) on keybase.
  • I have a public key ASD53DNo7kJeDE1yhLrl2x5X1So398SG0jwkzn5ozDA6xAo

To claim this, I am signing this object:

package bench
import (
"bytes"
"compress/gzip"
ogzip "github.com/klauspost/compress/gzip"
"testing"
)
var bidReq = []byte(`{"id":"50215d10a41d474f77591bff601f6ade","imp":[{"id":"86df3bc6-7bd4-44d9-64e2-584a69790229","native":{"request":"{\"ver\":\"1.0\",\"plcmtcnt\":1,\"assets\":[{\"id\":1,\"data\":{\"type\":12}},{\"id\":2,\"required\":1,\"title\":{\"len\":50}},{\"id\":3,\"required\":1,\"img\":{\"type\":1,\"w\":80,\"h\":80}},{\"id\":4,\"required\":1,\"img\":{\"type\":3,\"w\":1200,\"h\":627}},{\"id\":5,\"data\":{\"type\":3}},{\"id\":6,\"required\":1,\"data\":{\"type\":2,\"len\":100}}]}","ver":"1.0"},"tagid":"1","bidfloor":0.6,"bidfloorcur":"USD"}],"site":{"id":"1012864","domain":"www.abc.com","cat":["IAB3"],"mobile":1,"keywords":"apps,games,discovery,recommendation"},"device":{"dnt":1,"ua":"Mozilla/5.0 (Linux; U; Android 4.2.2; km-kh; SHV-E120S Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30","ip":"175.100.59.170","geo":{"lat":11.5625,"lon":104.916,"country":"KHM","r
//+build ignore
package main
// Adapted from: https://gist.github.com/arnehormann/65421048f56ac108f6b5
import (
"bufio"
"encoding/binary"
"flag"
PASS
BenchmarkShift-4 20000 69666 ns/op 940.71 MB/s
BenchmarkAnd-4 20000 66802 ns/op 981.04 MB/s
ok _/c_/Temp/shifttest 4.146s
PASS
BenchmarkShift-4 20000 68680 ns/op 954.21 MB/s
BenchmarkAnd-4 20000 67339 ns/op 973.21 MB/s
ok _/c_/Temp/shifttest 4.121s
PASS
BenchmarkShift-4 20000 68687 ns/op 954.12 MB/s