Skip to content

Instantly share code, notes, and snippets.

@miku
Last active March 5, 2020 23:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save miku/50e4ed2aae663d6ab5bfecce57c42d3b to your computer and use it in GitHub Desktop.
Save miku/50e4ed2aae663d6ab5bfecce57c42d3b to your computer and use it in GitHub Desktop.
Like zcat, but reads the names of one or more gzip files from stdin (e.g. from find) and decompresses them in parallel
module gist.github.com/miku/50e4ed2aae663d6ab5bfecce57c42d3b
go 1.13
require (
github.com/klauspost/compress v1.10.2 // indirect
github.com/klauspost/pgzip v1.2.1
github.com/miku/parallel v0.0.0-20190514155252-d289185a944b
)
github.com/klauspost/compress v1.10.2 h1:Znfn6hXZAHaLPNnlqUYRrBSReFHYybslgv4PTiyz6P0=
github.com/klauspost/compress v1.10.2/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
github.com/klauspost/pgzip v1.2.1 h1:oIPZROsWuPHpOdMVWLuJZXwgjhrW8r1yEX8UqMyeNHM=
github.com/klauspost/pgzip v1.2.1/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/miku/parallel v0.0.0-20190514155252-d289185a944b h1:O68zLMmi8g7xdZRgPpDyCJOL4A7JhlHrr0riETJa6lI=
github.com/miku/parallel v0.0.0-20190514155252-d289185a944b/go.mod h1:m4hVixrXwk3DUp5cQ1j661BsHpjqSc/SfXE0uUMxmAw=
# Run recipes with bash instead of the default /bin/sh.
SHELL := /bin/bash

# Binaries produced by the default target.
TARGETS := zrcat

# Default target: build every binary listed in TARGETS.
.PHONY: all
all: $(TARGETS)

# Build a binary from the Go source file of the same name (zrcat <- zrcat.go).
# Recipe lines must begin with a tab character.
%: %.go
	go build -o $@ $<

# Remove the built binaries.
.PHONY: clean
clean:
	rm -f $(TARGETS)
// zrcat reads a list of names of gzip-compressed files from stdin (e.g. from
// find) and decompresses their content to stdout, in parallel. Use case:
// decompress the content of millions of gzip files to stdout quickly, without
// spawning millions of processes.
//
package main
import (
"bufio"
"bytes"
"flag"
"io"
"log"
"os"
"runtime"
gzip "github.com/klauspost/pgzip"
"github.com/miku/parallel"
)
// bestEffort (-b) selects the error policy: when set, failures are logged and
// processing continues instead of aborting.
var bestEffort = flag.Bool("b", false, "best effort")

// numWorkers (-w) is the worker count handed to the parallel processor; it
// defaults to the number of logical CPUs.
var numWorkers = flag.Int("w", runtime.NumCPU(), "number of cpus")

// batchSize (-s) is the batch size handed to the parallel processor.
var batchSize = flag.Int("s", 10000, "batch size")
func main() {
bw := bufio.NewWriter(os.Stdout)
defer bw.Flush()
pp := parallel.NewProcessor(os.Stdin, bw, func(p []byte) ([]byte, error) {
filename := string(bytes.TrimSpace(p))
if filename == "" {
return nil, nil
}
f, err := os.Open(filename)
if err != nil {
if *bestEffort {
log.Printf("open: %v", err)
return nil, nil
} else {
return nil, err
}
}
defer f.Close()
zr, err := gzip.NewReader(f)
if err != nil {
if *bestEffort {
log.Printf("gzip: %v", err)
return nil, nil
} else {
return nil, err
}
}
defer zr.Close()
var buf bytes.Buffer
_, err = io.Copy(&buf, zr)
if err != nil {
if *bestEffort {
log.Printf("copy: %v", err)
return nil, nil
} else {
return nil, err
}
}
return buf.Bytes(), nil
})
pp.NumWorkers = *numWorkers
pp.BatchSize = *batchSize
if err := pp.Run(); err != nil {
if *bestEffort {
log.Printf("run: %v", err)
} else {
log.Fatal(err)
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment