Last active
September 23, 2019 10:19
-
-
Save rasky/d42a52c16683f1a2f4dccdef80e2712d to your computer and use it in GitHub Desktop.
Go - faster gzip reader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package input | |
import ( | |
"bytes" | |
"errors" | |
"io" | |
"os/exec" | |
"strings" | |
) | |
// fastGzReader is an API-compatible drop-in replacement for gzip.Reader
// that achieves a higher decoding speed by spawning an external zcat
// process and piping the compressed data through it.
//
// Go's native gzip implementation is about 2x slower at decompressing data
// compared to zlib (mostly due to Go compiler inefficiencies). So for tasks
// where the gzip decoding speed is important, this is a quick workaround
// that doesn't require cgo.
//
// zcat is part of the gzip package and comes preinstalled on most Linux
// distributions and on OSX.
//
// A fastGzReader is not safe for concurrent use.
type fastGzReader struct {
	// Reader yields the decompressed data, i.e. the stdout of the current
	// zcat process. It is nil until Reset has succeeded once.
	io.Reader

	// stderr collects what the current zcat process writes to its standard
	// error. A fresh buffer is allocated for every process and held by
	// pointer, so copying a fastGzReader never copies a buffer that os/exec
	// may still be filling from its own goroutine.
	stderr *bytes.Buffer

	// close releases the current zcat process and reports how it ended.
	// It is nil while no process is attached.
	close func() error
}

// newFastGzReader starts a zcat process that decompresses the gzip stream
// read from r and returns a reader for the decompressed data.
//
// It returns an error if the process cannot be started (for instance when
// zcat is not installed). The caller must call Close when done.
func newFastGzReader(r io.Reader) (fastGzReader, error) {
	var gz fastGzReader
	if err := gz.Reset(r); err != nil {
		return fastGzReader{}, err
	}
	return gz, nil
}

// Reset discards the current stream, if any, and starts decompressing the
// gzip stream read from r with a new zcat process.
//
// It returns an error if the new process cannot be started; in that case no
// process is attached to gz and a subsequent Close is a no-op.
func (gz *fastGzReader) Reset(r io.Reader) error {
	if gz.close != nil {
		// Best effort: the previous stream may have been abandoned halfway,
		// in which case zcat is expected to exit abnormally. That outcome
		// says nothing about the new stream, so it is not reported here.
		_ = gz.close()
		gz.close = nil
	}

	// Every process gets its own stderr buffer so that diagnostics from an
	// earlier stream can never leak into the error of a later one.
	stderr := new(bytes.Buffer)

	cmd := exec.Command("zcat")
	cmd.Stdin = r
	cmd.Stderr = stderr
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return err
	}
	if err := cmd.Start(); err != nil {
		_ = stdout.Close()
		return err
	}

	// closed/result make the closer idempotent: cmd.Wait must be called
	// only once, so repeated Close calls replay the first outcome.
	var (
		closed bool
		result error
	)
	gz.Reader = stdout
	gz.stderr = stderr
	gz.close = func() error {
		if closed {
			return result
		}
		closed = true

		// Close our end of the pipe before waiting: if the caller stopped
		// reading early, zcat would otherwise block on a full pipe and
		// Wait would never return.
		_ = stdout.Close()

		result = cmd.Wait()
		var exitErr *exec.ExitError
		if errors.As(result, &exitErr) {
			// Prefer zcat's own diagnostic over the bare exit status, but
			// keep the exit error when zcat printed nothing (e.g. it was
			// terminated by a signal) so the error is never empty.
			if msg := strings.TrimSpace(stderr.String()); msg != "" {
				result = errors.New(msg)
			}
		}
		// Any other Wait failure (such as an error while reading the input
		// stream) is returned unchanged.
		return result
	}
	return nil
}

// Close terminates the zcat process and releases its resources.
//
// It returns an error if zcat reported a failure or if feeding it the input
// stream failed. Calling Close on a reader with no attached process (the
// zero value, or after a failed Reset) returns nil, and calling it again
// returns the result of the first call.
func (gz fastGzReader) Close() error {
	if gz.close == nil {
		return nil
	}
	return gz.close()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@brentp sorry for the delay; no problem with that. Consider this MIT licensed.