Reading through a tar.gz file in Go / golang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"archive/tar" | |
"compress/gzip" | |
"flag" | |
"fmt" | |
"io" | |
"os" | |
) | |
func main() { | |
// get the arguments from the command line | |
numPtr := flag.Int("n", 4, "an integer") | |
flag.Parse() | |
sourceFile := flag.Arg(0) | |
if sourceFile == "" { | |
fmt.Println("Dude, you didn't pass in a tar file!") | |
os.Exit(1) | |
} | |
fmt.Println("arg 1: ", flag.Arg(0)) | |
processFile(sourceFile, *numPtr) | |
} | |
// processFile opens the gzip-compressed tar archive at srcFile and prints a
// numbered listing of its entries to standard output.
//
// Directories are skipped without consuming an index. Regular files are
// listed with the running index. Entries of any other type are reported as
// unrecognised and still consume an index. When the running index equals num
// on a regular file, that file's contents are streamed to standard output
// between " --- " markers and the function returns.
//
// Any failure to open, decompress, or read the archive is printed and ends
// the process with exit status 1.
func processFile(srcFile string, num int) {
	f, err := os.Open(srcFile)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer f.Close()

	gzf, err := gzip.NewReader(f)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer gzf.Close()

	tarReader := tar.NewReader(gzf)

	// i is advanced at the bottom of the loop rather than in a post
	// statement so that the `continue` for directories does not bump it.
	i := 0
	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Println(err)
			os.Exit(1)
		}

		name := header.Name

		switch header.Typeflag {
		case tar.TypeDir:
			continue
		case tar.TypeReg:
			fmt.Println("(", i, ")", "Name: ", name)
			if i == num {
				fmt.Println(" --- ")
				if _, err := io.Copy(os.Stdout, tarReader); err != nil {
					fmt.Println(err)
					os.Exit(1)
				}
				fmt.Println(" --- ")
				// Return instead of calling os.Exit(0) so the deferred
				// Close calls run and callers other than main can use
				// this function.
				return
			}
		default:
			fmt.Printf("%s : %c %s %s\n",
				"Yikes! Unable to figure out type",
				header.Typeflag,
				"in file",
				name,
			)
		}

		i++
	}
}
EDIT: WARNING: this code (and the snippets above) has a path traversal vulnerability. Please read, understand, and fix it before using ANY OF THESE CODE SNIPPETS: https://snyk.io/research/zip-slip-vulnerability
Thank you all! Here's an improved non-buffering version with error checking that only takes in an io.Reader (works for files, HTTP responses etc.):
package tarextract
// hat tip https://gist.github.com/indraniel/1a91458984179ab4cf80
import (
"archive/tar"
"compress/gzip"
"io"
"log"
"os"
)
// ExtractTarGz unpacks a gzip-compressed tar stream into the current working
// directory.
//
// Directory entries are created with mode 0755 (an already existing directory
// is not an error) and regular files are written with os.Create. Missing
// parent directories of a regular file are created on demand, because many
// archives carry no explicit directory entries.
//
// Entry names are checked lexically before anything touches the file system:
// empty names, absolute paths, and names containing a ".." component are
// rejected so an archive cannot write outside the working directory.
//
// Every failure, including an unsafe name or an unsupported entry type, is
// reported through log.Fatalf and therefore terminates the process.
func ExtractTarGz(gzipStream io.Reader) {
	uncompressedStream, err := gzip.NewReader(gzipStream)
	if err != nil {
		log.Fatalf("ExtractTarGz: NewReader failed: %s", err.Error())
	}
	defer uncompressedStream.Close()

	tarReader := tar.NewReader(uncompressedStream)

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatalf("ExtractTarGz: Next() failed: %s", err.Error())
		}

		if !isSafeRelPath(header.Name) {
			log.Fatalf("ExtractTarGz: unsafe path in archive: %q", header.Name)
		}

		switch header.Typeflag {
		case tar.TypeDir:
			// MkdirAll tolerates directories that already exist or whose
			// parents were never listed in the archive.
			if err := os.MkdirAll(header.Name, 0755); err != nil {
				log.Fatalf("ExtractTarGz: Mkdir() failed: %s", err.Error())
			}
		case tar.TypeReg:
			extractRegularFile(header.Name, tarReader)
		default:
			// Typeflag is a single byte, so it is printed with %c.
			log.Fatalf(
				"ExtractTarGz: unknown type: %c in %s",
				header.Typeflag,
				header.Name)
		}
	}
}

// isArchiveSep reports whether c separates path components, treating both the
// tar separator '/' and the host separator as boundaries.
func isArchiveSep(c byte) bool {
	return c == '/' || os.IsPathSeparator(c)
}

// isSafeRelPath reports whether p is a non-empty relative path without any
// ".." component. The check is purely lexical.
func isSafeRelPath(p string) bool {
	if p == "" || isArchiveSep(p[0]) {
		return false
	}
	start := 0
	for i := 0; i <= len(p); i++ {
		if i < len(p) && !isArchiveSep(p[i]) {
			// Where '\\' is the host separator, a colon can introduce a
			// volume name, which would make the path absolute.
			if p[i] == ':' && os.PathSeparator == '\\' {
				return false
			}
			continue
		}
		if p[start:i] == ".." {
			return false
		}
		start = i + 1
	}
	return true
}

// ensureParentDir creates the directory that contains name, if name has one.
func ensureParentDir(name string) {
	for i := len(name) - 1; i > 0; i-- {
		if isArchiveSep(name[i]) {
			if err := os.MkdirAll(name[:i], 0755); err != nil {
				log.Fatalf("ExtractTarGz: Mkdir() failed: %s", err.Error())
			}
			return
		}
	}
}

// extractRegularFile streams src into a newly created file called name and
// closes it before returning, so only one output file is open at a time.
func extractRegularFile(name string, src io.Reader) {
	ensureParentDir(name)

	outFile, err := os.Create(name)
	if err != nil {
		log.Fatalf("ExtractTarGz: Create() failed: %s", err.Error())
	}
	if _, err := io.Copy(outFile, src); err != nil {
		outFile.Close()
		log.Fatalf("ExtractTarGz: Copy() failed: %s", err.Error())
	}
	// A failed Close can mean buffered data never reached the disk.
	if err := outFile.Close(); err != nil {
		log.Fatalf("ExtractTarGz: Close() failed: %s", err.Error())
	}
}
How would one write a unit test for this?
I am working with symbolic links so I took the liberty to add a case for those to @joonas-fi's solution. Simple addition, but it works and I hope it helps someone :)
package tarextract
// hat tip https://gist.github.com/indraniel/1a91458984179ab4cf80
import (
"archive/tar"
"compress/gzip"
"io"
"log"
"os"
)
// ExtractTarGz unpacks a gzip-compressed tar stream into the current working
// directory. Directories, regular files, and symbolic links are supported.
//
// Directory entries are created with mode 0755 (an already existing directory
// is not an error) and regular files are written with os.Create. Missing
// parent directories of files and symlinks are created on demand.
//
// Entry names are checked lexically before anything touches the file system:
// empty names, absolute paths, and names containing a ".." component are
// rejected. Symlink targets must pass the same check, so a link created from
// the archive cannot point above the directory that contains it. This is
// deliberately strict: archives whose links use "../" targets are refused.
//
// Every failure, including an unsafe name or an unsupported entry type, is
// reported through log.Fatalf and therefore terminates the process.
func ExtractTarGz(gzipStream io.Reader) {
	uncompressedStream, err := gzip.NewReader(gzipStream)
	if err != nil {
		log.Fatalf("ExtractTarGz: NewReader failed: %s", err.Error())
	}
	defer uncompressedStream.Close()

	tarReader := tar.NewReader(uncompressedStream)

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatalf("ExtractTarGz: Next() failed: %s", err.Error())
		}

		if !isSafeRelPath(header.Name) {
			log.Fatalf("ExtractTarGz: unsafe path in archive: %q", header.Name)
		}

		switch header.Typeflag {
		case tar.TypeDir:
			// MkdirAll tolerates directories that already exist or whose
			// parents were never listed in the archive.
			if err := os.MkdirAll(header.Name, 0755); err != nil {
				log.Fatalf("ExtractTarGz: Mkdir() failed: %s", err.Error())
			}
		case tar.TypeReg:
			extractRegularFile(header.Name, tarReader)
		case tar.TypeSymlink:
			if !isSafeRelPath(header.Linkname) {
				log.Fatalf(
					"ExtractTarGz: unsafe symlink target in archive: %q -> %q",
					header.Name,
					header.Linkname)
			}
			ensureParentDir(header.Name)
			if err := os.Symlink(header.Linkname, header.Name); err != nil {
				log.Fatalf("ExtractTarGz: Symlink() failed: %s", err.Error())
			}
		default:
			// Typeflag is a single byte, so it is printed with %c.
			log.Fatalf(
				"ExtractTarGz: unknown type: %c in %s",
				header.Typeflag,
				header.Name)
		}
	}
}

// isArchiveSep reports whether c separates path components, treating both the
// tar separator '/' and the host separator as boundaries.
func isArchiveSep(c byte) bool {
	return c == '/' || os.IsPathSeparator(c)
}

// isSafeRelPath reports whether p is a non-empty relative path without any
// ".." component. The check is purely lexical.
func isSafeRelPath(p string) bool {
	if p == "" || isArchiveSep(p[0]) {
		return false
	}
	start := 0
	for i := 0; i <= len(p); i++ {
		if i < len(p) && !isArchiveSep(p[i]) {
			// Where '\\' is the host separator, a colon can introduce a
			// volume name, which would make the path absolute.
			if p[i] == ':' && os.PathSeparator == '\\' {
				return false
			}
			continue
		}
		if p[start:i] == ".." {
			return false
		}
		start = i + 1
	}
	return true
}

// ensureParentDir creates the directory that contains name, if name has one.
func ensureParentDir(name string) {
	for i := len(name) - 1; i > 0; i-- {
		if isArchiveSep(name[i]) {
			if err := os.MkdirAll(name[:i], 0755); err != nil {
				log.Fatalf("ExtractTarGz: Mkdir() failed: %s", err.Error())
			}
			return
		}
	}
}

// extractRegularFile streams src into a newly created file called name and
// closes it before returning, so only one output file is open at a time.
func extractRegularFile(name string, src io.Reader) {
	ensureParentDir(name)

	outFile, err := os.Create(name)
	if err != nil {
		log.Fatalf("ExtractTarGz: Create() failed: %s", err.Error())
	}
	if _, err := io.Copy(outFile, src); err != nil {
		outFile.Close()
		log.Fatalf("ExtractTarGz: Copy() failed: %s", err.Error())
	}
	// A failed Close can mean buffered data never reached the disk.
	if err := outFile.Close(); err != nil {
		log.Fatalf("ExtractTarGz: Close() failed: %s", err.Error())
	}
}
Thanks for sharing @dkartachov, have a nice day :)
I echo @joonas-fi, thanks for the improvements @dkartachov !
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Moreover, it will fail if the file is too large (more than a couple of KB).
You need to change the Read/WriteFile into:
// Create the destination file for the current tar entry.
// NOTE: the error returned by os.Create is discarded here; check it in real code.
f, _ := os.Create(name)
// Close the file when the enclosing function returns.
defer f.Close()
// Stream the entry from tarReader into the file instead of loading it into
// memory first. The byte count and error returned by io.Copy are ignored here.
io.Copy(f, tarReader)
This will safely copy everything. (Just don't forget to add proper error handling.)