Skip to content

Instantly share code, notes, and snippets.

@indraniel
Created February 23, 2015 19:05
Show Gist options
  • Save indraniel/1a91458984179ab4cf80 to your computer and use it in GitHub Desktop.
Save indraniel/1a91458984179ab4cf80 to your computer and use it in GitHub Desktop.
Reading through a tar.gz file in Go / golang
package main
import (
"archive/tar"
"compress/gzip"
"flag"
"fmt"
"io"
"os"
)
// main reads the command line and forwards the archive path together with
// the value of -n to processFile.
//
// The first positional argument is the path of the tar.gz file. When it is
// missing (or empty) a complaint is printed and the process exits with
// status 1.
func main() {
	var entryIndex int
	flag.IntVar(&entryIndex, "n", 4, "an integer")
	flag.Parse()

	positional := flag.Args()
	if len(positional) == 0 || positional[0] == "" {
		fmt.Println("Dude, you didn't pass in a tar file!")
		os.Exit(1)
	}

	archivePath := positional[0]
	fmt.Println("arg 1: ", archivePath)
	processFile(archivePath, entryIndex)
}
// processFile walks the gzip-compressed tar archive at srcFile and prints the
// name of every regular file it meets, prefixed with a running index.
//
// The index starts at 0 and advances after every entry that is not a
// directory. When a regular file is reached while the index equals num, the
// file's contents are copied to stdout between two " --- " marker lines and
// the process exits with status 0. If the archive ends first, the function
// simply returns.
//
// Any error (opening the file, decoding gzip or tar data, copying the
// selected entry) is printed to stdout and the process exits with status 1.
func processFile(srcFile string, num int) {
	f, err := os.Open(srcFile)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer f.Close()

	gzf, err := gzip.NewReader(f)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	// Release the decompressor on the normal return path. (os.Exit below
	// skips deferred calls, which is fine because the process is ending.)
	defer gzf.Close()

	tarReader := tar.NewReader(gzf)
	i := 0
	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Println(err)
			os.Exit(1)
		}

		name := header.Name
		switch header.Typeflag {
		case tar.TypeDir:
			// Directories are skipped without consuming an index.
			continue
		case tar.TypeReg:
			fmt.Println("(", i, ")", "Name: ", name)
			if i == num {
				fmt.Println(" --- ")
				// A truncated or corrupt entry must not be reported as a
				// successful dump, so the copy error is checked.
				if _, err := io.Copy(os.Stdout, tarReader); err != nil {
					fmt.Println(err)
					os.Exit(1)
				}
				fmt.Println(" --- ")
				os.Exit(0)
			}
		default:
			fmt.Printf("%s : %c %s %s\n",
				"Yikes! Unable to figure out type",
				header.Typeflag,
				"in file",
				name,
			)
		}
		i++
	}
}
@ngrande
Copy link

ngrande commented Nov 14, 2016

Nice! Thanks

I edited the code slightly to make it usable as a package and to actually write the extracted files to disk:

package main

import (
	"archive/tar"
	"compress/gzip"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"os"
)

// main parses the command line and passes the first positional argument,
// the path of the tar.gz archive, to processFile. With no archive path it
// prints a complaint and exits with status 1.
func main() {
	// -n is still registered so command lines that pass it keep parsing,
	// but its value is not read anywhere in this function.
	_ = flag.Int("n", 4, "an integer")
	flag.Parse()

	switch archivePath := flag.Arg(0); archivePath {
	case "":
		fmt.Println("Dude, you didn't pass in a tar file!")
		os.Exit(1)
	default:
		fmt.Println("arg 1: ", archivePath)
		processFile(archivePath)
	}
}

// ReadPackedFile unpacks the tar.gz archive located at filepath by handing
// it to processFile. It panics with "Empty input!" when filepath is the
// empty string.
func ReadPackedFile(filepath string) {
	if len(filepath) == 0 {
		panic("Empty input!")
	}
	processFile(filepath)
}

// processFile extracts the gzip-compressed tar archive at srcFile, creating
// each entry at the path given by its header name (relative names resolve
// against the current working directory).
//
// Directories are created with mode 0755 and regular files are written with
// mode 0755. Entries of any other type are reported on stdout and skipped.
//
// Errors while opening or decoding the archive, creating a directory, or
// writing a file are printed to stdout and terminate the process with
// status 1. A failure to read an entry's contents panics.
//
// NOTE(review): header names are used as-is, so an archive containing
// absolute paths or ".." elements can write outside the working directory.
// Only run this on trusted archives until a path check is added.
func processFile(srcFile string) {
	f, err := os.Open(srcFile)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer f.Close()

	gzf, err := gzip.NewReader(f)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer gzf.Close()

	tarReader := tar.NewReader(gzf)

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Println(err)
			os.Exit(1)
		}

		name := header.Name

		switch header.Typeflag {
		case tar.TypeDir: // = directory
			fmt.Println("Directory:", name)
			// MkdirAll tolerates a directory that already exists and
			// creates missing parents.
			if err := os.MkdirAll(name, 0755); err != nil {
				fmt.Println(err)
				os.Exit(1)
			}
		case tar.TypeReg: // = regular file
			fmt.Println("Regular file:", name)
			data := make([]byte, header.Size)
			// A single Read may return fewer bytes than requested and
			// reports io.EOF once the entry is exhausted (including for
			// empty entries). ReadFull keeps reading until data is filled
			// and only fails if the entry is genuinely short.
			if _, err := io.ReadFull(tarReader, data); err != nil {
				fmt.Println(err)
				panic("Error reading file!!! PANIC!!!!!!")
			}

			if err := ioutil.WriteFile(name, data, 0755); err != nil {
				fmt.Println(err)
				os.Exit(1)
			}
		default:
			fmt.Printf("%s : %c %s %s\n",
				"Yikes! Unable to figure out type",
				header.Typeflag,
				"in file",
				name,
			)
		}
	}
}
```

@gudmundur
Copy link

When writing to a file, the example above does not update the file's atime and mtime.

@smil2k
Copy link

smil2k commented Mar 28, 2017

Moreover, it will fail if the file is too large (more than a couple of KB).

You need to change the Read/WriteFile into:
f, _ := os.Create(name)
defer f.Close()
io.Copy(f, tarReader)

This will safely copy everything. (Just don't forget to add proper error handling.)

@joonas-fi
Copy link

joonas-fi commented Jun 13, 2017

EDIT: WARNING: this code (and the snippets above) has a path traversal vulnerability. Please read, understand and fix it before using ANY OF THESE CODE SNIPPETS: https://snyk.io/research/zip-slip-vulnerability

Thank you all! Here's an improved non-buffering version with error checking that only takes in an io.Reader (works for files, HTTP responses etc.):

package tarextract

// hat tip https://gist.github.com/indraniel/1a91458984179ab4cf80

import (
	"archive/tar"
	"compress/gzip"
	"io"
	"log"
	"os"
)

// ExtractTarGz reads a gzip-compressed tar archive from gzipStream and
// extracts it, creating each entry at the path given by its header name
// (relative names resolve against the current working directory).
//
// Directories are created with mode 0755 (existing directories are
// accepted) and regular files are streamed to disk without buffering the
// whole entry in memory. Any other entry type is treated as an error.
//
// Entries whose name is empty, absolute, or contains a ".." element are
// rejected so that an archive cannot write outside the extraction
// directory by name alone.
//
// Every failure is reported through log.Fatalf, which terminates the
// process.
func ExtractTarGz(gzipStream io.Reader) {
	uncompressedStream, err := gzip.NewReader(gzipStream)
	if err != nil {
		log.Fatalf("ExtractTarGz: NewReader failed: %s", err.Error())
	}
	defer uncompressedStream.Close()

	tarReader := tar.NewReader(uncompressedStream)

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatalf("ExtractTarGz: Next() failed: %s", err.Error())
		}

		if isUnsafeEntryName(header.Name) {
			log.Fatalf("ExtractTarGz: unsafe path in archive: %q", header.Name)
		}

		switch header.Typeflag {
		case tar.TypeDir:
			// MkdirAll does not fail when the directory already exists.
			if err := os.MkdirAll(header.Name, 0755); err != nil {
				log.Fatalf("ExtractTarGz: Mkdir() failed: %s", err.Error())
			}
		case tar.TypeReg:
			outFile, err := os.Create(header.Name)
			if err != nil {
				log.Fatalf("ExtractTarGz: Create() failed: %s", err.Error())
			}
			// Close right away instead of deferring: a defer inside the
			// loop would keep every extracted file open until the function
			// returns, and log.Fatalf would skip the defers entirely.
			_, copyErr := io.Copy(outFile, tarReader)
			closeErr := outFile.Close()
			if copyErr != nil {
				log.Fatalf("ExtractTarGz: Copy() failed: %s", copyErr.Error())
			}
			if closeErr != nil {
				log.Fatalf("ExtractTarGz: Close() failed: %s", closeErr.Error())
			}
		default:
			// Typeflag is a byte, so it is printed as a character.
			log.Fatalf(
				"ExtractTarGz: unknown type: %c in %s",
				header.Typeflag,
				header.Name)
		}
	}
}

// isUnsafeEntryName reports whether name is empty, starts with a path
// separator, or contains a ".." element. Both '/' and '\\' are treated as
// separators. It is a conservative, dependency-free check; it does not
// recognise Windows volume names.
func isUnsafeEntryName(name string) bool {
	if name == "" || name[0] == '/' || name[0] == '\\' {
		return true
	}
	elemStart := 0
	for i := 0; i <= len(name); i++ {
		if i < len(name) && name[i] != '/' && name[i] != '\\' {
			continue
		}
		if name[elemStart:i] == ".." {
			return true
		}
		elemStart = i + 1
	}
	return false
}

@surajnarwade
Copy link

How to write unit test for the same ?

@dkartachov
Copy link

dkartachov commented Oct 8, 2022

I am working with symbolic links so I took the liberty to add a case for those to @joonas-fi's solution. Simple addition, but it works and I hope it helps someone :)

package tarextract

// hat tip https://gist.github.com/indraniel/1a91458984179ab4cf80

import (
	"archive/tar"
	"compress/gzip"
	"io"
	"log"
	"os"
)

// ExtractTarGz reads a gzip-compressed tar archive from gzipStream and
// extracts it, creating each entry at the path given by its header name
// (relative names resolve against the current working directory).
//
// Directories are created with mode 0755 (existing directories are
// accepted), regular files are streamed to disk without buffering the whole
// entry in memory, and symbolic links are recreated with the target stored
// in the archive. Any other entry type is treated as an error.
//
// Entries whose name is empty, absolute, or contains a ".." element are
// rejected so that an archive cannot write outside the extraction
// directory by name alone.
//
// NOTE(review): symlink targets are not validated, so an archive can still
// plant a link that points outside the extraction directory and then write
// through it. Only extract trusted archives, or reject symlinks, until that
// is handled.
//
// Every failure is reported through log.Fatalf, which terminates the
// process.
func ExtractTarGz(gzipStream io.Reader) {
	uncompressedStream, err := gzip.NewReader(gzipStream)
	if err != nil {
		log.Fatalf("ExtractTarGz: NewReader failed: %s", err.Error())
	}
	defer uncompressedStream.Close()

	tarReader := tar.NewReader(uncompressedStream)

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatalf("ExtractTarGz: Next() failed: %s", err.Error())
		}

		if isUnsafeEntryName(header.Name) {
			log.Fatalf("ExtractTarGz: unsafe path in archive: %q", header.Name)
		}

		switch header.Typeflag {
		case tar.TypeDir:
			// MkdirAll does not fail when the directory already exists.
			if err := os.MkdirAll(header.Name, 0755); err != nil {
				log.Fatalf("ExtractTarGz: Mkdir() failed: %s", err.Error())
			}
		case tar.TypeReg:
			outFile, err := os.Create(header.Name)
			if err != nil {
				log.Fatalf("ExtractTarGz: Create() failed: %s", err.Error())
			}
			// Close right away instead of deferring: a defer inside the
			// loop would keep every extracted file open until the function
			// returns, and log.Fatalf would skip the defers entirely.
			_, copyErr := io.Copy(outFile, tarReader)
			closeErr := outFile.Close()
			if copyErr != nil {
				log.Fatalf("ExtractTarGz: Copy() failed: %s", copyErr.Error())
			}
			if closeErr != nil {
				log.Fatalf("ExtractTarGz: Close() failed: %s", closeErr.Error())
			}
		case tar.TypeSymlink:
			// Report a failed link instead of silently producing an
			// incomplete extraction.
			if err := os.Symlink(header.Linkname, header.Name); err != nil {
				log.Fatalf("ExtractTarGz: Symlink() failed: %s", err.Error())
			}
		default:
			// Typeflag is a byte, so it is printed as a character.
			log.Fatalf(
				"ExtractTarGz: unknown type: %c in %s",
				header.Typeflag,
				header.Name)
		}
	}
}

// isUnsafeEntryName reports whether name is empty, starts with a path
// separator, or contains a ".." element. Both '/' and '\\' are treated as
// separators. It is a conservative, dependency-free check; it does not
// recognise Windows volume names.
func isUnsafeEntryName(name string) bool {
	if name == "" || name[0] == '/' || name[0] == '\\' {
		return true
	}
	elemStart := 0
	for i := 0; i <= len(name); i++ {
		if i < len(name) && name[i] != '/' && name[i] != '\\' {
			continue
		}
		if name[elemStart:i] == ".." {
			return true
		}
		elemStart = i + 1
	}
	return false
}

@joonas-fi
Copy link

Thanks for sharing @dkartachov, have a nice day :)

@indraniel
Copy link
Author

I echo @joonas-fi, thanks for the improvements @dkartachov !

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment