Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Reading through a tar.gz file in Go / golang
package main
import (
"archive/tar"
"compress/gzip"
"flag"
"fmt"
"io"
"os"
)
// main parses the command line and hands the archive path, together with the
// value of the -n flag, to processFile.
func main() {
	// -n defaults to 4 and is forwarded to processFile as its num argument.
	var entryIndex int
	flag.IntVar(&entryIndex, "n", 4, "an integer")
	flag.Parse()

	// The archive path is the first positional argument; flag.Arg yields ""
	// when it is absent.
	archivePath := flag.Arg(0)
	if len(archivePath) == 0 {
		fmt.Println("Dude, you didn't pass in a tar file!")
		os.Exit(1)
	}

	fmt.Println("arg 1: ", archivePath)
	processFile(archivePath, entryIndex)
}
// processFile opens the gzip-compressed tar archive at srcFile and walks its
// entries. For every regular file it prints the running index and the entry
// name; when the index equals num it also dumps that entry's contents to
// standard output between " --- " markers and then returns.
//
// Directory entries are skipped without consuming an index. Entries of any
// other type are reported and do consume one. Any failure is printed to
// standard output and terminates the process with exit status 1.
func processFile(srcFile string, num int) {
	f, err := os.Open(srcFile)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer f.Close()

	gzf, err := gzip.NewReader(f)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer gzf.Close()

	tarReader := tar.NewReader(gzf)
	i := 0
	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Println(err)
			os.Exit(1)
		}

		name := header.Name
		switch header.Typeflag {
		case tar.TypeDir:
			// Directories do not take part in the numbering.
			continue
		case tar.TypeReg:
			fmt.Println("(", i, ")", "Name: ", name)
			if i == num {
				fmt.Println(" --- ")
				// A failed copy means the output is truncated; report it
				// instead of silently claiming success.
				if _, err := io.Copy(os.Stdout, tarReader); err != nil {
					fmt.Println(err)
					os.Exit(1)
				}
				fmt.Println(" --- ")
				// Return rather than calling os.Exit(0) so the deferred
				// Close calls above actually run.
				return
			}
		default:
			fmt.Printf("%s : %c %s %s\n",
				"Yikes! Unable to figure out type",
				header.Typeflag,
				"in file",
				name,
			)
		}
		i++
	}
}
@ngrande

This comment has been minimized.

Show comment
Hide comment
@ngrande

ngrande Nov 14, 2016

Nice! Thanks

I edited the code slightly to make it usable as a package and to actually write the extracted files to disk:

package main

import (
	"archive/tar"
	"compress/gzip"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"os"
)

// main takes the archive path from the first positional command-line argument
// and passes it to processFile.
func main() {
	// The -n flag is registered so that flag.Parse accepts it, but its value
	// is not used here.
	_ = flag.Int("n", 4, "an integer")
	flag.Parse()

	// flag.Arg yields "" when no positional argument was supplied.
	archivePath := flag.Arg(0)
	if len(archivePath) == 0 {
		fmt.Println("Dude, you didn't pass in a tar file!")
		os.Exit(1)
	}

	fmt.Println("arg 1: ", archivePath)
	processFile(archivePath)
}

// ReadPackedFile unpacks the tar.gz archive located at filepath by delegating
// to processFile. It panics when filepath is the empty string.
func ReadPackedFile(filepath string) {
	if len(filepath) == 0 {
		panic("Empty input!")
	}
	processFile(filepath)
}

// processFile extracts the gzip-compressed tar archive at srcFile. Directory
// entries are created with mode 0755 and regular files are written with mode
// 0755, each at the path given by the entry name (so relative names resolve
// against the current working directory). Entries of any other type are
// reported on standard output and skipped.
//
// Open, decompression, directory-creation and write failures are printed and
// terminate the process with exit status 1; a failure while reading an
// entry's contents panics.
func processFile(srcFile string) {
	f, err := os.Open(srcFile)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer f.Close()

	gzf, err := gzip.NewReader(f)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer gzf.Close()

	tarReader := tar.NewReader(gzf)

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Println(err)
			os.Exit(1)
		}

		name := header.Name

		switch header.Typeflag {
		case tar.TypeDir: // = directory
			fmt.Println("Directory:", name)
			// MkdirAll succeeds when the directory already exists, so
			// entries such as "./" or a repeated extraction do not fail.
			if err := os.MkdirAll(name, 0755); err != nil {
				fmt.Println(err)
				os.Exit(1)
			}
		case tar.TypeReg: // = regular file
			fmt.Println("Regular file:", name)
			data := make([]byte, header.Size)
			// A single Read may return fewer bytes than requested and
			// reports io.EOF once the entry is exhausted; ReadFull keeps
			// reading until the buffer is filled and only fails on a
			// genuinely short entry.
			if _, err := io.ReadFull(tarReader, data); err != nil {
				panic(fmt.Sprintf("Error reading file %q: %v", name, err))
			}

			if err := ioutil.WriteFile(name, data, 0755); err != nil {
				fmt.Println(err)
				os.Exit(1)
			}
		default:
			fmt.Printf("%s : %c %s %s\n",
				"Yikes! Unable to figure out type",
				header.Typeflag,
				"in file",
				name,
			)
		}
	}
}
```

ngrande commented Nov 14, 2016

Nice! Thanks

I edited the code slightly to make it usable as a package and to actually write the extracted files to disk:

package main

import (
	"archive/tar"
	"compress/gzip"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"os"
)

// main takes the archive path from the first positional command-line argument
// and passes it to processFile.
func main() {
	// The -n flag is registered so that flag.Parse accepts it, but its value
	// is not used here.
	_ = flag.Int("n", 4, "an integer")
	flag.Parse()

	// flag.Arg yields "" when no positional argument was supplied.
	archivePath := flag.Arg(0)
	if len(archivePath) == 0 {
		fmt.Println("Dude, you didn't pass in a tar file!")
		os.Exit(1)
	}

	fmt.Println("arg 1: ", archivePath)
	processFile(archivePath)
}

// ReadPackedFile unpacks the tar.gz archive located at filepath by delegating
// to processFile. It panics when filepath is the empty string.
func ReadPackedFile(filepath string) {
	if len(filepath) == 0 {
		panic("Empty input!")
	}
	processFile(filepath)
}

// processFile extracts the gzip-compressed tar archive at srcFile. Directory
// entries are created with mode 0755 and regular files are written with mode
// 0755, each at the path given by the entry name (so relative names resolve
// against the current working directory). Entries of any other type are
// reported on standard output and skipped.
//
// Open, decompression, directory-creation and write failures are printed and
// terminate the process with exit status 1; a failure while reading an
// entry's contents panics.
func processFile(srcFile string) {
	f, err := os.Open(srcFile)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer f.Close()

	gzf, err := gzip.NewReader(f)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer gzf.Close()

	tarReader := tar.NewReader(gzf)

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Println(err)
			os.Exit(1)
		}

		name := header.Name

		switch header.Typeflag {
		case tar.TypeDir: // = directory
			fmt.Println("Directory:", name)
			// MkdirAll succeeds when the directory already exists, so
			// entries such as "./" or a repeated extraction do not fail.
			if err := os.MkdirAll(name, 0755); err != nil {
				fmt.Println(err)
				os.Exit(1)
			}
		case tar.TypeReg: // = regular file
			fmt.Println("Regular file:", name)
			data := make([]byte, header.Size)
			// A single Read may return fewer bytes than requested and
			// reports io.EOF once the entry is exhausted; ReadFull keeps
			// reading until the buffer is filled and only fails on a
			// genuinely short entry.
			if _, err := io.ReadFull(tarReader, data); err != nil {
				panic(fmt.Sprintf("Error reading file %q: %v", name, err))
			}

			if err := ioutil.WriteFile(name, data, 0755); err != nil {
				fmt.Println(err)
				os.Exit(1)
			}
		default:
			fmt.Printf("%s : %c %s %s\n",
				"Yikes! Unable to figure out type",
				header.Typeflag,
				"in file",
				name,
			)
		}
	}
}
```
@gudmundur

This comment has been minimized.

Show comment
Hide comment
@gudmundur

gudmundur Dec 9, 2016

When writing it to a file, the example above is missing the updating of atime and mtime of the file.

When writing it to a file, the example above is missing the updating of atime and mtime of the file.

@smil2k

This comment has been minimized.

Show comment
Hide comment
@smil2k

smil2k Mar 28, 2017

Moreover, it will fail if the file is too large (more than a couple of KB).

You need to change the Read/WriteFile into:
f, _ := os.Create(name)
defer f.Close()
io.Copy(f, tarReader)

This will safely copy everything. (Just don't forget to add proper error handling.)

smil2k commented Mar 28, 2017

Moreover, it will fail if the file is too large (more than a couple of KB).

You need to change the Read/WriteFile into:
f, _ := os.Create(name)
defer f.Close()
io.Copy(f, tarReader)

This will safely copy everything. (Just don't forget to add proper error handling.)

@joonas-fi

This comment has been minimized.

Show comment
Hide comment
@joonas-fi

joonas-fi Jun 13, 2017

Thank you all! Here's an improved non-buffering version with error checking that only takes in an io.Reader (works for files, HTTP responses etc.):

package tarextract

// hat tip https://gist.github.com/indraniel/1a91458984179ab4cf80

import (
	"archive/tar"
	"compress/gzip"
	"io"
	"log"
	"os"
)

// ExtractTarGz reads a gzip-compressed tar stream from gzipStream and
// recreates its directory and regular-file entries on disk, using each entry
// name as the path (so relative names resolve against the current working
// directory). File contents are streamed to disk rather than buffered.
//
// Any failure, and any entry that is neither a directory nor a regular file,
// is reported through log.Fatal/log.Fatalf, which terminates the process.
//
// NOTE(review): entry names are handed to os.MkdirAll/os.Create unchecked, so
// an archive from an untrusted source can name paths outside the intended
// destination; validate names before using this on untrusted input.
func ExtractTarGz(gzipStream io.Reader) {
	uncompressedStream, err := gzip.NewReader(gzipStream)
	if err != nil {
		log.Fatalf("ExtractTarGz: NewReader failed: %s", err.Error())
	}
	defer uncompressedStream.Close()

	tarReader := tar.NewReader(uncompressedStream)

	for {
		header, err := tarReader.Next()

		if err == io.EOF {
			break
		}

		if err != nil {
			log.Fatalf("ExtractTarGz: Next() failed: %s", err.Error())
		}

		switch header.Typeflag {
		case tar.TypeDir:
			// MkdirAll tolerates directories that already exist (for
			// example a leading "./" entry), which plain Mkdir rejects.
			if err := os.MkdirAll(header.Name, 0755); err != nil {
				log.Fatalf("ExtractTarGz: Mkdir() failed: %s", err.Error())
			}
		case tar.TypeReg:
			outFile, err := os.Create(header.Name)
			if err != nil {
				log.Fatalf("ExtractTarGz: Create() failed: %s", err.Error())
			}
			// Close right after copying instead of deferring: a defer in
			// this loop would keep every extracted file open until the
			// whole archive is done and would discard Close errors.
			_, copyErr := io.Copy(outFile, tarReader)
			closeErr := outFile.Close()
			if copyErr != nil {
				log.Fatalf("ExtractTarGz: Copy() failed: %s", copyErr.Error())
			}
			if closeErr != nil {
				log.Fatalf("ExtractTarGz: Close() failed: %s", closeErr.Error())
			}
		default:
			// Typeflag is a single byte, so it is printed with %c.
			log.Fatalf(
				"ExtractTarGz: unknown type: %c in %s",
				header.Typeflag,
				header.Name)
		}
	}
}

Thank you all! Here's an improved non-buffering version with error checking that only takes in an io.Reader (works for files, HTTP responses etc.):

package tarextract

// hat tip https://gist.github.com/indraniel/1a91458984179ab4cf80

import (
	"archive/tar"
	"compress/gzip"
	"io"
	"log"
	"os"
)

// ExtractTarGz reads a gzip-compressed tar stream from gzipStream and
// recreates its directory and regular-file entries on disk, using each entry
// name as the path (so relative names resolve against the current working
// directory). File contents are streamed to disk rather than buffered.
//
// Any failure, and any entry that is neither a directory nor a regular file,
// is reported through log.Fatal/log.Fatalf, which terminates the process.
//
// NOTE(review): entry names are handed to os.MkdirAll/os.Create unchecked, so
// an archive from an untrusted source can name paths outside the intended
// destination; validate names before using this on untrusted input.
func ExtractTarGz(gzipStream io.Reader) {
	uncompressedStream, err := gzip.NewReader(gzipStream)
	if err != nil {
		log.Fatalf("ExtractTarGz: NewReader failed: %s", err.Error())
	}
	defer uncompressedStream.Close()

	tarReader := tar.NewReader(uncompressedStream)

	for {
		header, err := tarReader.Next()

		if err == io.EOF {
			break
		}

		if err != nil {
			log.Fatalf("ExtractTarGz: Next() failed: %s", err.Error())
		}

		switch header.Typeflag {
		case tar.TypeDir:
			// MkdirAll tolerates directories that already exist (for
			// example a leading "./" entry), which plain Mkdir rejects.
			if err := os.MkdirAll(header.Name, 0755); err != nil {
				log.Fatalf("ExtractTarGz: Mkdir() failed: %s", err.Error())
			}
		case tar.TypeReg:
			outFile, err := os.Create(header.Name)
			if err != nil {
				log.Fatalf("ExtractTarGz: Create() failed: %s", err.Error())
			}
			// Close right after copying instead of deferring: a defer in
			// this loop would keep every extracted file open until the
			// whole archive is done and would discard Close errors.
			_, copyErr := io.Copy(outFile, tarReader)
			closeErr := outFile.Close()
			if copyErr != nil {
				log.Fatalf("ExtractTarGz: Copy() failed: %s", copyErr.Error())
			}
			if closeErr != nil {
				log.Fatalf("ExtractTarGz: Close() failed: %s", closeErr.Error())
			}
		default:
			// Typeflag is a single byte, so it is printed with %c.
			log.Fatalf(
				"ExtractTarGz: unknown type: %c in %s",
				header.Typeflag,
				header.Name)
		}
	}
}

@surajnarwade

This comment has been minimized.

Show comment
Hide comment
@surajnarwade

surajnarwade Dec 11, 2017

How would one write a unit test for this?

How would one write a unit test for this?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment