@mimoo
Last active May 27, 2024 20:01
How to compress a folder in Golang using tar and gzip (works with nested folders)
package main

import (
	"archive/tar"
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
)

func main() {
	// tar + gzip
	var buf bytes.Buffer
	if err := compress("./folderToCompress", &buf); err != nil {
		panic(err)
	}

	// write the .tar.gzip
	fileToWrite, err := os.OpenFile("./compress.tar.gzip", os.O_CREATE|os.O_RDWR, os.FileMode(0600))
	if err != nil {
		panic(err)
	}
	// write the buffer's bytes; buf.Bytes() does not drain the buffer,
	// so it can still be uncompressed below
	if _, err := fileToWrite.Write(buf.Bytes()); err != nil {
		panic(err)
	}
	fileToWrite.Close()

	// untar + ungzip into a destination folder
	if err := uncompress(&buf, "./uncompressHere/"); err != nil {
		// probably delete uncompressHere?
		panic(err)
	}
}

func compress(src string, buf io.Writer) error {
	// tar > gzip > buf
	zr := gzip.NewWriter(buf)
	tw := tar.NewWriter(zr)

	// walk through every file in the folder
	if err := filepath.Walk(src, func(file string, fi os.FileInfo, err error) error {
		// fail if the walk itself reported a problem
		if err != nil {
			return err
		}
		// generate tar header
		header, err := tar.FileInfoHeader(fi, file)
		if err != nil {
			return err
		}
		// must provide real name
		// (see https://golang.org/src/archive/tar/common.go?#L626)
		header.Name = filepath.ToSlash(file)
		// write header
		if err := tw.WriteHeader(header); err != nil {
			return err
		}
		// if not a dir, write file content
		if !fi.IsDir() {
			data, err := os.Open(file)
			if err != nil {
				return err
			}
			if _, err := io.Copy(tw, data); err != nil {
				data.Close()
				return err
			}
			data.Close()
		}
		return nil
	}); err != nil {
		return err
	}

	// produce tar
	if err := tw.Close(); err != nil {
		return err
	}
	// produce gzip
	if err := zr.Close(); err != nil {
		return err
	}
	return nil
}

// check for path traversal and correct forward slashes
func validRelPath(p string) bool {
	if p == "" || strings.Contains(p, `\`) || strings.HasPrefix(p, "/") || strings.Contains(p, "../") {
		return false
	}
	return true
}

func uncompress(src io.Reader, dst string) error {
	// ungzip
	zr, err := gzip.NewReader(src)
	if err != nil {
		return err
	}
	// untar
	tr := tar.NewReader(zr)

	// uncompress each element
	for {
		header, err := tr.Next()
		if err == io.EOF {
			break // End of archive
		}
		if err != nil {
			return err
		}
		// validate name against path traversal
		if !validRelPath(header.Name) {
			return fmt.Errorf("tar contained invalid name %q", header.Name)
		}
		// add dst + re-format slashes according to system
		target := filepath.Join(dst, header.Name)
		// if no join is needed, replace with ToSlash:
		// target = filepath.ToSlash(header.Name)

		// check the type
		switch header.Typeflag {
		// if it's a dir and it doesn't exist, create it (with 0755 permission)
		case tar.TypeDir:
			if _, err := os.Stat(target); err != nil {
				if err := os.MkdirAll(target, 0755); err != nil {
					return err
				}
			}
		// if it's a file, create it (with the same permission)
		case tar.TypeReg:
			fileToWrite, err := os.OpenFile(target, os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode))
			if err != nil {
				return err
			}
			// copy over contents
			if _, err := io.Copy(fileToWrite, tr); err != nil {
				fileToWrite.Close()
				return err
			}
			// manually close here after each file operation; deferring would cause each file close
			// to wait until all operations have completed.
			fileToWrite.Close()
		}
	}
	return nil
}
@Templum

Templum commented Oct 20, 2020

Missing target initialization on line L106

@mimoo
Author

mimoo commented Oct 20, 2020

sorry -.- you can see how I did it for real in eureka

@Templum

Templum commented Oct 20, 2020

@mimoo thanks for sharing :)

@aamaanaa

aamaanaa commented May 27, 2024

This is what I came up with:

path := "/full/rel/path/to/folder"

	// zip the file, make sure it is compressed for faster speed
	var buf bytes.Buffer
	err := Compress(path, &buf)
	if err != nil {
		panic(err)
	}

	// write the compressed file to disk
	err = os.WriteFile(fmt.Sprintf("%s.%s", filepath.Base(path), "tar.gz"), buf.Bytes(), os.ModePerm)
	if err != nil {
		panic(err)
	}

func Compress(src string, buf io.Writer) error {
	zr := gzip.NewWriter(buf)
	defer zr.Close()
	tw := tar.NewWriter(zr)
	defer tw.Close()

	return filepath.Walk(src, func(file string, fi os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		header, err := tar.FileInfoHeader(fi, file)
		if err != nil {
			return err
		}

		relPath, err := filepath.Rel(src, file)
		if err != nil {
			return err
		}
		header.Name = filepath.ToSlash(relPath)

		if err := tw.WriteHeader(header); err != nil {
			return err
		}

		if !fi.IsDir() {
			data, err := os.Open(file)
			if err != nil {
				return err
			}
			defer data.Close()

			_, err = io.Copy(tw, data)
			if err != nil {
				return err
			}
		}
		return nil
	})
}

Here are the main differences between my implementation and your old compress function:

  1. Relative Paths: The old compress function uses the full path of each file in the tar header (header.Name = filepath.ToSlash(file)), so decompressing the archive recreates the full directory structure. The new Compress function uses paths relative to the source directory (header.Name = filepath.ToSlash(relPath)), so decompressing only recreates the structure below the source directory (a small sketch of the difference follows this list).

  2. Error Handling: The old compress function ignores both the error returned by filepath.Walk and the err argument passed to the walk callback, so a failure while walking the directory or compressing a file is silently dropped. The new Compress function returns immediately if there's an error (if err != nil { return err }).

If you don't want the entire function to stop on a single error, have the filepath.Walk callback return nil instead of err; returning nil tells filepath.Walk to continue with the next entry. Whether that is appropriate depends on your use case (a sketch of this tolerant variant also follows this list).

  3. Resource Management: Use defer to ensure that the gzip writer and tar writer are closed even if an error occurs, preventing resource leaks. The old compress function also closes these writers, but it does so manually at the end of the function; if an error occurs before those lines, the writers are never closed.

  4. File Opening: In the new Compress function, each file is opened and its Close deferred right after the check that the file info is not a directory, so every file is closed as soon as its contents have been copied. In the old compress function, the file was opened but never explicitly closed, which can leak open file descriptors.
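
To make difference 1 concrete, here is a small self-contained sketch (the folder and file names are made up for illustration) of what ends up in header.Name under each approach:

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// hypothetical values, as the Walk callback would see them
	src := "./folderToCompress"
	file := "folderToCompress/sub/file.txt"

	// old gist: the full walk path is stored in the archive
	oldName := filepath.ToSlash(file)

	// new Compress: only the path relative to src is stored
	relPath, err := filepath.Rel(src, file)
	if err != nil {
		panic(err)
	}
	newName := filepath.ToSlash(relPath)

	fmt.Println(oldName) // folderToCompress/sub/file.txt
	fmt.Println(newName) // sub/file.txt
}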
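
And for the "return nil to keep going" variant mentioned above: the sketch below is not from the gist, it only demonstrates the filepath.Walk behaviour with a standalone walker (walkTolerant and the folder name are made up); the same pattern can be dropped into the Compress callback.

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// walkTolerant visits every entry under root and keeps walking even when
// an entry cannot be read, because the callback returns nil on error.
func walkTolerant(root string) error {
	return filepath.Walk(root, func(path string, fi os.FileInfo, err error) error {
		if err != nil {
			// returning err here would abort the whole walk;
			// returning nil tells filepath.Walk to continue with the next entry
			fmt.Fprintf(os.Stderr, "skipping %q: %v\n", path, err)
			return nil
		}
		fmt.Println(path)
		return nil
	})
}

func main() {
	if err := walkTolerant("./folderToCompress"); err != nil {
		panic(err)
	}
}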
