Skip to content

Instantly share code, notes, and snippets.

@vbatts
Last active August 29, 2015 14:23
Show Gist options
  • Save vbatts/010494540a4d46e2f516 to your computer and use it in GitHub Desktop.
Save vbatts/010494540a4d46e2f516 to your computer and use it in GitHub Desktop.
validating tar-split, with golang `compress/gzip`

Output

vbatts@valse ~/tmp/010494540a4d46e2f516 (master) $ go build app.go
vbatts@valse ~/tmp/010494540a4d46e2f516 (master) $ ./app
Starting Sum on "/tmp/tar-split-test.160191501": 2b1c1c203e6bb0ab10c14db7361ede2402ef27468f901f979fb7e8ff8cf3d8c5
Ending Sum on "/tmp/tar-split-test.160191501": 2b1c1c203e6bb0ab10c14db7361ede2402ef27468f901f979fb7e8ff8cf3d8c5

What is happening here?

This application uses the golang library github.com/vbatts/tar-split to disassemble and reassemble the intermediate TAR archive. This process is reproducible, but there was a question regarding compression.

In this example, a TAR archive is created with the file contents of this executable itself, the "app.go" source, and "README.md". This archive is gzip compressed, using golang's stdlib compress/gzip. The sha256 checksum of this tar.gz is displayed.

The archive is then disassembled, reassembled from the disassembled pieces, and compressed again with compress/gzip at the same compression level. The sha256 checksum of the resulting reassembled tar.gz is displayed.

Metadata

To inspect the files, pass the flag -keep and the temporary files will be left for you to review. You can then see the tar.gz tempfile produced, as well as the packer json document that is created during disassembly, and used for reassembly.

package main
import (
"archive/tar"
"compress/gzip"
"crypto/sha256"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"github.com/vbatts/tar-split/tar/asm"
"github.com/vbatts/tar-split/tar/storage"
)
// flKeep is the -keep command-line flag (default false); its usage text is
// "keep tempfiles around".
var flKeep = flag.Bool("keep", false, "keep tempfiles around")
// main parses the command-line flags and runs the round-trip demonstration.
//
// All of the work happens in run so that its deferred cleanup (closing and
// removing the temporary files) has already executed by the time log.Fatal
// terminates the process; log.Fatal exits via os.Exit, which skips any
// deferred calls still pending.
func main() {
	flag.Parse()
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run creates a gzip-compressed tar archive in a temporary file, disassembles
// it with tar-split, reassembles it, re-compresses the result and prints the
// sha256 checksum of the tar.gz before and after the round trip.
//
// The temporary files are removed on return unless -keep was given, in which
// case their names are printed after a successful run. The first error
// encountered is returned unchanged.
func run() error {
	// files to add to the archive
	//
	// NOTE: these are opened relative to the current working directory, and
	// the reassembly step below looks the bodies up under "." by the name
	// stored in the tar header, so the program is expected to be run from the
	// directory that contains them.
	tarFileList := []string{os.Args[0], "app.go", "README.md"}
	var keepFileList []string

	// 1) create a tar.gz && 2) get the sha256 of the tar.gz
	h := sha256.New()
	tf, err := ioutil.TempFile("", "tar-split-test.")
	if err != nil {
		return err
	}
	if *flKeep {
		keepFileList = append(keepFileList, tf.Name())
	} else {
		defer os.Remove(tf.Name())
	}
	// Registered after the removal so the file is closed before it is removed.
	defer tf.Close()

	// Everything gzip emits goes both to the hash and to the temporary file.
	zw := gzip.NewWriter(io.MultiWriter(h, tf))
	tw := tar.NewWriter(zw)

	// add some files to the tar archive
	for _, file := range tarFileList {
		if err := addFile(tw, file); err != nil {
			return err
		}
	}
	// Close the tar writer before the gzip writer so the tar trailer is
	// compressed and the gzip footer is flushed before the hash is read.
	if err := tw.Close(); err != nil {
		return err
	}
	if err := zw.Close(); err != nil {
		return err
	}
	checksumIn := h.Sum(nil)
	fmt.Printf("Starting Sum on %q: %x\n", tf.Name(), checksumIn)

	// 3) extract (and inflate) the tar and disassemble it
	if _, err := tf.Seek(0, 0); err != nil {
		return err
	}
	gr, err := gzip.NewReader(tf)
	if err != nil {
		return err
	}
	defer gr.Close()

	metaPackerFile, err := ioutil.TempFile("", "tar-split.packer.json.")
	if err != nil {
		return err
	}
	if *flKeep {
		keepFileList = append(keepFileList, metaPackerFile.Name())
	} else {
		defer os.Remove(metaPackerFile.Name())
	}
	defer metaPackerFile.Close()

	metaPacker := storage.NewJSONPacker(metaPackerFile)
	// since the files in tarFileList are present locally, we can discard the bodies here
	filePutter := storage.NewDiscardFilePutter()
	its, err := asm.NewInputTarStream(gr, metaPacker, filePutter)
	if err != nil {
		return err
	}
	// This is where the application would actually extract the tar archive
	if _, err := io.Copy(ioutil.Discard, its); err != nil {
		return err
	}
	if err := metaPackerFile.Sync(); err != nil {
		return err
	}

	// 4) assemble (and deflate) the tar
	if _, err := metaPackerFile.Seek(0, 0); err != nil {
		return err
	}
	metaUnpacker := storage.NewJSONUnpacker(metaPackerFile)
	fileGetter := storage.NewPathFileGetter(".")
	ots := asm.NewOutputTarStream(fileGetter, metaUnpacker)

	h.Reset() // get ready to get the checksum of the resulting tar.gz
	gw := gzip.NewWriter(h)
	// Reading the reassembled tar through the tee pushes every byte into the
	// gzip writer, whose compressed output feeds the hash.
	tr := io.TeeReader(ots, gw)
	if _, err := io.Copy(ioutil.Discard, tr); err != nil {
		ots.Close() // best effort; the copy error is the one worth reporting
		return err
	}
	if err := ots.Close(); err != nil {
		return err
	}
	if err := gw.Close(); err != nil {
		return err
	}

	// 5) checksum it for validity
	checksumOut := h.Sum(nil)
	fmt.Printf("Ending Sum on %q: %x\n", tf.Name(), checksumOut)

	if *flKeep {
		fmt.Printf("Preserved temporary files %v\n", keepFileList)
	}
	return nil
}

// addFile writes a header derived from the file's os.FileInfo, followed by
// the file's contents, to tw. The file is closed before addFile returns.
func addFile(tw *tar.Writer, path string) error {
	fh, err := os.Open(path)
	if err != nil {
		return err
	}
	defer fh.Close()

	fi, err := fh.Stat()
	if err != nil {
		return err
	}
	hdr, err := tar.FileInfoHeader(fi, "")
	if err != nil {
		return err
	}
	if err := tw.WriteHeader(hdr); err != nil {
		return err
	}
	_, err = io.Copy(tw, fh)
	return err
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment