Last active
August 4, 2024 11:57
-
-
Save hhsprings/07d5fab81590aa016c8b43ad51aea3cf to your computer and use it in GitHub Desktop.
Unpacking/repacking zip archives, written in Go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"archive/zip" | |
"archive/tar" | |
"compress/gzip" | |
//"compress/bzip2" // bzip2 package of standard library in Go 1.16 has no compression ability. | |
"github.com/dsnet/compress/bzip2" | |
"fmt" | |
"io" | |
"io/fs" | |
//"log" | |
"os" | |
"os/exec" | |
"path/filepath" | |
"strings" | |
"bytes" | |
"reflect" | |
"text/template" | |
"github.com/hellflame/argparse" | |
"github.com/axgle/mahonia" | |
"github.com/ulikunitz/xz" | |
"github.com/ulikunitz/xz/lzma" | |
"github.com/clbanning/mxj/v2" | |
"github.com/danwakefield/fnmatch" | |
) | |
/* | |
* ================================================= | |
* constants, global variables, etc. | |
* ================================================= | |
*/ | |
// CONF_DEFAULT is the configuration text written to the user's configuration
// file when that file does not exist yet. The text is run through
// text/template (with .Src set to the source archive path) before being
// parsed as JSON, which is why template comments may appear inside it.
const CONF_DEFAULT = `{{/*
configuration of zip2tar.
basically this file is a json-like format.
*/}}
{
"unpackers": {
"zip": ["7za", "x", "{{.Src}}"], {{/* if zip is encrypted, use this via --prefer_external_unpacker */}}
"lzh": ["lha", "x", "{{.Src}}"],
"7z": ["7za", "x", "{{.Src}}"],
"rar": ["py", "-m", "rarfile", "-e", "{{.Src}}", "."] {{/* python binding of libunrar */}}
},
"ignoredirpatterns": [
"__MACOSX",
".git",
".hg",
".svn"
]
}
`
// lzmaDictCapExps maps compression preset values (the slice index, 0-9) to
// the exponent of the dictionary capacity; the capacity used is
// 1 << lzmaDictCapExps[preset].
var lzmaDictCapExps = []uint{18, 20, 21, 22, 22, 23, 23, 24, 25, 26}
/* | |
* ================================================= | |
* helper functions | |
* ================================================= | |
*/ | |
// abspath returns the absolute form of p as computed by filepath.Abs.
// The error from filepath.Abs is deliberately dropped; callers only get
// whatever string was produced.
func abspath(p string) (abs string) {
	abs, _ = filepath.Abs(p)
	return abs
}
// pathJoin joins subdir onto topdir and reports whether the result stays
// strictly inside topdir (a guard against "zip slip" style entries such as
// "../../etc/passwd").
//
// It returns the cleaned, joined path and a validity flag. The flag is false
// when the joined path escapes topdir, when it is topdir itself, or when an
// absolute path cannot be computed for the comparison.
func pathJoin(topdir, subdir string) (string, bool) {
	topdir = filepath.Clean(topdir)
	subdir = filepath.Clean(subdir)
	resdir := filepath.Join(topdir, subdir)

	absTop, err := filepath.Abs(topdir)
	if err != nil {
		return resdir, false
	}
	absRes, err := filepath.Abs(resdir)
	if err != nil {
		return resdir, false
	}

	// A root directory already ends with a separator; appending another one
	// would produce a prefix ("//") that no path can match.
	sep := string(os.PathSeparator)
	prefix := absTop
	if !strings.HasSuffix(prefix, sep) {
		prefix += sep
	}
	return resdir, absRes != absTop && strings.HasPrefix(absRes, prefix)
}
// getatype classifies an archive by file name.
//
// It returns (archive type, compression type). Names are compared in lower
// case. "x.tar.<c>" yields ("tar", "<c>"); the short forms tgz, tbz2, txz and
// tlzma map to the matching tar compression; "zip" and "egg" yield
// ("zip", ""). A name without any dot yields ("", "").
//
// For any other extension, when forread is true the file is probed with
// zip.OpenReader and reported as ("zip", "") if that succeeds. Otherwise the
// last extension is returned as the archive type with no compression.
func getatype(fn string, forread bool) (string, string) {
	parts := strings.Split(strings.ToLower(filepath.Base(fn)), ".")
	if len(parts) < 2 {
		return "", ""
	}
	last := parts[len(parts)-1]
	if parts[len(parts)-2] == "tar" {
		return "tar", last
	}

	switch last {
	case "tar":
		return "tar", ""
	case "tgz":
		return "tar", "gz"
	case "tbz2":
		return "tar", "bz2"
	case "txz":
		return "tar", "xz"
	case "tlzma":
		return "tar", "lzma"
	case "zip", "egg":
		return "zip", ""
	}

	if forread {
		// Unknown extension: accept anything the zip reader can open.
		if zr, err := zip.OpenReader(fn); err == nil {
			zr.Close()
			return "zip", ""
		}
	}
	return last, ""
}
// runExtnProc runs an external program and waits for it to finish.
//
// args[0] is the program name, resolved with exec.LookPath; the remaining
// elements are its arguments. The child shares this process's stdin, stdout
// and stderr.
//
// It returns an error when args is empty, when the program cannot be found
// or started, or when it ends with a non-zero status.
func runExtnProc(args []string) error {
	if len(args) == 0 {
		return fmt.Errorf("no external command given")
	}
	progpath, err := exec.LookPath(args[0])
	if err != nil {
		return err
	}
	cmd := exec.Command(progpath, args[1:]...)
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		// An ExitError means the process ran and ended unsuccessfully;
		// anything else means it never ran properly, so keep that cause.
		if _, exited := err.(*exec.ExitError); exited {
			return fmt.Errorf("external process had ended with status=%d", cmd.ProcessState.ExitCode())
		}
		return fmt.Errorf("running %s: %w", progpath, err)
	}
	return nil
}
func runExtnUnpacker(args []string) (string, error) { | |
tdir, err := os.MkdirTemp("", "_tmp_" + filepath.Base(args[0])) | |
if err != nil { | |
return tdir, err | |
} | |
cdir, _ := filepath.Abs(".") | |
tdir, _ = filepath.Abs(tdir) | |
os.Chdir(tdir) | |
defer os.Chdir(cdir) | |
err = runExtnProc(args) | |
return tdir, err | |
} | |
/* | |
* ================================================= | |
* unarch components | |
* ================================================= | |
*/ | |
// UnarchArgs carries the command-line options and the configuration-file
// settings that drive one unpack/repack run. The pointer fields are the
// values handed back by the argument parser.
type UnarchArgs struct {
	src                      *string             // source archive or directory (positional); made absolute by setupConfs
	dst                      *string             // destination archive file or directory (positional)
	dirheadEncoding          *string             // charset used to decode zip entry names flagged as non-UTF-8 (-e/--encoding)
	unpackers_conf           map[string][]string // archive type -> external unpacker command line, from the "unpackers" config section
	ignoredirpatterns        []string            // fnmatch patterns; entries below a matching directory component are skipped
	prefer_external_unpacker *bool               // -U: use a configured external unpacker even for zip/tar sources
	pattern_match            *string             // -m: when non-empty, only entries matching this fnmatch pattern pass
	pattern_exclude_match    *string             // -x: when non-empty, entries matching this fnmatch pattern are dropped
	do_list                  *bool               // -l: list entries instead of writing them
}
func (arg *UnarchArgs) Filter(ent Entry) bool { | |
path, _ := ent.Path(".") | |
flag := fnmatch.FNM_PATHNAME | fnmatch.FNM_LEADING_DIR | |
res := true | |
if *arg.pattern_match != "" { | |
res = res && fnmatch.Match(*arg.pattern_match, path, flag) | |
} | |
if res && *arg.pattern_exclude_match != "" { | |
res = res && !fnmatch.Match(*arg.pattern_exclude_match, path, flag) | |
} | |
dir, _ := filepath.Split(path) | |
for _, pp := range strings.Split(filepath.ToSlash(dir), "/") { | |
for _, pat := range arg.ignoredirpatterns { | |
res = res && !fnmatch.Match(pat, filepath.Clean(pp), flag) | |
if !res { | |
break | |
} | |
} | |
if !res { | |
break | |
} | |
} | |
return res | |
} | |
// Entry is one item (file or directory) read from a source archive or from
// the file system.
type Entry interface {
	// Path returns the entry's path joined onto destdir.
	Path(destdir string) (string, error)
	// FileInfo returns the entry's file metadata.
	FileInfo() fs.FileInfo
	// Open returns a reader for the entry's content. The declared type is
	// io.Reader, but the zip and file-system implementations hand back a
	// value that also implements io.Closer.
	Open() ( /*io.ReadCloser*/ io.Reader, error)
}
// EntryWriter is a destination for entries: a listing, a directory on disk,
// a zip archive or a tar archive.
type EntryWriter interface {
	// Write stores (or lists) a single entry.
	Write(ent Entry) error
}
/* | |
* | |
*/ | |
type ZipEntry struct { | |
Entry | |
f *zip.File | |
dirheadEncoding string | |
} | |
func (ent *ZipEntry) Path(destdir string) (string, error) { | |
name := ent.f.Name | |
if ent.f.NonUTF8 { | |
dec := mahonia.NewDecoder(ent.dirheadEncoding) | |
name = dec.ConvertString(name) | |
} | |
// Join, with checking for ZipSlip. More Info: http://bit.ly/2MsjAWE | |
fpath, valid := pathJoin(destdir, name) | |
if !valid { | |
return "", fmt.Errorf("%s: illegal file path", fpath) | |
} | |
return fpath, nil | |
} | |
func (ent *ZipEntry) FileInfo() fs.FileInfo { | |
return ent.f.FileInfo() | |
} | |
func (ent *ZipEntry) Open() (/*io.ReadCloser*/io.Reader, error) { | |
return ent.f.Open() | |
} | |
/* | |
* | |
*/ | |
type TarEntry struct { | |
Entry | |
f *tar.Header | |
tr *tar.Reader | |
} | |
func (ent *TarEntry) Path(destdir string) (string, error) { | |
name := ent.f.Name | |
fpath, valid := pathJoin(destdir, name) | |
if !valid { | |
return "", fmt.Errorf("%s: illegal file path", fpath) | |
} | |
return fpath, nil | |
} | |
func (ent *TarEntry) FileInfo() fs.FileInfo { | |
return ent.f.FileInfo() | |
} | |
func (ent *TarEntry) Open() (/*io.ReadCloser*/io.Reader, error) { | |
return ent.tr, nil | |
} | |
/* | |
* | |
*/ | |
type FsdiskEntry struct { | |
Entry | |
fpath string | |
vpath string | |
st os.FileInfo | |
} | |
func (ent *FsdiskEntry) Path(destdir string) (string, error) { | |
name := ent.vpath | |
fpath, _ := pathJoin(destdir, name) | |
return fpath, nil | |
} | |
func (ent *FsdiskEntry) FileInfo() fs.FileInfo { | |
return ent.st | |
} | |
func (ent *FsdiskEntry) Open() (/*io.ReadCloser*/io.Reader, error) { | |
return os.Open(ent.fpath) | |
} | |
/* | |
* | |
*/ | |
type EntryListWriter struct { | |
EntryWriter | |
destdir string | |
} | |
func (wr *EntryListWriter) Write(ent Entry) error { | |
fpath, err := ent.Path(wr.destdir) | |
if err != nil { | |
return err | |
} | |
if ent.FileInfo().IsDir() { | |
return nil | |
} | |
fmt.Println(filepath.ToSlash(fpath)) | |
//rc.Close() | |
return err | |
} | |
/* | |
* | |
*/ | |
type EntryDiskWriter struct { | |
EntryWriter | |
destdir string | |
} | |
func (wr *EntryDiskWriter) Write(ent Entry) error { | |
fpath, err := ent.Path(wr.destdir) | |
if err != nil { | |
return err | |
} | |
if ent.FileInfo().IsDir() { | |
os.MkdirAll(fpath, os.ModePerm) | |
return nil | |
} | |
if err = os.MkdirAll(filepath.Dir(fpath), os.ModePerm); err != nil { | |
return err | |
} | |
rc, err := ent.Open() | |
if err != nil { | |
return err | |
} | |
outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, ent.FileInfo().Mode()) | |
if err != nil { | |
return err | |
} | |
defer outFile.Close() | |
_, err = io.Copy(outFile, rc) | |
if err != nil { | |
return err | |
} | |
fmt.Println(filepath.ToSlash(fpath)) | |
//rc.Close() | |
return err | |
} | |
/* | |
* | |
*/ | |
type EntryZipWriter struct { | |
EntryWriter | |
dst *zip.Writer | |
} | |
func (wr *EntryZipWriter) Write(ent Entry) error { | |
fpath, err := ent.Path("") | |
if err != nil { | |
return err | |
} | |
if ent.FileInfo().IsDir() { | |
return nil | |
} | |
header, err := zip.FileInfoHeader(ent.FileInfo()) | |
if err != nil { | |
return err | |
} | |
header.Name = fpath | |
header.Method = zip.Deflate | |
rc, err := ent.Open() | |
if err != nil { | |
return err | |
} | |
hdrwrt, err := wr.dst.CreateHeader(header) | |
if err != nil { | |
return err | |
} | |
_, err = io.Copy(hdrwrt, rc) | |
if err != nil { | |
return err | |
} | |
fmt.Println(filepath.ToSlash(fpath)) | |
//rc.Close() | |
return err | |
} | |
/* | |
* | |
*/ | |
type EntryTarWriter struct { | |
EntryWriter | |
dst *tar.Writer | |
} | |
func (wr *EntryTarWriter) Write(ent Entry) error { | |
fpath, err := ent.Path("") | |
if err != nil { | |
return err | |
} | |
if ent.FileInfo().IsDir() { | |
return nil | |
} | |
rc, err := ent.Open() | |
if err != nil { | |
return err | |
} | |
hdr := &tar.Header{ | |
Name: fpath, | |
Mode: int64(ent.FileInfo().Mode()), | |
Size: int64(ent.FileInfo().Size()), | |
ModTime: ent.FileInfo().ModTime(), | |
Format: tar.FormatPAX, | |
} | |
if err := wr.dst.WriteHeader(hdr); err != nil { | |
return err | |
} | |
_, err = io.Copy(wr.dst, rc) | |
if err != nil { | |
return err | |
} | |
fmt.Println(filepath.ToSlash(fpath)) | |
//rc.Close() | |
return err | |
} | |
/* | |
* ======================== | |
* Unzip | |
* ======================== | |
*/ | |
func Unzip(arg UnarchArgs, writer EntryWriter) (error) { | |
r, err := zip.OpenReader(*arg.src) | |
if err != nil { | |
return err | |
} | |
defer r.Close() | |
for _, f := range r.File { | |
ent := ZipEntry{f: f, dirheadEncoding: *arg.dirheadEncoding} | |
if !arg.Filter(&ent) { | |
continue | |
} | |
err := writer.Write(&ent) | |
if err != nil { | |
fmt.Fprintf(os.Stderr, "%s: %s\n", ent.f.Name, err) | |
//return err | |
} | |
} | |
return nil | |
} | |
/* | |
* ======================== | |
* Untar | |
* ======================== | |
*/ | |
func uncompReader(fn string) (io.Reader, error) { | |
_, ext2 := getatype(fn, true) | |
r, err := os.Open(fn) | |
if ext2 == "" || err != nil { | |
return r, err | |
} | |
if ext2 == "gz" { | |
return gzip.NewReader(r) | |
} else if ext2 == "bz2" { | |
return bzip2.NewReader(r, nil) | |
} else if ext2 == "xz" { | |
return xz.NewReader(r) | |
} else if ext2 == "lzma" { | |
return lzma.NewReader(r) | |
} | |
panic("unknown format") | |
} | |
func Untar(arg UnarchArgs, writer EntryWriter) error { | |
r, err := uncompReader(*arg.src) | |
if err != nil { | |
return err | |
} | |
tr := tar.NewReader(r) | |
for { | |
header, err := tr.Next() | |
switch { | |
case err == io.EOF: | |
return nil | |
case err != nil: | |
return err | |
case header == nil: | |
continue | |
} | |
ent := TarEntry{tr: tr, f: header} | |
if !arg.Filter(&ent) { | |
continue | |
} | |
err = writer.Write(&ent) | |
if err != nil { | |
fmt.Fprintf(os.Stderr, "%s: %s\n", ent.f.Name, err) | |
//return err | |
} | |
} | |
return nil | |
} | |
/* | |
* ======================== | |
* Invoke external unpacker | |
* ======================== | |
*/ | |
func Unany(cmd []string, arg UnarchArgs, writer EntryWriter) error { | |
tdir, err := runExtnUnpacker(cmd) | |
if err != nil { | |
return err | |
} | |
defer os.RemoveAll(tdir) | |
*arg.src = tdir | |
return Dirwalk(arg, writer) | |
} | |
/* | |
* ======================== | |
* Dir walk | |
* ======================== | |
*/ | |
func Dirwalk(arg UnarchArgs, writer EntryWriter) error { | |
root := *arg.src | |
walk := func(fn string, fi os.FileInfo, err error) error { | |
if err != nil { | |
return err | |
} | |
fpath := fn | |
vpath, _ := filepath.Rel(root, fn) | |
ent := FsdiskEntry{fpath: fpath, vpath: vpath, st: fi} | |
if !arg.Filter(&ent) { | |
return nil | |
} | |
err = writer.Write(&ent) | |
if err != nil { | |
fmt.Fprintf(os.Stderr, "%s: %s\n", ent.fpath, err) | |
//return err | |
} | |
return nil | |
} | |
filepath.Walk(root, walk) | |
return nil | |
} | |
/* | |
* ================================================= | |
* MAIN | |
* ================================================= | |
*/ | |
func setupConfs() (UnarchArgs, error) { | |
getconfpath := func () string { | |
home := os.Getenv("USERPROFILE") | |
if home == "" { | |
home = os.Getenv("HOME") | |
} | |
return filepath.Join(home, ".zip2tar.go.conf.json") | |
} | |
getconf := func () string { | |
confpath := getconfpath() | |
_, err := os.Stat(confpath) | |
var f *os.File | |
if err != nil { | |
f, _ = os.OpenFile(confpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0744) | |
fmt.Fprintf(f, "%s\n", CONF_DEFAULT) | |
f.Close() | |
} | |
f, _ = os.Open(confpath) | |
defer f.Close() | |
cont, _ := io.ReadAll(f) | |
return string(cont) | |
} | |
parser := argparse.NewParser(os.Args[0], "zip2tar", nil) | |
arg := UnarchArgs{} | |
arg.dirheadEncoding = parser.String( | |
"e", "encoding", | |
&argparse.Option{ | |
Default: "shiftjis", | |
Help: "specify encoding of directory header (only for zip)", | |
}) | |
arg.prefer_external_unpacker = parser.Flag( | |
"U", "prefer_external_unpacker", &argparse.Option{}) | |
arg.pattern_match = parser.String( | |
"m", "fnmatch", | |
&argparse.Option{}) | |
arg.pattern_exclude_match = parser.String( | |
"x", "fnmatch_exclude", | |
&argparse.Option{}) | |
arg.do_list = parser.Flag( | |
"l", "list", &argparse.Option{}) | |
arg.src = parser.String("", "src", &argparse.Option{Positional: true}) | |
arg.dst = parser.String("", "dst", &argparse.Option{Positional: true}) | |
var err error | |
if err = parser.Parse(os.Args[1:]); err != nil { | |
return arg, err | |
} | |
*arg.src, _ = filepath.Abs(*arg.src) | |
conftmpl, err := template.New("zip2tarconf").Parse(getconf()) | |
if err != nil { | |
return arg, err | |
} | |
var tmplout bytes.Buffer | |
esrc := strings.ReplaceAll(strings.ReplaceAll(*arg.src, `\`, `\\`), `"`, `\"`) | |
err = conftmpl.Execute(&tmplout, struct{Src string}{Src: esrc}) | |
if err != nil { | |
return arg, err | |
} | |
arg.unpackers_conf = map[string][]string{} | |
conf, err := mxj.NewMapJson([]byte(tmplout.String())) | |
unpackers := conf["unpackers"].(map[string]interface{}) | |
for k, v := range unpackers { | |
cmdi := reflect.ValueOf(v) | |
var cmd []string | |
for i := 0; i < cmdi.Len(); i++ { | |
cmd = append(cmd, fmt.Sprint(cmdi.Index(i))) | |
} | |
arg.unpackers_conf[k] = cmd | |
} | |
if conf["ignoredirpatterns"] != nil { | |
igndirpats := conf["ignoredirpatterns"].([]interface{}) | |
for _, v := range igndirpats { | |
pati := reflect.ValueOf(v) | |
arg.ignoredirpatterns = append(arg.ignoredirpatterns, fmt.Sprint(pati)) | |
} | |
} | |
return arg, err | |
} | |
func main() { | |
arg, err := setupConfs() | |
if err != nil { | |
panic(err) | |
} | |
st, err := os.Stat(*arg.src) | |
if err != nil { | |
panic(err) | |
} | |
oext1, oext2 := getatype(*arg.dst, false) | |
var writer EntryWriter | |
if *arg.do_list { | |
writer = &EntryListWriter{destdir: *arg.dst} | |
} else if oext1 == "zip" { | |
wr, err := os.OpenFile(*arg.dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0744) | |
if err == nil { | |
zwr := zip.NewWriter(wr) | |
writer = &EntryZipWriter{dst: zwr} | |
defer zwr.Close() | |
} | |
} else if oext1 == "tar" { | |
wr, err := os.OpenFile(*arg.dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0744) | |
if err == nil { | |
var twr *tar.Writer | |
if oext2 == "gz" { | |
gzwr, _ := gzip.NewWriterLevel(wr, 9) | |
defer gzwr.Close() | |
twr = tar.NewWriter(gzwr) | |
} else if oext2 == "bz2" { | |
bz2wr, _ := bzip2.NewWriter(wr, &bzip2.WriterConfig{Level: 9}) | |
defer bz2wr.Close() | |
twr = tar.NewWriter(bz2wr) | |
} else if oext2 == "xz" { | |
cfg := xz.WriterConfig{DictCap: 1 << lzmaDictCapExps[9]} | |
xzwr, _ := cfg.NewWriter(wr) | |
defer xzwr.Close() | |
twr = tar.NewWriter(xzwr) | |
} else if oext2 == "lzma" { | |
// sample code copied from https://github.com/ulikunitz/xz/blob/v0.5.10/cmd/gxz/file.go | |
cfg := lzma.WriterConfig{ | |
Properties: &lzma.Properties{LC: 3, LP: 0, PB: 2}, | |
DictCap: 1 << lzmaDictCapExps[9], | |
} | |
xzwr, _ := cfg.NewWriter(wr) | |
defer xzwr.Close() | |
twr = tar.NewWriter(xzwr) | |
} else { | |
twr = tar.NewWriter(wr) | |
} | |
writer = &EntryTarWriter{dst: twr} | |
defer twr.Close() | |
} | |
} else { | |
writer = &EntryDiskWriter{destdir: *arg.dst} | |
} | |
if err == nil { | |
iext1, _ := getatype(*arg.src, true) | |
if st.IsDir() { | |
err = Dirwalk(arg, writer) | |
} else { | |
cmd, ke := arg.unpackers_conf[iext1] | |
if ke && (*arg.prefer_external_unpacker || | |
(iext1 != "zip" && iext1 != "tar")) { | |
err = Unany(cmd, arg, writer) | |
} else { | |
if iext1 == "zip" { | |
err = Unzip(arg, writer) | |
} else { | |
err = Untar(arg, writer) | |
} | |
} | |
} | |
} | |
if err != nil { | |
panic(err) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment