Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jccr/e908d99e82731f29b748afec6f12132e to your computer and use it in GitHub Desktop.
golang/go/archive/zip append patch
From 5975979e6d48dca82ec87835fb3d818273d8dfce Mon Sep 17 00:00:00 2001
From: Juan Carlos Corona Romero <c@rlos.email>
Date: Mon, 16 Mar 2020 00:22:59 -0700
Subject: [PATCH] archive/zip: experimental append feature
---
reader.go | 18 ++++++++++
writer.go | 87 +++++++++++++++++++++++++++++++++++++++++++----
writer_test.go | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 191 insertions(+), 6 deletions(-)
diff --git a/reader.go b/reader.go
index 13ff9ddcf4..4b97d947b0 100644
--- a/reader.go
+++ b/reader.go
@@ -26,6 +26,8 @@ type Reader struct {
File []*File
Comment string
decompressors map[uint16]Decompressor
+ size int64
+ dirOffset int64
}
type ReadCloser struct {
@@ -84,6 +86,8 @@ func (z *Reader) init(r io.ReaderAt, size int64) error {
return err
}
z.r = r
+ z.size = size
+ z.dirOffset = int64(end.directoryOffset)
z.File = make([]*File, 0, end.directoryRecords)
z.Comment = end.comment
rs := io.NewSectionReader(r, 0, size)
@@ -178,6 +182,20 @@ func (f *File) Open() (io.ReadCloser, error) {
return rc, nil
}
+// AppendOffset returns the offset at which appended entries will be written.
+func (z *Reader) AppendOffset() int64 {
+ return z.dirOffset
+}
+
+// Append appends entries to the existing zip archive represented by z.
+// The writer w should be positioned at the end of the archive data.
+// When the returned writer is closed, any entries whose names already
+// exist in the archive are superseded by the new entries: the old data
+// remains in the file but is omitted from the new central directory.
+func (z *Reader) Append(w io.Writer) *Writer {
+ return newAppendingWriter(z, w)
+}
+
type checksumReader struct {
rc io.ReadCloser
hash hash.Hash32
diff --git a/writer.go b/writer.go
index cdc534eaf0..569c9ccf15 100644
--- a/writer.go
+++ b/writer.go
@@ -27,6 +27,7 @@ type Writer struct {
last *fileWriter
closed bool
compressors map[uint16]Compressor
+ names map[string]int // filename -> index in dir slice.
comment string
// testHookCloseSizeOffset if non-nil is called with the size
@@ -55,6 +56,25 @@ func (w *Writer) SetOffset(n int64) {
w.cw.count = n
}
+func newAppendingWriter(r *Reader, fw io.Writer) *Writer {
+ w := &Writer{
+ cw: &countWriter{
+ w: bufio.NewWriter(fw),
+ count: r.AppendOffset(),
+ },
+ dir: make([]*header, len(r.File), len(r.File)*3/2),
+ names: make(map[string]int),
+ }
+ for i, f := range r.File {
+ w.dir[i] = &header{
+ FileHeader: &f.FileHeader,
+ offset: uint64(f.headerOffset),
+ }
+ w.names[f.Name] = i
+ }
+ return w
+}
+
// Flush flushes any buffered data to the underlying writer.
// Calling Flush is not normally necessary; calling Close is sufficient.
func (w *Writer) Flush() error {
@@ -87,7 +107,14 @@ func (w *Writer) Close() error {
// write central directory
start := w.cw.count
+ records := uint64(0)
for _, h := range w.dir {
+ if h.FileHeader == nil {
+ // This entry has been superseded by a later
+ // appended entry.
+ continue
+ }
+ records++
var buf [directoryHeaderLen]byte
b := writeBuf(buf[:])
b.uint32(uint32(directoryHeaderSignature))
@@ -144,7 +171,6 @@ func (w *Writer) Close() error {
}
end := w.cw.count
- records := uint64(len(w.dir))
size := uint64(end - start)
offset := uint64(start)
@@ -222,6 +248,15 @@ func (w *Writer) Create(name string) (io.Writer, error) {
return w.CreateHeader(header)
}
+func (w *Writer) closeLastWriter() error {
+ if w.last != nil && !w.last.closed {
+ err := w.last.close()
+ w.last = nil
+ return err
+ }
+ return nil
+}
+
// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string
// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
// or any other common encoding).
@@ -253,15 +288,20 @@ func detectUTF8(s string) (valid, require bool) {
// The file's contents must be written to the io.Writer before the next
// call to Create, CreateHeader, or Close.
func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
- if w.last != nil && !w.last.closed {
- if err := w.last.close(); err != nil {
- return nil, err
- }
+ if err := w.closeLastWriter(); err != nil {
+ return nil, err
}
if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
// See https://golang.org/issue/11144 confusion.
return nil, errors.New("archive/zip: invalid duplicate FileHeader")
}
+ if i, ok := w.names[fh.Name]; ok {
+ // We're appending a file with a name that already
+ // exists, so clear the old entry so that it is not
+ // written to the central directory.
+ w.dir[i].FileHeader = nil
+ delete(w.names, fh.Name)
+ }
// The ZIP format has a sad state of affairs regarding character encoding.
// Officially, the name and comment fields are supposed to be encoded
@@ -373,6 +413,37 @@ func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
return ow, nil
}
+// Copy copies the file f (obtained from a Reader) into w.
+// It copies the compressed form directly.
+func (w *Writer) Copy(f *File) error {
+ dataOffset, err := f.DataOffset()
+ if err != nil {
+ return err
+ }
+ if err := w.closeLastWriter(); err != nil {
+ return err
+ }
+
+ fh := f.FileHeader
+ h := &header{
+ FileHeader: &fh,
+ offset: uint64(w.cw.count),
+ }
+ fh.Flags |= 0x8 // we will write a data descriptor
+ w.dir = append(w.dir, h)
+
+ if err := writeHeader(w.cw, &fh); err != nil {
+ return err
+ }
+
+ r := io.NewSectionReader(f.zipr, dataOffset, int64(f.CompressedSize64))
+ if _, err := io.Copy(w.cw, r); err != nil {
+ return err
+ }
+
+ return writeDesc(w.cw, &fh)
+}
+
func writeHeader(w io.Writer, h *FileHeader) error {
const maxUint16 = 1<<16 - 1
if len(h.Name) > maxUint16 {
@@ -474,6 +545,10 @@ func (w *fileWriter) close() error {
fh.UncompressedSize = uint32(fh.UncompressedSize64)
}
+ return writeDesc(w.zipw, fh)
+}
+
+func writeDesc(w io.Writer, fh *FileHeader) error {
// Write data descriptor. This is more complicated than one would
// think, see e.g. comments in zipfile.c:putextended() and
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588.
@@ -495,7 +570,7 @@ func (w *fileWriter) close() error {
b.uint32(fh.CompressedSize)
b.uint32(fh.UncompressedSize)
}
- _, err := w.zipw.Write(buf)
+ _, err := w.Write(buf)
return err
}
diff --git a/writer_test.go b/writer_test.go
index 1fedfd85e8..b51d2eac14 100644
--- a/writer_test.go
+++ b/writer_test.go
@@ -247,6 +247,98 @@ func TestWriterTime(t *testing.T) {
}
}
+func TestWriterCopy(t *testing.T) {
+ // make a zip file
+ buf := new(bytes.Buffer)
+ w := NewWriter(buf)
+ for _, wt := range writeTests {
+ testCreate(t, w, &wt)
+ }
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ // read it back
+ src, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
+ if err != nil {
+ t.Fatal(err)
+ }
+ for i, wt := range writeTests {
+ testReadFile(t, src.File[i], &wt)
+ }
+
+ // make a new zip file copying the old compressed data.
+ buf2 := new(bytes.Buffer)
+ dst := NewWriter(buf2)
+ for _, f := range src.File {
+ if err := dst.Copy(f); err != nil {
+ t.Fatal(err)
+ }
+ }
+ if err := dst.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ // read the new one back
+ r, err := NewReader(bytes.NewReader(buf2.Bytes()), int64(buf2.Len()))
+ if err != nil {
+ t.Fatal(err)
+ }
+ for i, wt := range writeTests {
+ testReadFile(t, r.File[i], &wt)
+ }
+}
+
+func TestAppend(t *testing.T) {
+ // write a zip file
+ buf := new(bytes.Buffer)
+ w := NewWriter(buf)
+
+ for _, wt := range writeTests {
+ testCreate(t, w, &wt)
+ }
+
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ // read it back
+ r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // append a file to it.
+ abuf := new(bytes.Buffer)
+ abuf.Write(buf.Bytes()[:r.AppendOffset()])
+ w = r.Append(abuf)
+
+ wt := WriteTest{
+ Name: "foo",
+ Data: []byte("Badgers, canines, weasels, owls, and snakes"),
+ Method: Store,
+ Mode: 0755,
+ }
+ testCreate(t, w, &wt)
+
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ // read the whole thing back.
+ allBytes := abuf.Bytes()
+
+ r, err = NewReader(bytes.NewReader(allBytes), int64(len(allBytes)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ writeTests := append(writeTests[1:], wt)
+ for i, wt := range writeTests {
+ testReadFile(t, r.File[i], &wt)
+ }
+}
+
func TestWriterOffset(t *testing.T) {
largeData := make([]byte, 1<<17)
if _, err := rand.Read(largeData); err != nil {
--
2.25.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment