Created
March 16, 2020 08:41
-
-
Save jccr/e908d99e82731f29b748afec6f12132e to your computer and use it in GitHub Desktop.
golang/go/archive/zip append patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 5975979e6d48dca82ec87835fb3d818273d8dfce Mon Sep 17 00:00:00 2001 | |
From: Juan Carlos Corona Romero <c@rlos.email> | |
Date: Mon, 16 Mar 2020 00:22:59 -0700 | |
Subject: [PATCH] archive/zip: experimental append feature | |
--- | |
reader.go | 18 ++++++++++ | |
writer.go | 87 +++++++++++++++++++++++++++++++++++++++++++---- | |
writer_test.go | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++ | |
3 files changed, 191 insertions(+), 6 deletions(-) | |
diff --git a/reader.go b/reader.go | |
index 13ff9ddcf4..4b97d947b0 100644 | |
--- a/reader.go | |
+++ b/reader.go | |
@@ -26,6 +26,8 @@ type Reader struct { | |
File []*File | |
Comment string | |
decompressors map[uint16]Decompressor | |
+ size int64 | |
+ dirOffset int64 | |
} | |
type ReadCloser struct { | |
@@ -84,6 +86,8 @@ func (z *Reader) init(r io.ReaderAt, size int64) error { | |
return err | |
} | |
z.r = r | |
+ z.size = size | |
+ z.dirOffset = int64(end.directoryOffset) | |
z.File = make([]*File, 0, end.directoryRecords) | |
z.Comment = end.comment | |
rs := io.NewSectionReader(r, 0, size) | |
@@ -178,6 +182,20 @@ func (f *File) Open() (io.ReadCloser, error) { | |
return rc, nil | |
} | |
+// AppendOffset returns the offset of the central directory, which is where appended entries begin. | |
+func (z *Reader) AppendOffset() int64 { | |
+ return z.dirOffset | |
+} | |
+ | |
+// Append appends entries to the existing zip archive represented by z. | |
+// The writer w should be positioned at z.AppendOffset(), the end of the archive's file data. | |
+// When the returned writer is closed, any entries with names that | |
+// already exist in the archive will have been "replaced" by the new | |
+// entries, although the original data will still be there. | |
+func (z *Reader) Append(w io.Writer) *Writer { | |
+ return newAppendingWriter(z, w) | |
+} | |
+ | |
type checksumReader struct { | |
rc io.ReadCloser | |
hash hash.Hash32 | |
diff --git a/writer.go b/writer.go | |
index cdc534eaf0..569c9ccf15 100644 | |
--- a/writer.go | |
+++ b/writer.go | |
@@ -27,6 +27,7 @@ type Writer struct { | |
last *fileWriter | |
closed bool | |
compressors map[uint16]Compressor | |
+ names map[string]int // filename -> index in dir slice. | |
comment string | |
// testHookCloseSizeOffset if non-nil is called with the size | |
@@ -55,6 +56,25 @@ func (w *Writer) SetOffset(n int64) { | |
w.cw.count = n | |
} | |
+func newAppendingWriter(r *Reader, fw io.Writer) *Writer { | |
+ w := &Writer{ | |
+ cw: &countWriter{ | |
+ w: bufio.NewWriter(fw), | |
+ count: r.AppendOffset(), | |
+ }, | |
+ dir: make([]*header, len(r.File), len(r.File)*3/2), | |
+ names: make(map[string]int), | |
+ } | |
+ for i, f := range r.File { | |
+ w.dir[i] = &header{ | |
+ FileHeader: &f.FileHeader, | |
+ offset: uint64(f.headerOffset), | |
+ } | |
+ w.names[f.Name] = i | |
+ } | |
+ return w | |
+} | |
+ | |
// Flush flushes any buffered data to the underlying writer. | |
// Calling Flush is not normally necessary; calling Close is sufficient. | |
func (w *Writer) Flush() error { | |
@@ -87,7 +107,14 @@ func (w *Writer) Close() error { | |
// write central directory | |
start := w.cw.count | |
+ records := uint64(0) | |
for _, h := range w.dir { | |
+ if h.FileHeader == nil { | |
+ // This entry has been superseded by a later | |
+ // appended entry. | |
+ continue | |
+ } | |
+ records++ | |
var buf [directoryHeaderLen]byte | |
b := writeBuf(buf[:]) | |
b.uint32(uint32(directoryHeaderSignature)) | |
@@ -144,7 +171,6 @@ func (w *Writer) Close() error { | |
} | |
end := w.cw.count | |
- records := uint64(len(w.dir)) | |
size := uint64(end - start) | |
offset := uint64(start) | |
@@ -222,6 +248,15 @@ func (w *Writer) Create(name string) (io.Writer, error) { | |
return w.CreateHeader(header) | |
} | |
+func (w *Writer) closeLastWriter() error { | |
+ if w.last != nil && !w.last.closed { | |
+ err := w.last.close() | |
+ w.last = nil | |
+ return err | |
+ } | |
+ return nil | |
+} | |
+ | |
// detectUTF8 reports whether s is a valid UTF-8 string, and whether the string | |
// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, | |
// or any other common encoding). | |
@@ -253,15 +288,20 @@ func detectUTF8(s string) (valid, require bool) { | |
// The file's contents must be written to the io.Writer before the next | |
// call to Create, CreateHeader, or Close. | |
func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) { | |
- if w.last != nil && !w.last.closed { | |
- if err := w.last.close(); err != nil { | |
- return nil, err | |
- } | |
+ if err := w.closeLastWriter(); err != nil { | |
+ return nil, err | |
} | |
if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh { | |
// See https://golang.org/issue/11144 confusion. | |
return nil, errors.New("archive/zip: invalid duplicate FileHeader") | |
} | |
+ if i, ok := w.names[fh.Name]; ok { | |
+ // We're appending a file that existed already, | |
+ // so clear out the old entry so that it won't | |
+ // be written to the central directory. | |
+ w.dir[i].FileHeader = nil | |
+ delete(w.names, fh.Name) | |
+ } | |
// The ZIP format has a sad state of affairs regarding character encoding. | |
// Officially, the name and comment fields are supposed to be encoded | |
@@ -373,6 +413,37 @@ func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) { | |
return ow, nil | |
} | |
+// Copy copies the file f (obtained from a Reader) into w. | |
+// It copies the compressed data directly, without recompressing it, and writes a data descriptor after it. | |
+func (w *Writer) Copy(f *File) error { | |
+ dataOffset, err := f.DataOffset() | |
+ if err != nil { | |
+ return err | |
+ } | |
+ if err := w.closeLastWriter(); err != nil { | |
+ return err | |
+ } | |
+ | |
+ fh := f.FileHeader | |
+ h := &header{ | |
+ FileHeader: &fh, | |
+ offset: uint64(w.cw.count), | |
+ } | |
+ fh.Flags |= 0x8 // we will write a data descriptor | |
+ w.dir = append(w.dir, h) | |
+ | |
+ if err := writeHeader(w.cw, &fh); err != nil { | |
+ return err | |
+ } | |
+ | |
+ r := io.NewSectionReader(f.zipr, dataOffset, int64(f.CompressedSize64)) | |
+ if _, err := io.Copy(w.cw, r); err != nil { | |
+ return err | |
+ } | |
+ | |
+ return writeDesc(w.cw, &fh) | |
+} | |
+ | |
func writeHeader(w io.Writer, h *FileHeader) error { | |
const maxUint16 = 1<<16 - 1 | |
if len(h.Name) > maxUint16 { | |
@@ -474,6 +545,10 @@ func (w *fileWriter) close() error { | |
fh.UncompressedSize = uint32(fh.UncompressedSize64) | |
} | |
+ return writeDesc(w.zipw, fh) | |
+} | |
+ | |
+func writeDesc(w io.Writer, fh *FileHeader) error { | |
// Write data descriptor. This is more complicated than one would | |
// think, see e.g. comments in zipfile.c:putextended() and | |
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588. | |
@@ -495,7 +570,7 @@ func (w *fileWriter) close() error { | |
b.uint32(fh.CompressedSize) | |
b.uint32(fh.UncompressedSize) | |
} | |
- _, err := w.zipw.Write(buf) | |
+ _, err := w.Write(buf) | |
return err | |
} | |
diff --git a/writer_test.go b/writer_test.go | |
index 1fedfd85e8..b51d2eac14 100644 | |
--- a/writer_test.go | |
+++ b/writer_test.go | |
@@ -247,6 +247,98 @@ func TestWriterTime(t *testing.T) { | |
} | |
} | |
+func TestWriterCopy(t *testing.T) { | |
+ // make a zip file | |
+ buf := new(bytes.Buffer) | |
+ w := NewWriter(buf) | |
+ for _, wt := range writeTests { | |
+ testCreate(t, w, &wt) | |
+ } | |
+ if err := w.Close(); err != nil { | |
+ t.Fatal(err) | |
+ } | |
+ | |
+ // read it back | |
+ src, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) | |
+ if err != nil { | |
+ t.Fatal(err) | |
+ } | |
+ for i, wt := range writeTests { | |
+ testReadFile(t, src.File[i], &wt) | |
+ } | |
+ | |
+ // make a new zip file copying the old compressed data. | |
+ buf2 := new(bytes.Buffer) | |
+ dst := NewWriter(buf2) | |
+ for _, f := range src.File { | |
+ if err := dst.Copy(f); err != nil { | |
+ t.Fatal(err) | |
+ } | |
+ } | |
+ if err := dst.Close(); err != nil { | |
+ t.Fatal(err) | |
+ } | |
+ | |
+ // read the new one back | |
+ r, err := NewReader(bytes.NewReader(buf2.Bytes()), int64(buf2.Len())) | |
+ if err != nil { | |
+ t.Fatal(err) | |
+ } | |
+ for i, wt := range writeTests { | |
+ testReadFile(t, r.File[i], &wt) | |
+ } | |
+} | |
+ | |
+func TestAppend(t *testing.T) { | |
+ // write a zip file | |
+ buf := new(bytes.Buffer) | |
+ w := NewWriter(buf) | |
+ | |
+ for _, wt := range writeTests { | |
+ testCreate(t, w, &wt) | |
+ } | |
+ | |
+ if err := w.Close(); err != nil { | |
+ t.Fatal(err) | |
+ } | |
+ | |
+ // read it back | |
+ r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len())) | |
+ if err != nil { | |
+ t.Fatal(err) | |
+ } | |
+ | |
+ // append a file to it. | |
+ abuf := new(bytes.Buffer) | |
+ abuf.Write(buf.Bytes()[:r.AppendOffset()]) | |
+ w = r.Append(abuf) | |
+ | |
+ wt := WriteTest{ | |
+ Name: "foo", | |
+ Data: []byte("Badgers, canines, weasels, owls, and snakes"), | |
+ Method: Store, | |
+ Mode: 0755, | |
+ } | |
+ testCreate(t, w, &wt) | |
+ | |
+ if err := w.Close(); err != nil { | |
+ t.Fatal(err) | |
+ } | |
+ | |
+ // read the whole thing back. | |
+ allBytes := abuf.Bytes() | |
+ | |
+ r, err = NewReader(bytes.NewReader(allBytes), int64(len(allBytes))) | |
+ if err != nil { | |
+ t.Fatal(err) | |
+ } | |
+ | |
+ writeTests := append(writeTests[1:], wt) | |
+ for i, wt := range writeTests { | |
+ testReadFile(t, r.File[i], &wt) | |
+ } | |
+} | |
+ | |
func TestWriterOffset(t *testing.T) { | |
largeData := make([]byte, 1<<17) | |
if _, err := rand.Read(largeData); err != nil { | |
-- | |
2.25.1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment