Skip to content

Instantly share code, notes, and snippets.

@jeffharwell
Last active January 5, 2017 00:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffharwell/2e8785a0ec8acc8539901e841d595394 to your computer and use it in GitHub Desktop.
Save jeffharwell/2e8785a0ec8acc8539901e841d595394 to your computer and use it in GitHub Desktop.
Patch and tests to fix GZIPInputStream closing prematurely when reading concatenated GZIP files through an HTTPInputStream
# HG changeset patch
# User Jeff Harwell <jharwell@fuller.edu>
# Date 1483564868 28800
# Wed Jan 04 13:21:08 2017 -0800
# Node ID a0c6f393b603a858ea970e3cd1d256ffd3be6789
# Parent e5b1823a897ea5848d39a8675de67a9d26af9b1f
Committed two new test to java/util/zip/GZIP to test for the InputStreams that
do not return the bytes remaining in the stream when their .available() method is called,
such as HTTPInputStream.
diff --git a/test/java/util/zip/GZIP/GZIPBufferEndsAtBoundry.java b/test/java/util/zip/GZIP/GZIPBufferEndsAtBoundry.java
new file mode 100644
--- /dev/null
+++ b/test/java/util/zip/GZIP/GZIPBufferEndsAtBoundry.java
@@ -0,0 +1,166 @@
+/* @test
+ * @summary Test that the GZIPInput stream does not close prematurely
+ * while processing a concatenated .gz from an InputStream
+ * subclass where .available() does not always return the
+ * number of bytes left in the stream (like HTTPInputStream)
+ * and the input stream read buffer empties on the boundry
+ * between two concatenated GZIP files.
+ */
+
+import java.io.*;
+import java.util.*;
+import java.util.zip.*;
+
+public class GZIPBufferEndsAtBoundry {
+ static class MockConcatInputStream extends ByteArrayInputStream {
+ // This input stream makes sure that it will hand
+ // GZIPInputStream a buffer that consists of just the end of the
+ // first GZIP byte array and at that point .available() will
+ // return 0.
+ // This simulate an InputStream subclass like HTTPInputStream
+ // where the read buffer happens to end on the boundry of two
+ // concatenated GZIP files and .available() returns 0 because the
+ // InputStream has to make a blocking call to refill the
+ // read buffer.
+ int secondgzipoffset; // where the second GZIP file will start
+ // in the bytearray
+ int currentreadindex; // our current read position
+ int bytearraylength;
+ MockConcatInputStream(byte[] bytearray, int offset) {
+ super(bytearray);
+ secondgzipoffset = offset;
+ bytearraylength = bytearray.length;
+ currentreadindex = 0;
+ }
+ public int read(byte[] b, int off, int length) {
+ // If the requested read crosses the boundry between
+ // the first and second GZIP files then just return
+ // the end of the first GZIP file. Otherwise read normally.
+ int bytesread = 0;
+ if (currentreadindex < secondgzipoffset) {
+ // We have not read past the first GZIP file yet
+ if (currentreadindex + length < secondgzipoffset) {
+ // Current read request doesn't get to the second GZIP file
+ bytesread = super.read(b, off, length);
+ currentreadindex += bytesread;
+ } else if (currentreadindex + length >= secondgzipoffset) {
+ // Currert read request runs past the end of the first GZIP file
+ // just return the first gzip file
+ int bytestoread = secondgzipoffset - currentreadindex;
+ bytesread = super.read(b, off, bytestoread);
+ currentreadindex += bytesread;
+ }
+ } else {
+ // we are past the start of the second GZIP file, read normally
+ bytesread = super.read(b, off, length);
+ currentreadindex += bytesread;
+ }
+ return bytesread;
+ }
+ public int available() {
+ // For HTTPInputStream (and InputStream in general)
+ // available() "Returns an estimate of the number of bytes that
+ // can be read (or skipped over) from this input stream without
+ // blocking by the next invocation of a method" - Java Docs
+ if (currentreadindex == secondgzipoffset) {
+ // we just returned the end of the first GZIP file
+ // return 0 to simulate needing to make a blocking IO
+ // call to get the rest of the stream
+ return(0);
+ } else if (currentreadindex < secondgzipoffset) {
+ return(secondgzipoffset - currentreadindex);
+ }
+ // We are already past the first GZIP file
+ return(bytearraylength - currentreadindex);
+ }
+ }
+
+ public static void main(String[] args) throws Throwable {
+ // Create a test GZIP byte array consisting of two random
+ // arrays of integers that have been GZIP'd and
+ // concatenated together.
+ Random rnd = new Random();
+ ByteArrayOutputStream srcBAOS = new ByteArrayOutputStream();
+ ByteArrayOutputStream dstBAOS = new ByteArrayOutputStream();
+ int members = 2;
+ int secondgzipoffset = 0;
+ int seconduncompoff = 15000;
+ for (int i = 0; i < members; i++) {
+ byte[] src = new byte[seconduncompoff];
+ rnd.nextBytes(src);
+ srcBAOS.write(src);
+
+ try (GZIPOutputStream gzos = new GZIPOutputStream(dstBAOS)) {
+ gzos.write(src);
+ }
+ if (secondgzipoffset == 0) {
+ // The number of bytes in the output stream
+ // once we GZIP the first array is going to
+ // be the offset for the start of the second
+ // GZIP file
+ secondgzipoffset = dstBAOS.size();
+ }
+ }
+ byte[] srcBytes = srcBAOS.toByteArray();
+ byte[] dstBytes = dstBAOS.toByteArray();
+ test(srcBytes, dstBytes, 1024, 512, secondgzipoffset, seconduncompoff);
+ }
+
+ private static void test(byte[] uncompressedBytes, byte[] compressedBytes,
+ int readBufSize, int gzisBufSize, int secondgzipoff, int seconduncompoffset)
+ throws Throwable
+ {
+ try (MockConcatInputStream mcis = new MockConcatInputStream(compressedBytes, secondgzipoff);
+ GZIPInputStream gzis = new GZIPInputStream(mcis, gzisBufSize))
+ {
+ byte[] result = new byte[uncompressedBytes.length + 10];
+ byte[] buf = new byte[readBufSize];
+ int n = 0;
+ int off = 0;
+
+ while ((n = gzis.read(buf, 0, buf.length)) != -1) {
+ System.arraycopy(buf, 0, result, off, n);
+ off += n;
+ // no range check, if overflow, let it fail
+ }
+ // Test to see if gzis available properly returns a 0 at the end of the stream
+ if (gzis.available() != 0) {
+ throw new RuntimeException(
+ "GZIPInputStream reading failed! " +
+ "Original uncompressed length = " + uncompressedBytes.length +
+ " bytes and read " + off + " bytes during decompression" +
+ " but GZIPInputStream.available() did not return 0 after" +
+ " .read() returned -1 indicating the end of the stream");
+ }
+ // Test to see if our result is the same length as the source uncompressed byte array
+ // and check the integrity of the partial result if any
+ if (off != uncompressedBytes.length) {
+ if (!Arrays.equals(Arrays.copyOf(uncompressedBytes, seconduncompoffset),
+ Arrays.copyOf(result, off))) {
+ throw new RuntimeException("GZIPInputStream reading failed! " +
+ "Original uncompressed length = " + uncompressedBytes.length +
+ " bytes but read " + off + " bytes during decompression" +
+ " and partial result is corrupt and DOES NOT EQUAL the" +
+ " source first uncompressed byte array.");
+ } else {
+ throw new RuntimeException("GZIPInputStream reading failed! " +
+ "Original uncompressed length = " + uncompressedBytes.length +
+ " bytes but read " + off + " bytes during decompression. Partial" +
+ " result is equal to the source first uncompressed byte array.");
+ }
+ }
+ // Test to make sure the resulting decompressed byte array is equal to the
+ // source byte array.
+ if (!Arrays.equals(uncompressedBytes, Arrays.copyOf(result, off))) {
+ throw new RuntimeException(
+ "GZIPInputStream reading failed! " +
+ "Original uncompressed length = " + uncompressedBytes.length +
+ " bytes and read " + off + " bytes during decompression" +
+ " but resulting byte array DID NOT EQUAL the source" +
+ " uncompressed byte array.");
+ }
+
+
+ }
+ }
+}
diff --git a/test/java/util/zip/GZIP/GZIPHttpStreamRead.java b/test/java/util/zip/GZIP/GZIPHttpStreamRead.java
new file mode 100644
--- /dev/null
+++ b/test/java/util/zip/GZIP/GZIPHttpStreamRead.java
@@ -0,0 +1,110 @@
+/* @test
+ * @summary Test that the GZIPInput stream does not close prematurely
+ * while processing a concatenated .gz from an InputStream
+ * subclass, such as an HTTPInputStream, where .available()
+ * does not return the number of bytes left in the stream.
+ */
+
+import java.io.*;
+import java.util.*;
+import java.util.zip.*;
+
+public class GZIPHttpStreamRead {
+ /** Guaranteed to return a value that is out-of-spec for read(). */
+
+ static class MockHttpInputStream extends ByteArrayInputStream {
+ // Simulates a slow HTTP Connection where only a few bytes
+ // at a time are being read
+ MockHttpInputStream(byte[] bytearray) {
+ super(bytearray);
+ }
+ public int read(byte[] b, int off, int length) {
+ // We are only reading a maximum of 10 bytes
+ // into the buffer, regardless of what is requested
+ // Simulating a slow network connection
+ int read_to = 10;
+ if (length < read_to) {
+ read_to = length;
+ }
+ int bytesread = super.read(b, off, read_to);
+ return bytesread;
+ }
+ public int available() {
+ // For HTTPInputStream (and InputStream in general)
+ // available() "Returns an estimate of the number of bytes that
+ // can be read (or skipped over) from this input stream without
+ // blocking by the next invocation of a method" - Java Docs
+ //
+ // A slow HTTP connection will often be in the situation where
+ // the InputBuffer is empty and it needs to go to the network to
+ // get more data. So we will always return 0 available.
+ return(0);
+ }
+ }
+
+ public static void main(String[] args) throws Throwable {
+ Random rnd = new Random();
+ for (int i = 1; i < 100; i++) {
+ int members = rnd.nextInt(10) + 1;
+
+ ByteArrayOutputStream srcBAOS = new ByteArrayOutputStream();
+ ByteArrayOutputStream dstBAOS = new ByteArrayOutputStream();
+ for (int j = 0; j < members; j++) {
+ byte[] src = new byte[rnd.nextInt(8192) + 1];
+ rnd.nextBytes(src);
+ srcBAOS.write(src);
+
+ try (GZIPOutputStream gzos = new GZIPOutputStream(dstBAOS)) {
+ gzos.write(src);
+ }
+ }
+ byte[] srcBytes = srcBAOS.toByteArray();
+ byte[] dstBytes = dstBAOS.toByteArray();
+ // try different size of buffer to read the
+ // GZIPInputStream
+ /* just for fun when running manually
+ for (int j = 1; j < 10; j++) {
+ test(srcBytes, dstBytes, j);
+ }
+ */
+ for (int j = 0; j < 10; j++) {
+ int readBufSZ = rnd.nextInt(2048) + 1;
+ test(srcBytes,
+ dstBytes,
+ readBufSZ,
+ 512); // the defualt buffer size
+ test(srcBytes,
+ dstBytes,
+ readBufSZ,
+ rnd.nextInt(4096) + 1);
+ }
+ }
+ }
+
+ private static void test(byte[] src, byte[] dst,
+ int readBufSize, int gzisBufSize)
+ throws Throwable
+ {
+ try (MockHttpInputStream bais = new MockHttpInputStream(dst);
+ GZIPInputStream gzis = new GZIPInputStream(bais, gzisBufSize))
+ {
+ byte[] result = new byte[src.length + 10];
+ byte[] buf = new byte[readBufSize];
+ int n = 0;
+ int off = 0;
+
+ while ((n = gzis.read(buf, 0, buf.length)) != -1) {
+ System.arraycopy(buf, 0, result, off, n);
+ off += n;
+ // no range check, if overflow, let it fail
+ }
+ if (off != src.length || gzis.available() != 0 ||
+ !Arrays.equals(src, Arrays.copyOf(result, off))) {
+ throw new RuntimeException(
+ "GZIPInputStream reading failed! " +
+ ", src.len=" + src.length +
+ ", read=" + off);
+ }
+ }
+ }
+}
# HG changeset patch
# User Jeff Harwell <jharwell@fuller.edu>
# Date 1483565634 28800
# Wed Jan 04 13:33:54 2017 -0800
# Node ID 97437f51e996766c89de2e0a8662f58e3c0d45cd
# Parent a0c6f393b603a858ea970e3cd1d256ffd3be6789
Patched GZIPInputStream to pass new Test Suite
diff --git a/src/share/classes/java/util/zip/GZIPInputStream.java b/src/share/classes/java/util/zip/GZIPInputStream.java
--- a/src/share/classes/java/util/zip/GZIPInputStream.java
+++ b/src/share/classes/java/util/zip/GZIPInputStream.java
@@ -224,23 +224,28 @@
(readUInt(in) != (inf.getBytesWritten() & 0xffffffffL)))
throw new ZipException("Corrupt GZIP trailer");
- // If there are more bytes available in "in" or
- // the leftover in the "inf" is > 26 bytes:
- // this.trailer(8) + next.header.min(10) + next.trailer(8)
- // try concatenated case
- if (this.in.available() > 0 || n > 26) {
- int m = 8; // this.trailer
- try {
- m += readHeader(in); // next.header
- } catch (IOException ze) {
- return true; // ignore any malformed, do nothing
- }
- inf.reset();
- if (n > m)
- inf.setInput(buf, len - n + m, n - m);
- return false;
+ // We cannot know if the input stream has remaining bytes so
+ // assume that it does and try to read the next header. If there isn't
+ // one we will get a EOFException (see this.readUByte). Catch
+ // this exception and return true (eos reached)
+ int m = 8; // this.trailer
+ try {
+ m += readHeader(in); // next.header
+ } catch (EOFException ze) {
+ // We are at the end of the stream
+ return true;
+ } catch (IOException ze) {
+ // We were not at the end of the stream but what was
+ // left in the stream didn't make any sense, ignore it
+ // we already have what we came for
+ return true;
}
- return true;
+ // We found a valid header for an additional GZIP file
+ inf.reset();
+ if (n > m) {
+ inf.setInput(buf, len - n + m, n - m);
+ }
+ return false; // there is more GZIP data, keep going (concatenated gzip data set)
}
/*
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment