Last active
January 5, 2017 00:59
-
-
Save jeffharwell/2e8785a0ec8acc8539901e841d595394 to your computer and use it in GitHub Desktop.
Patch and tests to fix GZIPInputStream closing prematurely when reading concatenated GZIP files through an HTTPInputStream
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# HG changeset patch | |
# User Jeff Harwell <jharwell@fuller.edu> | |
# Date 1483564868 28800 | |
# Wed Jan 04 13:21:08 2017 -0800 | |
# Node ID a0c6f393b603a858ea970e3cd1d256ffd3be6789 | |
# Parent e5b1823a897ea5848d39a8675de67a9d26af9b1f | |
Committed two new test to java/util/zip/GZIP to test for the InputStreams that | |
do not return the bytes remaining in the stream when their .available() method is called, | |
such as HTTPInputStream. | |
diff --git a/test/java/util/zip/GZIP/GZIPBufferEndsAtBoundry.java b/test/java/util/zip/GZIP/GZIPBufferEndsAtBoundry.java | |
new file mode 100644 | |
--- /dev/null | |
+++ b/test/java/util/zip/GZIP/GZIPBufferEndsAtBoundry.java | |
@@ -0,0 +1,166 @@ | |
+/* @test | |
+ * @summary Test that the GZIPInput stream does not close prematurely | |
+ * while processing a concatenated .gz from an InputStream | |
+ * subclass where .available() does not always return the | |
+ * number of bytes left in the stream (like HTTPInputStream) | |
+ * and the input stream read buffer empties on the boundry | |
+ * between two concatenated GZIP files. | |
+ */ | |
+ | |
+import java.io.*; | |
+import java.util.*; | |
+import java.util.zip.*; | |
+ | |
+public class GZIPBufferEndsAtBoundry { | |
+ static class MockConcatInputStream extends ByteArrayInputStream { | |
+ // This input stream makes sure that it will hand | |
+ // GZIPInputStream a buffer that consists of just the end of the | |
+ // first GZIP byte array and at that point .available() will | |
+ // return 0. | |
+ // This simulate an InputStream subclass like HTTPInputStream | |
+ // where the read buffer happens to end on the boundry of two | |
+ // concatenated GZIP files and .available() returns 0 because the | |
+ // InputStream has to make a blocking call to refill the | |
+ // read buffer. | |
+ int secondgzipoffset; // where the second GZIP file will start | |
+ // in the bytearray | |
+ int currentreadindex; // our current read position | |
+ int bytearraylength; | |
+ MockConcatInputStream(byte[] bytearray, int offset) { | |
+ super(bytearray); | |
+ secondgzipoffset = offset; | |
+ bytearraylength = bytearray.length; | |
+ currentreadindex = 0; | |
+ } | |
+ public int read(byte[] b, int off, int length) { | |
+ // If the requested read crosses the boundry between | |
+ // the first and second GZIP files then just return | |
+ // the end of the first GZIP file. Otherwise read normally. | |
+ int bytesread = 0; | |
+ if (currentreadindex < secondgzipoffset) { | |
+ // We have not read past the first GZIP file yet | |
+ if (currentreadindex + length < secondgzipoffset) { | |
+ // Current read request doesn't get to the second GZIP file | |
+ bytesread = super.read(b, off, length); | |
+ currentreadindex += bytesread; | |
+ } else if (currentreadindex + length >= secondgzipoffset) { | |
+ // Currert read request runs past the end of the first GZIP file | |
+ // just return the first gzip file | |
+ int bytestoread = secondgzipoffset - currentreadindex; | |
+ bytesread = super.read(b, off, bytestoread); | |
+ currentreadindex += bytesread; | |
+ } | |
+ } else { | |
+ // we are past the start of the second GZIP file, read normally | |
+ bytesread = super.read(b, off, length); | |
+ currentreadindex += bytesread; | |
+ } | |
+ return bytesread; | |
+ } | |
+ public int available() { | |
+ // For HTTPInputStream (and InputStream in general) | |
+ // available() "Returns an estimate of the number of bytes that | |
+ // can be read (or skipped over) from this input stream without | |
+ // blocking by the next invocation of a method" - Java Docs | |
+ if (currentreadindex == secondgzipoffset) { | |
+ // we just returned the end of the first GZIP file | |
+ // return 0 to simulate needing to make a blocking IO | |
+ // call to get the rest of the stream | |
+ return(0); | |
+ } else if (currentreadindex < secondgzipoffset) { | |
+ return(secondgzipoffset - currentreadindex); | |
+ } | |
+ // We are already past the first GZIP file | |
+ return(bytearraylength - currentreadindex); | |
+ } | |
+ } | |
+ | |
+ public static void main(String[] args) throws Throwable { | |
+ // Create a test GZIP byte array consisting of two random | |
+ // arrays of integers that have been GZIP'd and | |
+ // concatenated together. | |
+ Random rnd = new Random(); | |
+ ByteArrayOutputStream srcBAOS = new ByteArrayOutputStream(); | |
+ ByteArrayOutputStream dstBAOS = new ByteArrayOutputStream(); | |
+ int members = 2; | |
+ int secondgzipoffset = 0; | |
+ int seconduncompoff = 15000; | |
+ for (int i = 0; i < members; i++) { | |
+ byte[] src = new byte[seconduncompoff]; | |
+ rnd.nextBytes(src); | |
+ srcBAOS.write(src); | |
+ | |
+ try (GZIPOutputStream gzos = new GZIPOutputStream(dstBAOS)) { | |
+ gzos.write(src); | |
+ } | |
+ if (secondgzipoffset == 0) { | |
+ // The number of bytes in the output stream | |
+ // once we GZIP the first array is going to | |
+ // be the offset for the start of the second | |
+ // GZIP file | |
+ secondgzipoffset = dstBAOS.size(); | |
+ } | |
+ } | |
+ byte[] srcBytes = srcBAOS.toByteArray(); | |
+ byte[] dstBytes = dstBAOS.toByteArray(); | |
+ test(srcBytes, dstBytes, 1024, 512, secondgzipoffset, seconduncompoff); | |
+ } | |
+ | |
+ private static void test(byte[] uncompressedBytes, byte[] compressedBytes, | |
+ int readBufSize, int gzisBufSize, int secondgzipoff, int seconduncompoffset) | |
+ throws Throwable | |
+ { | |
+ try (MockConcatInputStream mcis = new MockConcatInputStream(compressedBytes, secondgzipoff); | |
+ GZIPInputStream gzis = new GZIPInputStream(mcis, gzisBufSize)) | |
+ { | |
+ byte[] result = new byte[uncompressedBytes.length + 10]; | |
+ byte[] buf = new byte[readBufSize]; | |
+ int n = 0; | |
+ int off = 0; | |
+ | |
+ while ((n = gzis.read(buf, 0, buf.length)) != -1) { | |
+ System.arraycopy(buf, 0, result, off, n); | |
+ off += n; | |
+ // no range check, if overflow, let it fail | |
+ } | |
+ // Test to see if gzis available properly returns a 0 at the end of the stream | |
+ if (gzis.available() != 0) { | |
+ throw new RuntimeException( | |
+ "GZIPInputStream reading failed! " + | |
+ "Original uncompressed length = " + uncompressedBytes.length + | |
+ " bytes and read " + off + " bytes during decompression" + | |
+ " but GZIPInputStream.available() did not return 0 after" + | |
+ " .read() returned -1 indicating the end of the stream"); | |
+ } | |
+ // Test to see if our result is the same length as the source uncompressed byte array | |
+ // and check the integrity of the partial result if any | |
+ if (off != uncompressedBytes.length) { | |
+ if (!Arrays.equals(Arrays.copyOf(uncompressedBytes, seconduncompoffset), | |
+ Arrays.copyOf(result, off))) { | |
+ throw new RuntimeException("GZIPInputStream reading failed! " + | |
+ "Original uncompressed length = " + uncompressedBytes.length + | |
+ " bytes but read " + off + " bytes during decompression" + | |
+ " and partial result is corrupt and DOES NOT EQUAL the" + | |
+ " source first uncompressed byte array."); | |
+ } else { | |
+ throw new RuntimeException("GZIPInputStream reading failed! " + | |
+ "Original uncompressed length = " + uncompressedBytes.length + | |
+ " bytes but read " + off + " bytes during decompression. Partial" + | |
+ " result is equal to the source first uncompressed byte array."); | |
+ } | |
+ } | |
+ // Test to make sure the resulting decompressed byte array is equal to the | |
+ // source byte array. | |
+ if (!Arrays.equals(uncompressedBytes, Arrays.copyOf(result, off))) { | |
+ throw new RuntimeException( | |
+ "GZIPInputStream reading failed! " + | |
+ "Original uncompressed length = " + uncompressedBytes.length + | |
+ " bytes and read " + off + " bytes during decompression" + | |
+ " but resulting byte array DID NOT EQUAL the source" + | |
+ " uncompressed byte array."); | |
+ } | |
+ | |
+ | |
+ } | |
+ } | |
+} | |
diff --git a/test/java/util/zip/GZIP/GZIPHttpStreamRead.java b/test/java/util/zip/GZIP/GZIPHttpStreamRead.java | |
new file mode 100644 | |
--- /dev/null | |
+++ b/test/java/util/zip/GZIP/GZIPHttpStreamRead.java | |
@@ -0,0 +1,110 @@ | |
+/* @test | |
+ * @summary Test that the GZIPInput stream does not close prematurely | |
+ * while processing a concatenated .gz from an InputStream | |
+ * subclass, such as an HTTPInputStream, where .available() | |
+ * does not return the number of bytes left in the stream. | |
+ */ | |
+ | |
+import java.io.*; | |
+import java.util.*; | |
+import java.util.zip.*; | |
+ | |
+public class GZIPHttpStreamRead { | |
+ /** Guaranteed to return a value that is out-of-spec for read(). */ | |
+ | |
+ static class MockHttpInputStream extends ByteArrayInputStream { | |
+ // Simulates a slow HTTP Connection where only a few bytes | |
+ // at a time are being read | |
+ MockHttpInputStream(byte[] bytearray) { | |
+ super(bytearray); | |
+ } | |
+ public int read(byte[] b, int off, int length) { | |
+ // We are only reading a maximum of 10 bytes | |
+ // into the buffer, regardless of what is requested | |
+ // Simulating a slow network connection | |
+ int read_to = 10; | |
+ if (length < read_to) { | |
+ read_to = length; | |
+ } | |
+ int bytesread = super.read(b, off, read_to); | |
+ return bytesread; | |
+ } | |
+ public int available() { | |
+ // For HTTPInputStream (and InputStream in general) | |
+ // available() "Returns an estimate of the number of bytes that | |
+ // can be read (or skipped over) from this input stream without | |
+ // blocking by the next invocation of a method" - Java Docs | |
+ // | |
+ // A slow HTTP connection will often be in the situation where | |
+ // the InputBuffer is empty and it needs to go to the network to | |
+ // get more data. So we will always return 0 available. | |
+ return(0); | |
+ } | |
+ } | |
+ | |
+ public static void main(String[] args) throws Throwable { | |
+ Random rnd = new Random(); | |
+ for (int i = 1; i < 100; i++) { | |
+ int members = rnd.nextInt(10) + 1; | |
+ | |
+ ByteArrayOutputStream srcBAOS = new ByteArrayOutputStream(); | |
+ ByteArrayOutputStream dstBAOS = new ByteArrayOutputStream(); | |
+ for (int j = 0; j < members; j++) { | |
+ byte[] src = new byte[rnd.nextInt(8192) + 1]; | |
+ rnd.nextBytes(src); | |
+ srcBAOS.write(src); | |
+ | |
+ try (GZIPOutputStream gzos = new GZIPOutputStream(dstBAOS)) { | |
+ gzos.write(src); | |
+ } | |
+ } | |
+ byte[] srcBytes = srcBAOS.toByteArray(); | |
+ byte[] dstBytes = dstBAOS.toByteArray(); | |
+ // try different size of buffer to read the | |
+ // GZIPInputStream | |
+ /* just for fun when running manually | |
+ for (int j = 1; j < 10; j++) { | |
+ test(srcBytes, dstBytes, j); | |
+ } | |
+ */ | |
+ for (int j = 0; j < 10; j++) { | |
+ int readBufSZ = rnd.nextInt(2048) + 1; | |
+ test(srcBytes, | |
+ dstBytes, | |
+ readBufSZ, | |
+ 512); // the defualt buffer size | |
+ test(srcBytes, | |
+ dstBytes, | |
+ readBufSZ, | |
+ rnd.nextInt(4096) + 1); | |
+ } | |
+ } | |
+ } | |
+ | |
+ private static void test(byte[] src, byte[] dst, | |
+ int readBufSize, int gzisBufSize) | |
+ throws Throwable | |
+ { | |
+ try (MockHttpInputStream bais = new MockHttpInputStream(dst); | |
+ GZIPInputStream gzis = new GZIPInputStream(bais, gzisBufSize)) | |
+ { | |
+ byte[] result = new byte[src.length + 10]; | |
+ byte[] buf = new byte[readBufSize]; | |
+ int n = 0; | |
+ int off = 0; | |
+ | |
+ while ((n = gzis.read(buf, 0, buf.length)) != -1) { | |
+ System.arraycopy(buf, 0, result, off, n); | |
+ off += n; | |
+ // no range check, if overflow, let it fail | |
+ } | |
+ if (off != src.length || gzis.available() != 0 || | |
+ !Arrays.equals(src, Arrays.copyOf(result, off))) { | |
+ throw new RuntimeException( | |
+ "GZIPInputStream reading failed! " + | |
+ ", src.len=" + src.length + | |
+ ", read=" + off); | |
+ } | |
+ } | |
+ } | |
+} | |
# HG changeset patch | |
# User Jeff Harwell <jharwell@fuller.edu> | |
# Date 1483565634 28800 | |
# Wed Jan 04 13:33:54 2017 -0800 | |
# Node ID 97437f51e996766c89de2e0a8662f58e3c0d45cd | |
# Parent a0c6f393b603a858ea970e3cd1d256ffd3be6789 | |
Patched GZIPInputStream to pass new Test Suite | |
diff --git a/src/share/classes/java/util/zip/GZIPInputStream.java b/src/share/classes/java/util/zip/GZIPInputStream.java | |
--- a/src/share/classes/java/util/zip/GZIPInputStream.java | |
+++ b/src/share/classes/java/util/zip/GZIPInputStream.java | |
@@ -224,23 +224,28 @@ | |
(readUInt(in) != (inf.getBytesWritten() & 0xffffffffL))) | |
throw new ZipException("Corrupt GZIP trailer"); | |
- // If there are more bytes available in "in" or | |
- // the leftover in the "inf" is > 26 bytes: | |
- // this.trailer(8) + next.header.min(10) + next.trailer(8) | |
- // try concatenated case | |
- if (this.in.available() > 0 || n > 26) { | |
- int m = 8; // this.trailer | |
- try { | |
- m += readHeader(in); // next.header | |
- } catch (IOException ze) { | |
- return true; // ignore any malformed, do nothing | |
- } | |
- inf.reset(); | |
- if (n > m) | |
- inf.setInput(buf, len - n + m, n - m); | |
- return false; | |
+ // We cannot know if the input stream has remaining bytes so | |
+ // assume that it does and try to read the next header. If there isn't | |
+ // one we will get a EOFException (see this.readUByte). Catch | |
+ // this exception and return true (eos reached) | |
+ int m = 8; // this.trailer | |
+ try { | |
+ m += readHeader(in); // next.header | |
+ } catch (EOFException ze) { | |
+ // We are at the end of the stream | |
+ return true; | |
+ } catch (IOException ze) { | |
+ // We were not at the end of the stream but what was | |
+ // left in the stream didn't make any sense, ignore it | |
+ // we already have what we came for | |
+ return true; | |
} | |
- return true; | |
+ // We found a valid header for an additional GZIP file | |
+ inf.reset(); | |
+ if (n > m) { | |
+ inf.setInput(buf, len - n + m, n - m); | |
+ } | |
+ return false; // there is more GZIP data, keep going (concatenated gzip data set) | |
} | |
/* |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment