Skip to content

Instantly share code, notes, and snippets.

Created December 10, 2010 00:05
Show Gist options
  • Save cflewis/735550 to your computer and use it in GitHub Desktop.
Save cflewis/735550 to your computer and use it in GitHub Desktop.
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
* An instance of {@code ZipEntry} represents an entry within a <i>ZIP-archive</i>.
* An entry has attributes such as name (= path) or the size of its data. While
* an entry identifies data stored in an archive, it does not hold the data
* itself. For example when reading a <i>ZIP-file</i> you will first retrieve
* all its entries in a collection and then read the data for a specific entry
* through an input stream.
* @see ZipFile
* @see ZipOutputStream
public class ZipEntry implements ZipConstants, Cloneable {
String name, comment;
long compressedSize = -1, crc = -1, size = -1;
int compressionMethod = -1, time = -1, modDate = -1;
byte[] extra;
int nameLen = -1;
long mLocalHeaderRelOffset = -1;
* Zip entry state: Deflated.
public static final int DEFLATED = 8;
* Zip entry state: Stored.
public static final int STORED = 0;
* Constructs a new {@code ZipEntry} with the specified name.
* @param name
* the name of the ZIP entry.
* @throws IllegalArgumentException
* if the name length is outside the range (> 0xFFFF).
public ZipEntry(String name) {
if (name == null) {
throw new NullPointerException();
if (name.length() > 0xFFFF) {
throw new IllegalArgumentException();
} = name;
* Gets the comment for this {@code ZipEntry}.
* @return the comment for this {@code ZipEntry}, or {@code null} if there
* is no comment. If we're reading an archive with
* {@code ZipInputStream} the comment is not available.
public String getComment() {
return comment;
* Gets the compressed size of this {@code ZipEntry}.
* @return the compressed size, or -1 if the compressed size has not been
* set.
public long getCompressedSize() {
return compressedSize;
* Gets the checksum for this {@code ZipEntry}.
* @return the checksum, or -1 if the checksum has not been set.
public long getCrc() {
return crc;
* Gets the extra information for this {@code ZipEntry}.
* @return a byte array containing the extra information, or {@code null} if
* there is none.
public byte[] getExtra() {
return extra;
* Gets the compression method for this {@code ZipEntry}.
* @return the compression method, either {@code DEFLATED}, {@code STORED}
* or -1 if the compression method has not been set.
public int getMethod() {
return compressionMethod;
* Gets the name of this {@code ZipEntry}.
* @return the entry name.
public String getName() {
return name;
* Gets the uncompressed size of this {@code ZipEntry}.
* @return the uncompressed size, or {@code -1} if the size has not been
* set.
public long getSize() {
return size;
* Gets the last modification time of this {@code ZipEntry}.
* @return the last modification time as the number of milliseconds since
* Jan. 1, 1970.
public long getTime() {
if (time != -1) {
GregorianCalendar cal = new GregorianCalendar();
cal.set(Calendar.MILLISECOND, 0);
cal.set(1980 + ((modDate >> 9) & 0x7f), ((modDate >> 5) & 0xf) - 1,
modDate & 0x1f, (time >> 11) & 0x1f, (time >> 5) & 0x3f,
(time & 0x1f) << 1);
return cal.getTime().getTime();
return -1;
* Determine whether or not this {@code ZipEntry} is a directory.
* @return {@code true} when this {@code ZipEntry} is a directory, {@code
* false} otherwise.
public boolean isDirectory() {
return name.charAt(name.length() - 1) == '/';
* Sets the comment for this {@code ZipEntry}.
* @param string
* the comment for this entry.
public void setComment(String string) {
if (string == null || string.length() <= 0xFFFF) {
comment = string;
} else {
throw new IllegalArgumentException();
* Sets the compressed size for this {@code ZipEntry}.
* @param value
* the compressed size (in bytes).
public void setCompressedSize(long value) {
compressedSize = value;
* Sets the checksum for this {@code ZipEntry}.
* @param value
* the checksum for this entry.
* @throws IllegalArgumentException
* if {@code value} is < 0 or > 0xFFFFFFFFL.
public void setCrc(long value) {
if (value >= 0 && value <= 0xFFFFFFFFL) {
crc = value;
} else {
throw new IllegalArgumentException();
* Sets the extra information for this {@code ZipEntry}.
* @param data
* a byte array containing the extra information.
* @throws IllegalArgumentException
* when the length of data is greater than 0xFFFF bytes.
public void setExtra(byte[] data) {
if (data == null || data.length <= 0xFFFF) {
extra = data;
} else {
throw new IllegalArgumentException();
* Sets the compression method for this {@code ZipEntry}.
* @param value
* the compression method, either {@code DEFLATED} or {@code
* @throws IllegalArgumentException
* when value is not {@code DEFLATED} or {@code STORED}.
public void setMethod(int value) {
if (value != STORED && value != DEFLATED) {
throw new IllegalArgumentException();
compressionMethod = value;
* Sets the uncompressed size of this {@code ZipEntry}.
* @param value
* the uncompressed size for this entry.
* @throws IllegalArgumentException
* if {@code value} < 0 or {@code value} > 0xFFFFFFFFL.
public void setSize(long value) {
if (value >= 0 && value <= 0xFFFFFFFFL) {
size = value;
} else {
throw new IllegalArgumentException();
* Sets the modification time of this {@code ZipEntry}.
* @param value
* the modification time as the number of milliseconds since Jan.
* 1, 1970.
public void setTime(long value) {
GregorianCalendar cal = new GregorianCalendar();
cal.setTime(new Date(value));
int year = cal.get(Calendar.YEAR);
if (year < 1980) {
modDate = 0x21;
time = 0;
} else {
modDate = cal.get(Calendar.DATE);
modDate = (cal.get(Calendar.MONTH) + 1 << 5) | modDate;
modDate = ((cal.get(Calendar.YEAR) - 1980) << 9) | modDate;
time = cal.get(Calendar.SECOND) >> 1;
time = (cal.get(Calendar.MINUTE) << 5) | time;
time = (cal.get(Calendar.HOUR_OF_DAY) << 11) | time;
* Returns the string representation of this {@code ZipEntry}.
* @return the string representation of this {@code ZipEntry}.
public String toString() {
return name;
* Constructs a new {@code ZipEntry} using the values obtained from {@code
* ze}.
* @param ze
* the {@code ZipEntry} from which to obtain values.
public ZipEntry(ZipEntry ze) {
name =;
comment = ze.comment;
time = ze.time;
size = ze.size;
compressedSize = ze.compressedSize;
crc = ze.crc;
compressionMethod = ze.compressionMethod;
modDate = ze.modDate;
extra = ze.extra;
nameLen = ze.nameLen;
mLocalHeaderRelOffset = ze.mLocalHeaderRelOffset;
* Returns a shallow copy of this entry.
* @return a copy of this entry.
public Object clone() {
return new ZipEntry(this);
* Returns the hash code for this {@code ZipEntry}.
* @return the hash code of the entry.
public int hashCode() {
return name.hashCode();
* Internal constructor. Creates a new ZipEntry by reading the
* Central Directory Entry from "in", which must be positioned at
* the CDE signature.
* On exit, "in" will be positioned at the start of the next entry.
ZipEntry(LittleEndianReader ler, InputStream in) throws IOException {
* We're seeing performance issues when we call readShortLE and
* readIntLE, so we're going to read the entire header at once
* and then parse the results out without using any function calls.
* Uglier, but should be much faster.
* Note that some lines look a bit different, because the corresponding
* fields or locals are long and so we need to do & 0xffffffffl to avoid
* problems induced by sign extension.
byte[] hdrBuf = ler.hdrBuf;
myReadFully(in, hdrBuf);
long sig = (hdrBuf[0] & 0xff) | ((hdrBuf[1] & 0xff) << 8) |
((hdrBuf[2] & 0xff) << 16) | ((hdrBuf[3] << 24) & 0xffffffffL);
if (sig != CENSIG) {
throw new ZipException("Zip is null");
compressionMethod = (hdrBuf[10] & 0xff) | ((hdrBuf[11] & 0xff) << 8);
time = (hdrBuf[12] & 0xff) | ((hdrBuf[13] & 0xff) << 8);
modDate = (hdrBuf[14] & 0xff) | ((hdrBuf[15] & 0xff) << 8);
crc = (hdrBuf[16] & 0xff) | ((hdrBuf[17] & 0xff) << 8)
| ((hdrBuf[18] & 0xff) << 16)
| ((hdrBuf[19] << 24) & 0xffffffffL);
compressedSize = (hdrBuf[20] & 0xff) | ((hdrBuf[21] & 0xff) << 8)
| ((hdrBuf[22] & 0xff) << 16)
| ((hdrBuf[23] << 24) & 0xffffffffL);
size = (hdrBuf[24] & 0xff) | ((hdrBuf[25] & 0xff) << 8)
| ((hdrBuf[26] & 0xff) << 16)
| ((hdrBuf[27] << 24) & 0xffffffffL);
nameLen = (hdrBuf[28] & 0xff) | ((hdrBuf[29] & 0xff) << 8);
int extraLen = (hdrBuf[30] & 0xff) | ((hdrBuf[31] & 0xff) << 8);
int commentLen = (hdrBuf[32] & 0xff) | ((hdrBuf[33] & 0xff) << 8);
mLocalHeaderRelOffset = (hdrBuf[42] & 0xff) | ((hdrBuf[43] & 0xff) << 8)
| ((hdrBuf[44] & 0xff) << 16)
| ((hdrBuf[45] << 24) & 0xffffffffL);
byte[] nameBytes = new byte[nameLen];
myReadFully(in, nameBytes);
byte[] commentBytes = null;
if (commentLen > 0) {
commentBytes = new byte[commentLen];
myReadFully(in, commentBytes);
if (extraLen > 0) {
extra = new byte[extraLen];
myReadFully(in, extra);
try {
* The actual character set is "IBM Code Page 437". As of
* Sep 2006, the Zip spec (APPNOTE.TXT) supports UTF-8. When
* bit 11 of the GP flags field is set, the file name and
* comment fields are UTF-8.
* TODO: add correct UTF-8 support.
name = new String(nameBytes, "ISO-8859-1");
if (commentBytes != null) {
comment = new String(commentBytes, "ISO-8859-1");
} else {
comment = null;
} catch (UnsupportedEncodingException uee) {
throw new InternalError("Unsupported encoding");
private void myReadFully(InputStream in, byte[] b) throws IOException {
int len = b.length;
int off = 0;
while (len > 0) {
int count =, off, len);
if (count <= 0) {
throw new EOFException();
off += count;
len -= count;
* Read a four-byte int in little-endian order.
static long readIntLE(RandomAccessFile raf) throws IOException {
int b0 =;
int b1 =;
int b2 =;
int b3 =;
if (b3 < 0) {
throw new EOFException("EOF");
return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); // ATTENTION: DOES SIGN EXTENSION: IS THIS WANTED?
static class LittleEndianReader {
private byte[] b = new byte[4];
byte[] hdrBuf = new byte[CENHDR];
* Read a two-byte short in little-endian order.
int readShortLE(InputStream in) throws IOException {
if (, 0, 2) == 2) {
return (b[0] & 0XFF) | ((b[1] & 0XFF) << 8);
} else {
throw new EOFException("Encoding unknown");
* Read a four-byte int in little-endian order.
long readIntLE(InputStream in) throws IOException {
if (, 0, 4) == 4) {
return ( ((b[0] & 0XFF))
| ((b[1] & 0XFF) << 8)
| ((b[2] & 0XFF) << 16)
| ((b[3] & 0XFF) << 24))
& 0XFFFFFFFFL; // Here for sure NO sign extension is wanted.
} else {
throw new EOFException("EOF");
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.util.Enumeration;
import java.util.LinkedHashMap;
import java.util.Iterator;
* This class provides random read access to a <i>ZIP-archive</i> file.
* <p>
* While {@code ZipInputStream} provides stream based read access to a
* <i>ZIP-archive</i>, this class implements more efficient (file based) access
* and makes use of the <i>central directory</i> within a <i>ZIP-archive</i>.
* <p>
* Use {@code ZipOutputStream} if you want to create an archive.
* <p>
* A temporary ZIP file can be marked for automatic deletion upon closing it.
* @see ZipEntry
* @see ZipOutputStream
public class ZipFile implements ZipConstants {
* Open ZIP file for read.
public static final int OPEN_READ = 1;
* Delete ZIP file when closed.
public static final int OPEN_DELETE = 4;
private final String fileName;
private File fileToDeleteOnClose;
private RandomAccessFile mRaf;
private final ZipEntry.LittleEndianReader ler = new ZipEntry.LittleEndianReader();
private final LinkedHashMap<String, ZipEntry> mEntries
= new LinkedHashMap<String, ZipEntry>();
* Constructs a new {@code ZipFile} with the specified file.
* @param file
* the file to read from.
* @throws ZipException
* if a ZIP error occurs.
* @throws IOException
* if an {@code IOException} occurs.
public ZipFile(File file) throws ZipException, IOException {
this(file, OPEN_READ);
* Opens a file as <i>ZIP-archive</i>. "mode" must be {@code OPEN_READ} or
* {@code OPEN_DELETE} . The latter sets the "delete on exit" flag through a
* file.
* @param file
* the ZIP file to read.
* @param mode
* the mode of the file open operation.
* @throws IOException
* if an {@code IOException} occurs.
public ZipFile(File file, int mode) throws IOException {
System.err.println("Using custom ZipFile implementation");
fileName = file.getPath();
if (mode != OPEN_READ && mode != (OPEN_READ | OPEN_DELETE)) {
throw new IllegalArgumentException();
SecurityManager security = System.getSecurityManager();
if (security != null) {
if ((mode & OPEN_DELETE) != 0) {
if (security != null) {
fileToDeleteOnClose = file; // file.deleteOnExit();
} else {
fileToDeleteOnClose = null;
// JPF's implementation doesn't do filenames, it doesn file objects
//mRaf = new RandomAccessFile(fileName, "r");
mRaf = new RandomAccessFile(file, "r");
* Opens a ZIP archived file.
* @param name
* the name of the ZIP file.
* @throws IOException
* if an IOException occurs.
public ZipFile(String name) throws IOException {
this(new File(name), OPEN_READ);
protected void finalize() throws IOException {
* Closes this ZIP file. This method is idempotent.
* @throws IOException
* if an IOException occurs.
public void close() throws IOException {
RandomAccessFile raf = mRaf;
if (raf != null) { // Only close initialized instances
synchronized(raf) {
mRaf = null;
if (fileToDeleteOnClose != null) {
AccessController.doPrivileged(new PrivilegedAction<Object>() {
public Object run() {
new File(fileName).delete();
return null;
// fileToDeleteOnClose.delete();
fileToDeleteOnClose = null;
private void checkNotClosed() {
if (mRaf == null) {
throw new IllegalStateException("Zip file null");
* Returns an enumeration of the entries. The entries are listed in the
* order in which they appear in the ZIP archive.
* @return the enumeration of the entries.
* @throws IllegalStateException if this ZIP file has been closed.
public Enumeration<? extends ZipEntry> entries() {
final Iterator<ZipEntry> iterator = mEntries.values().iterator();
return new Enumeration<ZipEntry>() {
public boolean hasMoreElements() {
return iterator.hasNext();
public ZipEntry nextElement() {
* Gets the ZIP entry with the specified name from this {@code ZipFile}.
* @param entryName
* the name of the entry in the ZIP file.
* @return a {@code ZipEntry} or {@code null} if the entry name does not
* exist in the ZIP file.
* @throws IllegalStateException if this ZIP file has been closed.
public ZipEntry getEntry(String entryName) {
if (entryName == null) {
throw new NullPointerException();
ZipEntry ze = mEntries.get(entryName);
if (ze == null) {
ze = mEntries.get(entryName + "/");
return ze;
* Returns an input stream on the data of the specified {@code ZipEntry}.
* @param entry
* the ZipEntry.
* @return an input stream of the data contained in the {@code ZipEntry}.
* @throws IOException
* if an {@code IOException} occurs.
* @throws IllegalStateException if this ZIP file has been closed.
public InputStream getInputStream(ZipEntry entry) throws IOException {
* Make sure this ZipEntry is in this Zip file. We run it through
* the name lookup.
entry = getEntry(entry.getName());
if (entry == null) {
return null;
* Create a ZipInputStream at the right part of the file.
RandomAccessFile raf = mRaf;
synchronized (raf) {
// We don't know the entry data's start position. All we have is the
// position of the entry's local header. At position 28 we find the
// length of the extra data. In some cases this length differs from
// the one coming in the central header.
RAFStream rafstrm = new RAFStream(raf,
entry.mLocalHeaderRelOffset + 28);
int localExtraLenOrWhatever = ler.readShortLE(rafstrm);
// Skip the name and this "extra" data or whatever it is:
rafstrm.skip(entry.nameLen + localExtraLenOrWhatever);
rafstrm.mLength = rafstrm.mOffset + entry.compressedSize;
if (entry.compressionMethod == ZipEntry.DEFLATED) {
int bufSize = Math.max(1024, (int)Math.min(entry.getSize(), 65535L));
return new ZipInflaterInputStream(rafstrm, new Inflater(true), bufSize, entry);
} else {
return rafstrm;
* Gets the file name of this {@code ZipFile}.
* @return the file name of this {@code ZipFile}.
public String getName() {
return fileName;
* Returns the number of {@code ZipEntries} in this {@code ZipFile}.
* @return the number of entries in this file.
* @throws IllegalStateException if this ZIP file has been closed.
public int size() {
return mEntries.size();
* Find the central directory and read the contents.
* <p>The central directory can be followed by a variable-length comment
* field, so we have to scan through it backwards. The comment is at
* most 64K, plus we have 18 bytes for the end-of-central-dir stuff
* itself, plus apparently sometimes people throw random junk on the end
* just for the fun of it.
* <p>This is all a little wobbly. If the wrong value ends up in the EOCD
* area, we're hosed. This appears to be the way that everybody handles
* it though, so we're in good company if this fails.
private void readCentralDir() throws IOException {
* Scan back, looking for the End Of Central Directory field. If
* the archive doesn't have a comment, we'll hit it on the first
* try.
* No need to synchronize mRaf here -- we only do this when we
* first open the Zip file.
long scanOffset = mRaf.length() - ENDHDR;
if (scanOffset < 0) {
throw new ZipException("Scanner went out of bounds");
long stopOffset = scanOffset - 65536;
if (stopOffset < 0) {
stopOffset = 0;
while (true) {;
if (ZipEntry.readIntLE(mRaf) == 101010256L) {
if (scanOffset < stopOffset) {
throw new ZipException("Scanner out of bounds");
* Found it, read the EOCD.
* For performance we want to use buffered I/O when reading the
* file. We wrap a buffered stream around the random-access file
* object. If we just read from the RandomAccessFile we'll be
* doing a read() system call every time.
RAFStream rafs = new RAFStream(mRaf, mRaf.getFilePointer());
BufferedInputStream bin = new BufferedInputStream(rafs, ENDHDR);
int diskNumber = ler.readShortLE(bin);
int diskWithCentralDir = ler.readShortLE(bin);
int numEntries = ler.readShortLE(bin);
int totalNumEntries = ler.readShortLE(bin);
/*centralDirSize =*/ ler.readIntLE(bin);
long centralDirOffset = ler.readIntLE(bin);
/*commentLen =*/ ler.readShortLE(bin);
if (numEntries != totalNumEntries ||
diskNumber != 0 ||
diskWithCentralDir != 0) {
throw new ZipException("Buffer out of bounds");
* Seek to the first CDE and read all entries.
* However, when Z_SYNC_FLUSH is used the offset may not point directly
* to the CDE so skip over until we find it.
* At most it will be 6 bytes away (one or two bytes for empty block, 4 bytes for
* empty block signature).
scanOffset = centralDirOffset;
stopOffset = scanOffset + 6;
while (true) {;
if (ZipEntry.readIntLE(mRaf) == CENSIG) {
if (scanOffset > stopOffset) {
throw new ZipException("Scanner out of bounds");
// If CDE is found then go and read all the entries
rafs = new RAFStream(mRaf, scanOffset);
bin = new BufferedInputStream(rafs, 4096);
for (int i = 0; i < numEntries; i++) {
ZipEntry newEntry = new ZipEntry(ler, bin);
mEntries.put(newEntry.getName(), newEntry);
* Wrap a stream around a RandomAccessFile. The RandomAccessFile is shared
* among all streams returned by getInputStream(), so we have to synchronize
* access to it. (We can optimize this by adding buffering here to reduce
* collisions.)
* <p>We could support mark/reset, but we don't currently need them.
static class RAFStream extends InputStream {
RandomAccessFile mSharedRaf;
long mOffset;
long mLength;
public RAFStream(RandomAccessFile raf, long pos) throws IOException {
mSharedRaf = raf;
mOffset = pos;
mLength = raf.length();
public int available() throws IOException {
if (mLength > mOffset) {
if (mLength - mOffset < Integer.MAX_VALUE) {
return (int)(mLength - mOffset);
} else {
return Integer.MAX_VALUE;
} else {
return 0;
public int read() throws IOException {
byte[] singleByteBuf = new byte[1];
if (read(singleByteBuf, 0, 1) == 1) {
return singleByteBuf[0] & 0XFF;
} else {
return -1;
public int read(byte[] b, int off, int len) throws IOException {
synchronized (mSharedRaf) {;
if (len > mLength - mOffset) {
len = (int) (mLength - mOffset);
int count =, off, len);
if (count > 0) {
mOffset += count;
return count;
} else {
return -1;
public long skip(long n) throws IOException {
if (n > mLength - mOffset) {
n = mLength - mOffset;
mOffset += n;
return n;
static class ZipInflaterInputStream extends InflaterInputStream {
ZipEntry entry;
long bytesRead = 0;
public ZipInflaterInputStream(InputStream is, Inflater inf, int bsize, ZipEntry entry) {
super(is, inf, bsize);
this.entry = entry;
public int read(byte[] buffer, int off, int nbytes) throws IOException {
int i =, off, nbytes);
if (i != -1) {
bytesRead += i;
return i;
public int available() throws IOException {
return super.available() == 0 ? 0 : (int) (entry.getSize() - bytesRead);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment