Created
October 18, 2015 14:41
-
-
Save davide-romanini/09ee2bcf81d7e0725988 to your computer and use it in GitHub Desktop.
Zip pinch for python prototype
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import zlib
from collections import namedtuple
from struct import pack, unpack
# Record signatures as they appear on disk.  They are bytes literals so that
# they can be searched for in binary data (bytes / mmap) on Python 3 as well;
# on Python 2 the ``b`` prefix is a no-op.
EOCD = b'PK\x05\x06'  # end of central directory record (0x06054b50, little-endian)
CD = b'PK\x01\x02'  # central directory file header (0x02014b50, little-endian)
# Parsed "end of central directory" record.  extract_eocd() fills the first
# eight fields from the 22-byte fixed part (struct format '<I4H2IH') and
# stores the variable-length trailing comment in ``zipComment``.
EOCDRecord = namedtuple(
    'EOCDRecord',
    # fixed 22-byte part
    'signature diskNumber diskWithCentralDirStart '
    'centralDirRecordsOnThisDisk centralDirRecords '
    'centralDirSize centralDirOffset zipCommentLength '
    # variable-length part
    'zipComment'
)
# Parsed central directory file header.  extract_cd() fills the first
# seventeen fields from the 46-byte fixed part (struct format '<I6H3I5H2I');
# the last three hold the variable-length data that follows it.
CDRecord = namedtuple(
    'CDRecord',
    # fixed 46-byte part
    'signature versionMadeBy versionNeeded bitFlag compressionMethod '
    'lastModificationTime lastModificationDate '
    'crc32 compressedSize uncompressedSize '
    'filenameLength extraFieldLength fileCommentLength '
    'diskNumberStart internalFileAttributes '
    'externalFileAttributes localHeaderOffset '
    # variable-length part
    'fileName extraField fileComment'
)
def inflate(data):
    """Decompress a raw DEFLATE stream and return the inflated data.

    ``data`` is the compressed payload as stored inside a ZIP member, i.e.
    without any zlib or gzip header/trailer around it.
    """
    # A negative window size makes zlib read headerless ("raw") deflate data.
    inflater = zlib.decompressobj(-zlib.MAX_WBITS)
    body = inflater.decompress(data)
    remainder = inflater.flush()
    return body + remainder
def extract_eocd(sliceable):
    """Locate and parse the end of central directory (EOCD) record.

    sliceable -- the whole archive as a sliceable byte sequence (bytes, mmap).

    Returns an EOCDRecord.  Raises ValueError when no EOCD signature is
    found; struct.error propagates if the record is cut short.
    """
    fixed_size = 22  # size of the '<I4H2IH' fixed part
    # The comment length is a 16-bit field, so the record starts at most
    # 22 + 65535 bytes before the end of the archive.
    tail = sliceable[-(fixed_size + 0xFFFF):]
    position = tail.rfind(EOCD)
    if position == -1:
        raise ValueError('End of central directory record not found')
    record = tail[position:]
    fixed = unpack('<I4H2IH', record[0:fixed_size])
    comment_length = fixed[7]  # zipCommentLength
    comment = record[fixed_size:fixed_size + comment_length]
    return EOCDRecord._make(fixed + (comment,))
def extract_cd(sliceable, eocd_rec):
    """Parse every file header in the central directory.

    sliceable -- the whole archive as a sliceable byte sequence (bytes, mmap).
    eocd_rec  -- EOCDRecord giving the central directory offset and size.

    Returns a list of CDRecord in directory order.  struct.error propagates
    if the last header is cut short.
    """
    fixed_size = 46  # size of the '<I6H3I5H2I' fixed part
    cd_start = eocd_rec.centralDirOffset
    cd = sliceable[cd_start:cd_start + eocd_rec.centralDirSize]
    ret = []
    # Start at the first signature (anything before it is skipped), then walk
    # record by record using the lengths each header declares.  Splitting on
    # the signature bytes would break whenever they occur inside a record,
    # e.g. in a CRC, a size, a file name or an extra field.
    offset = cd.find(CD)
    while offset != -1 and cd[offset:offset + 4] == CD:
        header = unpack('<I6H3I5H2I', cd[offset:offset + fixed_size])
        # filenameLength, extraFieldLength, fileCommentLength
        name_len, extra_len, comment_len = header[10:13]
        name_start = offset + fixed_size
        extra_start = name_start + name_len
        comment_start = extra_start + extra_len
        record_end = comment_start + comment_len
        ret.append(CDRecord._make(header + (
            cd[name_start:extra_start],
            cd[extra_start:comment_start],
            cd[comment_start:record_end],
        )))
        offset = record_end
    return ret
def extract_file(sliceable, cd_rec):
    """Return the uncompressed content of one archive member.

    sliceable -- the whole archive as a sliceable byte sequence (bytes, mmap).
    cd_rec    -- central directory record of the member; localHeaderOffset,
                 compressedSize and compressionMethod are read from it.

    Raises ValueError when no local file header is found at the recorded
    offset and NotImplementedError for compression methods other than
    0 (stored) and 8 (deflate).
    """
    fixed_size = 30  # size of the fixed part of a local file header
    header_start = cd_rec.localHeaderOffset
    header = sliceable[header_start:header_start + fixed_size]
    if len(header) < fixed_size or header[0:4] != b'PK\x03\x04':
        raise ValueError('No local file header at offset %d' % header_start)
    # The name/extra lengths must come from the local header itself: its
    # extra field may differ in length from the central directory copy,
    # which would shift the start of the data.
    name_len, extra_len = unpack('<2H', header[26:30])
    start = header_start + fixed_size + name_len + extra_len
    end = start + cd_rec.compressedSize
    compressed = sliceable[start:end]
    # only deflate or store are supported
    method = cd_rec.compressionMethod
    if method == 8:
        return inflate(compressed)
    if method == 0:
        return compressed
    raise NotImplementedError('Unsupported compression method %s' % method)
import mmap
import json
import sys

# Command line: <zip file> <index of the member in the central directory>
if len(sys.argv) < 3:
    sys.exit('usage: %s ZIPFILE FILE_INDEX' % sys.argv[0])
filename = sys.argv[1]
file_index = int(sys.argv[2])

# The archive is only read, so open and map it read-only; this also works
# for files the user has no write permission on.
with open(filename, 'rb') as f:
    m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
    try:
        eocd_rec = extract_eocd(m)
        cd_rec = extract_cd(m, eocd_rec)
        uncompressed_file = extract_file(m, cd_rec[file_index])
    finally:
        # Release the mapping even when parsing fails.
        m.close()

# Parenthesised form is valid on both Python 2 and Python 3.
print(uncompressed_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment