Created
September 14, 2010 11:57
-
-
Save paul-hammant/578920 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## For use with https://addons.mozilla.org/en-US/firefox/addon/212/
## If the archive is named to match the title of the page, this extracts the
## zipped contents for source-control usage.
## Inspired by http://code.activestate.com/recipes/465649/ (r3)
##
import os, zipfile, sys, subprocess, platform | |
from cStringIO import StringIO | |
def _strip_leading_whitespace(path):
    """Strip leading whitespace from every line of *path* in place via sed.

    Only Linux and Darwin are handled; on any other platform the file is
    left untouched.
    """
    system = platform.system()
    # sed is different on Mac and Linux :-(
    if system == 'Linux':
        subprocess.call(['sed', '-i', '-E', '-s', r's/^\s*//g', path])
    elif system == 'Darwin':
        # Here '-i' is followed by a separate '.bkp' argument (a backup
        # suffix in BSD sed), unlike the Linux invocation above.
        subprocess.call(['sed', '-i', '.bkp', '-E', 's/^[[:space:]]*//g', path])


def extract( filename, dir ):
    """Unpack the archive ``filename + '.maff'`` into the directory *dir*.

    Every entry has its leading "numbered directory" component removed (the
    length of that component is taken from the first entry in the archive).
    The top-level ``index.html`` is written to *filename* itself and the
    top-level ``index.rdf`` to *filename* with a trailing ``.html`` swapped
    for ``.rdf`` (or ``.rdf`` appended when there is no such suffix).  All
    other entries are written relative to *dir*, which is created if needed.
    Extracted files whose name ends in ``.html`` are post-processed with sed
    to strip leading whitespace.

    The archive is always closed and the original working directory is
    always restored, even when extraction fails.

    :param filename: archive path without the ``.maff`` extension; also the
        output name for the main page.
    :param dir: directory to extract into.
    """
    zfn = filename + '.maff'
    print('Processing: ' + zfn)
    zf = zipfile.ZipFile(zfn)
    try:
        namelist = zf.namelist()
        # all entries in the maff are in a numbered directory
        # 122123123223_222/ which we don't want.
        if namelist:
            numbered_directory_name_len = len(namelist[0].split('/')[0]) + 1
        else:
            numbered_directory_name_len = 0
        # A list (not filter()) so len() below also works on Python 3.
        filelist = [name for name in namelist if not name.endswith('/')]

        # Only swap a trailing '.html'; never let index.rdf land on the same
        # path as the main page.
        if filename.endswith('.html'):
            rdf_fn = filename[:-len('.html')] + '.rdf'
        else:
            rdf_fn = filename + '.rdf'

        # make base
        pushd = os.getcwd()
        if not os.path.isdir(dir):
            os.makedirs(dir)
        os.chdir(dir)
        try:
            for fn in filelist:
                rel_fn = fn[numbered_directory_name_len:]
                normalized = os.path.normpath(rel_fn)
                # Refuse entries that would escape the target directory.
                if (not rel_fn
                        or os.path.isabs(normalized)
                        or normalized == os.pardir
                        or normalized.startswith(os.pardir + os.sep)):
                    print(' Skipping unsafe entry: ' + fn)
                    continue

                # Exact match: nested files that merely end in 'index.html'
                # must not overwrite the main page.
                if rel_fn == 'index.html':
                    to_fn = filename
                elif rel_fn == 'index.rdf':
                    to_fn = rdf_fn
                else:
                    to_fn = rel_fn

                parent = os.path.dirname(to_fn)
                if parent and not os.path.isdir(parent):
                    os.makedirs(parent)

                # zf.read() already holds the whole entry in memory, so it
                # can be written in one go.
                with open(to_fn, 'wb') as out:
                    out.write(zf.read(fn))

                if to_fn.endswith('.html'):
                    _strip_leading_whitespace(to_fn)
            # The two subtracted files are index.html and index.rdf.
            print(' with ' + str(max(len(filelist) - 2, 0)) + ' references.')
        finally:
            os.chdir(pushd)
    finally:
        zf.close()
def main():
    """Command-line entry point: extract the archive named by ``sys.argv[1]``.

    The argument may be given with or without the ``.maff`` extension; only a
    trailing ``.maff`` is removed, so the rest of the path is left intact.
    The contents are extracted into the current directory.  Exits with a
    usage message when no argument is supplied.
    """
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else 'script'
        sys.exit('usage: ' + prog + ' <page>[.maff]')
    name = sys.argv[1]
    if name.endswith('.maff'):
        name = name[:-len('.maff')]
    extract(name, '.')
# Run the extractor only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment