#!/usr/bin/python
# | |
# This is a python script. You need a Python interpreter to run it. | |
# For example, ActiveState Python, which exists for windows. | |
# | |
# This script strips the penultimate record from a Mobipocket file.
# This is useful because the current KindleGen adds a compressed copy
# of the source files used in this record, making the ebook produced
# about twice as big as it needs to be.
# | |
# | |
# This is free and unencumbered software released into the public domain. | |
# | |
# Anyone is free to copy, modify, publish, use, compile, sell, or | |
# distribute this software, either in source code form or as a compiled | |
# binary, for any purpose, commercial or non-commercial, and by any | |
# means. | |
# | |
# In jurisdictions that recognize copyright laws, the author or authors | |
# of this software dedicate any and all copyright interest in the | |
# software to the public domain. We make this dedication for the benefit | |
# of the public at large and to the detriment of our heirs and | |
# successors. We intend this dedication to be an overt act of | |
# relinquishment in perpetuity of all present and future rights to this | |
# software under copyright law. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
# OTHER DEALINGS IN THE SOFTWARE. | |
# | |
# For more information, please refer to <http://unlicense.org/> | |
# | |
# Written by Paul Durrant, 2010-2011, paul@durrant.co.uk | |
# | |
# Changelog | |
# 1.00 - Initial version | |
# 1.10 - Added an option to output the stripped data | |
# 1.20 - Added check for source files section (thanks Piquan) | |
# Script version; interpolated into the banner printed at start-up.
__version__ = '1.20'

import sys
import struct
import binascii
class Unbuffered:
    """Stream wrapper that flushes the wrapped stream after every write.

    Attributes that the wrapper does not define itself are looked up on
    the wrapped stream, so the wrapper can stand in for it.
    """

    def __init__(self, stream):
        """Wrap *stream*, an object providing ``write`` and ``flush``."""
        self.stream = stream

    def write(self, data):
        """Write *data* to the wrapped stream, then flush it immediately."""
        self.stream.write(data)
        self.stream.flush()

    def __getattr__(self, attr):
        """Forward lookups that failed on the wrapper to the wrapped stream.

        Raises:
            AttributeError: if the wrapped stream lacks *attr*, or if the
                wrapper has no ``stream`` attribute at all.
        """
        # __getattr__ only runs after normal lookup has failed, so being
        # asked for 'stream' means __init__ never ran on this instance.
        # Resolving it through self.stream would recurse without end.
        if attr == 'stream':
            raise AttributeError(attr)
        return getattr(self.stream, attr)
class StripException(Exception):
    """Raised by SectionStripper when the input data cannot be stripped.

    The message passed to the constructor explains the reason, for example
    that the data is not in the expected format or that it holds no
    sources section.
    """
class SectionStripper:
    """Rebuild a Mobipocket file image without its penultimate section.

    The constructor does all the work on the bytes it is given: it checks
    for the ``BOOKMOBI`` marker, requires the penultimate section to begin
    with ``SRCS``, and assembles a new image in which that section and its
    8-byte table record are gone.  The results are read back through
    ``getResult``, ``getStrippedData`` and ``getHeader``.

    Attributes set by the constructor:
        num_sections: number of sections in the rebuilt image.
        sections: list of ``(offset, attrs)`` tuples, one per section of
            the rebuilt image; ``attrs`` is the 4 bytes that follow the
            offset in each table record, carried over unchanged.
        data_file: the rebuilt image.
        penoffset, lastoffset: start offsets, in the *input*, of the
            penultimate and last sections.
    """

    _MAGIC_START = 0x3C     # position of the 8-byte 'BOOKMOBI' marker
    _COUNT_START = 76       # big-endian 16-bit section count
    _TABLE_START = 78       # first record of the section table
    _RECORD_LEN = 8         # 4-byte big-endian offset + 4 bytes kept verbatim
    _SRCS_HEADER_LEN = 16   # leading bytes of the stripped section kept apart

    def _sectionBounds(self, section):
        """Return ``(start, end)`` offsets of *section* inside ``data_file``."""
        if section + 1 == self.num_sections:
            # The last section runs to the end of the image.
            end = len(self.data_file)
        else:
            end = self.sections[section + 1][0]
        start = self.sections[section][0]
        return start, end

    def loadSection(self, section):
        """Return the bytes of section number *section* of the rebuilt image."""
        start, end = self._sectionBounds(section)
        return self.data_file[start:end]

    def patch(self, off, new):
        """Overwrite ``len(new)`` bytes of the image at *off* with *new*."""
        self.data_file = (self.data_file[:off] + new +
                          self.data_file[off + len(new):])

    def strip(self, off, len):
        """Cut *len* bytes out of the image starting at *off*.

        Only ``data_file`` is changed; the recorded section offsets are
        not adjusted.  The parameter keeps its original name ``len`` for
        callers that pass it by keyword.
        """
        self.data_file = self.data_file[:off] + self.data_file[off + len:]

    def patchSection(self, section, new, in_off=0):
        """Overwrite part of one section with *new*.

        Args:
            section: index of the section to modify.
            new: replacement bytes.
            in_off: offset inside the section at which *new* is written.

        Raises:
            StripException: if *new* would run past the end of the section.
        """
        start, end = self._sectionBounds(section)
        # An explicit check rather than assert, so that it still protects
        # the following section when Python runs with -O.
        if start + in_off + len(new) > end:
            raise StripException("patch does not fit in the section")
        self.patch(start + in_off, new)

    def __init__(self, datain):
        """Strip the penultimate section from the file image *datain*.

        Args:
            datain: the complete contents of the input file, as bytes.

        Raises:
            StripException: if the data lacks the ``BOOKMOBI`` marker, is
                truncated or has inconsistent section offsets, or if its
                penultimate section does not begin with ``SRCS``.
        """
        table = self._TABLE_START
        rec = self._RECORD_LEN
        magic = datain[self._MAGIC_START:self._MAGIC_START + 8]
        # Bytes literals: the data is binary, and on Python 3 a text
        # literal would never compare equal to it.
        if magic != b'BOOKMOBI' or len(datain) < table:
            raise StripException("invalid file format")
        old_count, = struct.unpack_from('>H', datain, self._COUNT_START)
        table_end = table + old_count * rec
        if len(datain) < table_end:
            raise StripException("invalid file format")
        if old_count < 2:
            # Without two sections there is no penultimate one to remove.
            raise StripException("File doesn't contain the sources section.")

        # Decode the whole table once: (start offset, trailing 4 bytes).
        records = [(struct.unpack_from('>L', datain, pos)[0],
                    datain[pos + 4:pos + rec])
                   for pos in range(table, table_end, rec)]
        # get starting offsets for penultimate and last section
        self.penoffset = records[-2][0]
        self.lastoffset = records[-1][0]
        # check penultimate section for SRCS as first four characters
        if datain[self.penoffset:self.penoffset + 4] != b'SRCS':
            raise StripException("File doesn't contain the sources section.")
        # Offsets that point into the table, run backwards or pass the end
        # of the data would otherwise yield a silently corrupt result.
        offsets_sane = (
            all(offset >= table_end for offset, _ in records)
            and self.penoffset <= self.lastoffset <= len(datain)
        )
        if not offsets_sane:
            raise StripException("invalid file format")

        # reduce section count by one
        self.num_sections = old_count - 1
        # The last section takes over the penultimate section's position.
        # Dropping one table record also moves all section data 8 bytes
        # toward the start of the file, so every offset shrinks by 8.
        kept = records[:-2] + [(self.penoffset, records[-1][1])]
        self.sections = [(offset - rec, attrs) for offset, attrs in kept]

        parts = [datain[:self._COUNT_START],
                 struct.pack('>H', self.num_sections)]
        for offset, attrs in self.sections:
            parts.append(struct.pack('>L', offset))
            parts.append(attrs)
        # everything between the table and the penultimate section
        parts.append(datain[table_end:self.penoffset])
        # the last section
        parts.append(datain[self.lastoffset:])
        self.data_file = b''.join(parts)

        # store away the penultimate section in case the user wants it output
        body_start = self.penoffset + self._SRCS_HEADER_LEN
        self.stripped_data_header = datain[self.penoffset:body_start]
        self.stripped_data = datain[body_start:self.lastoffset]
        print("done")

    def getResult(self):
        """Return the rebuilt file image."""
        return self.data_file

    def getStrippedData(self):
        """Return the removed section's bytes after its first 16 bytes."""
        return self.stripped_data

    def getHeader(self):
        """Return the first 16 bytes of the removed section."""
        return self.stripped_data_header
def main(argv):
    """Run the command-line tool and return the process exit status.

    Args:
        argv: the full argument vector: program name, input file, output
            file, and optionally a file to receive the stripped data.

    Returns:
        0 on success; 1 for a usage error or when the input could not be
        stripped.
    """
    print('KindleStrip v%(__version__)s. '
          'Written 2010-2011 by Paul Durrant.' % globals())
    if len(argv) < 3 or len(argv) > 4:
        print("Strips the penultimate record from Mobipocket ebooks")
        print("For ebooks generated using KindleGen 1.1 that adds the source")
        print("Usage:")
        print(" %s <infile> <outfile> <strippeddatafile>" % argv[0])
        print("<strippeddatafile> is optional.")
        return 1

    infile = argv[1]
    outfile = argv[2]
    # Context managers make sure every handle is closed (and the output
    # flushed) before the process exits.
    with open(infile, 'rb') as source:
        data_file = source.read()
    try:
        strippedFile = SectionStripper(data_file)
    except StripException as e:
        print("Error: %s" % e)
        return 1

    with open(outfile, 'wb') as target:
        target.write(strippedFile.getResult())
    # b2a_hex returns bytes on Python 3; decode so it can be joined to text.
    header_hex = binascii.b2a_hex(strippedFile.getHeader()).decode('ascii')
    print("Header Bytes: " + header_hex)
    if len(argv) == 4:
        with open(argv[3], 'wb') as target:
            target.write(strippedFile.getStrippedData())
    return 0


if __name__ == "__main__":
    # Flush after every write so progress output appears immediately.
    sys.stdout = Unbuffered(sys.stdout)
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment