Last active
April 28, 2018 02:41
-
-
Save luckylittle/42c71d1f7b701aa964919148b1805795 to your computer and use it in GitHub Desktop.
Epub extractor for Android application "Safari To Go" data files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Epub extractor for Android application "Safari To Go" data files.
# Run from the directory that holds the dumped book data: the files
# publisher-style.css, jquery.js and metadata, plus one sub-directory whose
# name is the numeric book id.

# The book id is the name of the only all-digit entry in the current directory.
bookid=$(find . -maxdepth 1 -regextype sed -regex ".*/[0-9]\+" | tr -d './')
if [[ ! "${bookid}" =~ ^[0-9]+$ ]]; then
  # Empty (no match) or multi-line (several matches). The CSS/font stage below
  # only uses the id in its messages, so warn instead of aborting.
  echo "Warning: expected exactly one numeric book directory, found '${bookid}'" >&2
fi

# Prints, one per line, the http URL found on each line of CSS file $1 that
# ends a declaration with ");". Carriage returns are stripped so that
# CRLF stylesheets do not leak '\r' into the URLs.
list_font_urls() {
  sed -ne 's#.*\(http[^"]*\).*);#\1#p' "$1" | tr -d '\r'
}

# Makes the font URLs of publisher-style.css absolute, downloads every font
# into fonts/ and finally moves the stylesheet to css/style.css.
# Returns non-zero (and touches nothing) when the stylesheet is missing.
fetch_fonts() {
  local stylesheet=publisher-style.css
  local url

  if [[ ! -f "${stylesheet}" ]]; then
    echo "Warning: ${stylesheet} not found, skipping font download" >&2
    return 1
  fi

  echo "Fix the font's URL of ${bookid}..."
  sed -i -e 's#\/getfile#http:\/\/my.safaribooksonline.com\/getfile#g' "${stylesheet}"

  echo "Download the fonts for ${bookid}..."
  mkdir -p fonts css
  while IFS= read -r url; do
    [[ -n "${url}" ]] || continue
    # The file is stored under the last path component of its URL.
    wget -d "${url}" -O "fonts/${url##*/}" \
      || echo "Warning: could not download ${url}" >&2
  done < <(list_font_urls "${stylesheet}")

  mv "${stylesheet}" css/style.css
}

fetch_fonts || echo "Warning: font stage did not complete" >&2
# -f: a missing jquery.js is not an error.
rm -f jquery.js
echo "Rename _xhtml and _html in ${bookid}..."

# Appends a real file extension to every page file below the current
# directory, based on how its name ends:
#   *_xhtml                 -> *_xhtml.xhtml
#   *_html                  -> *_html.html
#   *xhtml_N (1-3 digits)   -> *xhtml_N.xhtml
#   *html_N  (1-3 digits)   -> *html_N.html
# The indexed forms cover names such as:
#   idparadest_42_html
#   idparadest_42_html_2
# Only regular files are renamed, and the path of each indexed file is printed.
add_page_extensions() {
  local digit='[[:digit:]]'
  local index

  find . -type f -name "*_xhtml" -exec sh -c 'mv -- "$1" "$1.xhtml"' _ {} \;
  find . -type f -name "*_html" -exec sh -c 'mv -- "$1" "$1.html"' _ {} \;

  for index in "${digit}" "${digit}${digit}" "${digit}${digit}${digit}"; do
    find . -type f -name "*xhtml_${index}" -print -exec mv -- {} {}.xhtml \;
    # "*html_N" also matches "*xhtml_N"; exclude those explicitly so an
    # indexed XHTML page can never end up with a .html extension.
    find . -type f -name "*html_${index}" ! -name "*xhtml_${index}" \
      -print -exec mv -- {} {}.html \;
  done
}

add_page_extensions
echo "Remove javascript from (x)html files..."

# Rewrites the javascript navigation links of every "<name>.<x>html" file in
# book directory $1 (the numeric book id) into plain hrefs.
# This:
#   <a href="javascript:___MoveTo('9781457191435/07_foreword_1_xhtml','')" data-ajax="1">
# or
#   <a href="javascript:___MoveTo('9780134771922/ch11_xhtml','ch11')" data-ajax="1">
# becomes:
#   <a href="07_foreword_1_xhtml">
# Returns non-zero when the id is empty or no matching file exists.
# NOTE(review): the resulting hrefs carry no file extension, while the rename
# stage of this script appends .xhtml/.html to the page files - confirm that
# the links are expected to look like this.
strip_javascript_links() {
  local book_id=$1
  local page
  local -a pages=()

  if [[ -z "${book_id}" ]]; then
    echo "Warning: no book id given, not touching any (x)html file" >&2
    return 1
  fi

  for page in "${book_id}"/*.*html; do
    # An unmatched glob stays literal; keep existing files only.
    [[ -f "${page}" ]] && pages+=("${page}")
  done
  if (( ${#pages[@]} == 0 )); then
    echo "Warning: no (x)html files found in ${book_id}/" >&2
    return 1
  fi

  # 1. drop the "javascript:___MoveTo('<id>/" prefix
  # 2. drop the "','<anchor>')" tail; [^']* keeps the match inside one link,
  #    so several links on the same line are each rewritten
  # 3. drop the data-ajax attribute
  sed -i \
    -e "s#javascript:___MoveTo('${book_id}/##g" \
    -e "s#','[^']*')\"#\"#g" \
    -e "s# data-ajax=\"1\"##g" \
    "${pages[@]}"
}

strip_javascript_links "${bookid:-}" \
  || echo "Warning: javascript links were not rewritten" >&2
echo "Beautify XML metadata of ${bookid}..."

# Pretty-prints the file "metadata" into "metadata.opf" and then removes the
# original. The original is deleted only after xmllint succeeded, so a missing
# tool or malformed XML never destroys the source data.
# xmllint is part of libxml2 (e.g. sudo yum install -y libxml2).
# Returns non-zero when "metadata" is missing or cannot be formatted.
beautify_metadata() {
  if [[ ! -f metadata ]]; then
    echo "Warning: metadata file not found" >&2
    return 1
  fi

  if xmllint --format metadata > metadata.xml; then
    mv metadata.xml metadata.opf && rm -f metadata
  else
    # Do not leave the empty/partial intermediate file behind.
    rm -f metadata.xml
    echo "Warning: xmllint could not format metadata, file left untouched" >&2
    return 1
  fi
}

beautify_metadata || echo "Warning: metadata.opf was not produced" >&2
echo "Create cover page"

# Writes cover.xhtml, a minimal XHTML page that shows cover.jpg.
# The file is overwritten (not appended to), so running the script again
# still yields a single well-formed document.
write_cover_page() {
  # Quoted delimiter: the markup is written literally, nothing is expanded.
  cat <<'EOF' > cover.xhtml
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Cover</title>
<style type="text/css"> img { max-width: 100%; } </style>
</head>
<body>
<div id="cover-image">
<img src="cover.jpg" alt="Cover"/>
</div>
</body>
</html>
EOF
}

write_cover_page
echo "Construction of spine.xml"

# Rewrites, in place, the raw "xmllint --shell" dump of the <spine> element
# stored in file $1 into a list of manifest <item> entries:
#   - <itemref xmlid="X" index="N"/> becomes
#     <item href="X" id="idN" media-type="application/xhtml+xml"/>
#     (index 0 yields id="id")
#   - the opening <spine> tag is replaced by the toc.ncx item
#   - the closing tag and the xmllint shell prompt residue ("/ >") are removed
# All substitutions run in one sed pass, in the order listed; the order
# matters (e.g. the media-type is added before the toc.ncx item appears).
spine_to_manifest_items() {
  sed -i \
    -e 's#<spine>#<manifest>#g' \
    -e 's#</spine>#</manifest>#g' \
    -e 's#<itemref xmlid=#<item href=#g' \
    -e 's#index="#id="id#g' \
    -e 's#/># media-type="application/xhtml+xml"/>#g' \
    -e 's#id="id0"#id="id"#g' \
    -e 's#/ > -------##g' \
    -e 's#<manifest># <item href="toc.ncx" id="ncx" media-type="application/x-dtbncx+xml"/>#g' \
    -e 's#</manifest>##g' \
    -e 's#/ >##g' \
    "$1"
}

# Extracts //book/safarimeta/spine from metadata.opf into spine.xml and
# converts it with spine_to_manifest_items.
# Returns non-zero when metadata.opf is missing or xmllint fails, instead of
# silently leaving an empty spine.xml behind.
build_spine() {
  if [[ ! -f metadata.opf ]]; then
    echo "Warning: metadata.opf not found, cannot build spine.xml" >&2
    return 1
  fi

  if ! echo cat '//book/safarimeta/spine' | xmllint --shell metadata.opf > spine.xml; then
    rm -f spine.xml
    echo "Warning: xmllint could not read the spine from metadata.opf" >&2
    return 1
  fi

  spine_to_manifest_items spine.xml
}

build_spine || echo "Warning: spine.xml was not generated" >&2
echo "Done!"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment