Skip to content

Instantly share code, notes, and snippets.

@PatrickLerner
Last active December 19, 2015 00:58
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PatrickLerner/5871954 to your computer and use it in GitHub Desktop.
Save PatrickLerner/5871954 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Make getopts start at the first argument.
OPTIND=1

# Option defaults; the option parser further down overrides them.
url=""          # -u URL: page to convert (mandatory)
ruby=0          # -r: 1 keeps the furigana markup
notvertical=0   # -n: 1 skips the vertical writing-mode CSS
open=0          # -o: 1 opens the finished book in Kindle for Mac
download=0      # -d: 1 also downloads the mp3
# Print the usage summary, the package requirements and the author credit
# to stdout, one entry of help_lines per output line.
show_help() {
  local -a help_lines=(
    " Usage:"
    " -u URL"
    ""
    " Optional:"
    " -n (not vertical, i.e. horizontal)"
    " -o (open with Kindle for Mac after generation)"
    " -r (include furigana)"
    " -h (show help)"
    " -d (download mp3)"
    ""
    ""
    " This script was only tested on a Mac."
    " (most of it should work on Linux just fine)"
    ""
    " Required packages:"
    " - pandoc"
    " - kindlegen"
    " - wget"
    ""
    " Optional packages:"
    " - Kindle for Mac"
    ""
    " Author: Patrick Lerner - patricklerner@me.com"
  )
  printf '%s\n' "${help_lines[@]}"
}
# Parse the command-line flags. -h, and every option getopts reports as "?",
# prints the help text and exits with status 0. -u stores its argument as the
# URL; the remaining flags just flip their switch to 1.
while getopts "h?rondu:" opt; do
  case "$opt" in
    h | \?)
      show_help
      exit 0
      ;;
    u) url=$OPTARG ;;
    r) ruby=1 ;;
    n) notvertical=1 ;;
    o) open=1 ;;
    d) download=1 ;;
  esac
done
# A URL is mandatory: stop when -u was not given.
# The complaint is a diagnostic, so it is written to stderr and cannot end up
# in captured or piped standard output.
if [[ -z "$url" ]]; then
  echo "Call me with a valid url, dude." >&2
  exit 1
fi
# Create a private scratch directory for the intermediate files. Abort right
# away when that fails: with an empty temp_dir the redirections that follow
# would target paths such as "/file.css".
temp_dir=$(mktemp -d -t tmp.XXXXXXXXXX) || {
  echo "Could not create a temporary directory." >&2
  exit 1
}

# Delete the scratch directory; registered below to run whenever the script exits.
finish() {
  # ${temp_dir:?} refuses to expand an empty or unset value, and "--" keeps a
  # path that starts with "-" from being parsed as an option.
  rm -rf -- "${temp_dir:?}"
}
trap finish EXIT
# Write the stylesheet for the generated page: the base rules always, plus the
# vertical writing-mode rules unless -n (notvertical) was given.
{
  printf '%s\n' "body { font-family: serif; } h2 { font-size: 120%; font-weight: bold; padding-top: 2em; margin-right: 1em; margin-left: 1em; } p { text-indent: 1em; } #newsDate { font-size: 90%; font-weight:bold; line-height: 1.5; }"
  if (( notvertical == 0 )); then
    printf '%s\n' "body { -webkit-writing-mode: vertical-rl; } #newsDate { padding-top: 10em; text-indent: -4em;} "
  fi
} >"${temp_dir}/file.css"
# Filter stdin -> stdout: replace every <ruby>BASE<rt>READING</rt></ruby>
# group with just BASE, i.e. drop the furigana reading.
# The substitution is a Perl expression executed by the utf8sed helper.
removeRuby() {
  local strip_ruby='s/<ruby>(.*?)<rt>(.*?)<\/rt><\/ruby>/$1/g'
  utf8sed "$strip_ruby"
}
# Filter stdin -> stdout: reduce a downloaded article page to its headline
# block and article body. Each stage is a Perl expression run by utf8sed:
#   1. delete the line breaks, so the page becomes one long line
#   2. keep only the contents of <div id="newstitle"> and
#      <div id="newsarticle">, separated by a newline
#   3. remove opening/closing tags matched by <[\/]?(a|span).*?>
#      NOTE(review): this also matches other tag names that merely start
#      with "a" (e.g. <article>) - confirm that is acceptable
#   4. remove spaces at the start of a line
#   5. turn every run of two or more spaces into a newline
#   6. tr/0-9/0-9/ and
#   7. s/h2/h2/g are identity transforms as written
#      NOTE(review): the intended source/target characters may have been
#      lost in transcription - confirm against the original script
getContentWithTitle() {
  utf8sed 's/\n//' \
    | utf8sed 's/.*<div id="newstitle">(.*?)<\/div>.*<div id="newsarticle">(.*?)<\/div>.*/$1\n$2/s' \
    | utf8sed 's/<[\/]?(a|span).*?>//sg' \
    | utf8sed 's/^[ ]+//g' \
    | utf8sed 's/ [ ]+/\n/g' \
    | utf8sed 'tr/0-9/0-9/' \
    | utf8sed 's/h2/h2/g'
}
# Filter stdin -> stdout: extract the article headline from a downloaded page.
# Each stage is a Perl expression run by utf8sed:
#   1. delete the line breaks, so the page becomes one long line
#   2. keep only the text of the <h2> inside <div id="newstitle">
#   3. remove opening/closing tags matched by <[\/]?(a|span).*?>
#   4. remove spaces at the start of a line
#   5. turn every run of two or more spaces into a newline
#   6. tr/0-9/0-9/ and
#   7. s/h2/h2/g are identity transforms as written
#      NOTE(review): the intended source/target characters may have been
#      lost in transcription - confirm against the original script
getTitle() {
  utf8sed 's/\n//' \
    | utf8sed 's/.*<div id="newstitle">.*?<h2>(.*?)<\/h2>.*?<\/div>.*/$1/s' \
    | utf8sed 's/<[\/]?(a|span).*?>//sg' \
    | utf8sed 's/^[ ]+//g' \
    | utf8sed 's/ [ ]+/\n/g' \
    | utf8sed 'tr/0-9/0-9/' \
    | utf8sed 's/h2/h2/g'
}
# Download the article page to a variable. Stop when wget reports a failure:
# carrying on with empty content would only produce an empty book.
content="$(wget -q -O - "${url}")" || {
  echo "Could not download ${url}" >&2
  exit 1
}
# Headline without furigana; used for the book metadata and the file names.
title="$(printf '%s\n' "$content" | getTitle | removeRuby)"
# opf file
# The title is embedded in XML, so &, < and > are written as entities first;
# a raw "&" or "<" in the headline would make the package document ill-formed.
opf_title=$(printf '%s' "$title" |
  sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g')
# Package document read by kindlegen: metadata, the two manifest items
# (stylesheet and page) and a right-to-left spine.
cat >"${temp_dir}/file.opf" <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<package version="3.0" xmlns="http://www.idpf.org/2007/opf"
unique-identifier="BookId">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/">
<dc:title>${opf_title}</dc:title>
<dc:contributor>NHK</dc:contributor>
<dc:language>ja</dc:language>
<dc:publisher>NHK</dc:publisher>
</metadata>
<manifest>
<item id="style" href="file.css" media-type="text/css" />
<item id="titlepage" href="file.html" media-type="application/xhtml+xml" />
</manifest>
<spine toc="tocncx" page-progression-direction="rtl">
<itemref idref="titlepage" />
</spine>
</package>
EOF
# opf file end
# Extract headline + article body; drop the furigana unless -r was given.
fileContent="$(printf '%s\n' "$content" | getContentWithTitle)"
if [[ "$ruby" -eq 0 ]]; then
  fileContent=$(printf '%s\n' "$fileContent" | removeRuby)
fi

# Let pandoc turn the extracted markup into the HTML page.
printf '%s\n' "$fileContent" > "${temp_dir}/file.proto.md"
pandoc "${temp_dir}/file.proto.md" -c "${temp_dir}/file.css" -o "${temp_dir}/file.html"

# Prepare the title for use inside the sed replacements below:
#   1. tr folds line breaks into spaces (a raw newline would end the sed command),
#   2. the first sed writes & < > " as entities so the HTML stays well-formed,
#   3. the second sed backslash-escapes \ / and &, which are special in the
#      replacement part of s/// (delimiter, back-reference and escape character).
sed_title=$(printf '%s' "$title" |
  tr '\n' ' ' |
  sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' -e 's/"/\&quot;/g' |
  sed -e 's|[\\/&]|\\&|g')

# Read the page completely before rewriting it in place, then add the title,
# the Dublin Core metadata and the language attribute.
fileContent=$(cat "${temp_dir}/file.html")
printf '%s\n' "$fileContent" |
  sed "s/<title>/<title>${sed_title}/" |
  sed "s/<\/head>/<link rel=\"Schema\.DC\" href=\"http:\/\/purl\.org\/dc\/elements\/1\.1\/\" \/><meta name=\"DC\.Title\" content=\"${sed_title}\" \/><meta name=\"DC\.Creator\" content=\"NHK\" \/><meta name=\"DC\.Publisher\" content=\"NHK\" \/><\/head>/" |
  sed "s/<html /<html xml:lang=\"ja\" /" > "${temp_dir}/file.html"
# Build the book from the package file and copy the resulting .mobi into the
# current directory, named after the article title.
kindlegen "${temp_dir}/file.opf"
cp "${temp_dir}/file.mobi" "${title}.mobi"

# -o: stop Kindle, delete the copy of this book in the Kindle for Mac content
# folder, then open the freshly built file.
if (( open == 1 )); then
  killall Kindle
  rm -rf "${HOME}/Library/Application Support/Kindle/My Kindle Content/${title}.mobi"
  open "${title}.mobi"
fi
# -d: download the audio whose URL is derived from the article URL and store
# it next to the book as "<title>.mp3".
if [[ "$download" -eq 1 ]]; then
  case "$url" in
    # Swap only the trailing ".html" for ".mp3"; an unanchored s/html/mp3/
    # would rewrite the first "html" found anywhere in the URL
    # (e.g. ".../html5/news.html" -> ".../mp35/news.html").
    *.html) mp3_url="${url%.html}.mp3" ;;
    # URL does not end in ".html": fall back to replacing the first "html".
    *) mp3_url=$(printf '%s\n' "$url" | sed 's/html/mp3/') ;;
  esac
  wget -c "$mp3_url" -O "${title}.mp3"
fi
# NOTE(review): "utf8sed" is called by the filter functions of this script but
# is not defined anywhere in this listing; this line looks like the body of
# that helper (possibly a separate script file) - confirm where it belongs.
# Runs the Perl expression given as the first argument over stdin and prints
# every line after the expression ran (-p). -Mutf8 treats the expression as
# UTF-8 source; -CSAD makes the standard streams, the arguments and the
# default I/O layers UTF-8.
perl -Mutf8 -CSAD -pe "$1"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment