Created
August 20, 2011 23:06
-
-
Save LaPingvino/1159804 to your computer and use it in GitHub Desktop.
Download yourself a Bible
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Download a complete Bible translation from an online index and merge it
# into a single HTML file under /tmp/<version>.
#
# Usage: script [version] [base-url]
#   version   translation code appended to the base URL (default: acf)
#   base-url  site root, including the trailing slash
#             (default: http://www.bibliaonline.com.br/)

# An unset or empty argument falls back to the default.
bib=${1:-acf}
bas=${2:-http://www.bibliaonline.com.br/}

# Scratch files that hold the scraped link lists. mktemp creates them with
# unpredictable names; abort if it cannot.
bookfile=$(mktemp) || exit 1
versefile=$(mktemp) || exit 1
# Remove the scratch files on every exit path, including early failures.
trap 'rm -f -- "$bookfile" "$versefile"' EXIT
# Stage 1: fetch the book index of version $bib and download every page it
# links to (one chapter-index page per book) into /tmp/$bib.
mkdir -p -- "/tmp/$bib"
echo "DOWNLOAD BOOK INDEX"
# Keep the index lines that mention "book=" followed by the version URL, then
# reduce each one to the target of its last href='...'. Lines without a link
# are dropped. The targets are stored as plain URLs and handed to wget as
# data, so text scraped from the page is never executed as a shell command.
# NOTE: $bas$bib is used as a regular expression, so its dots match any
# character.
curl "$bas$bib" \
  | grep "book=.*$bas$bib" \
  | sed -n "s/^.* href='//p" \
  | sed "s/'.*$//" > "$bookfile"
# wget saves into the current directory, so a failed cd must stop the script.
cd "/tmp/$bib" || exit 1
echo "REMOVE OLD FILES"
# -f keeps an empty directory from being an error; ${bib:?} refuses to expand
# to a bare /tmp/* if bib is ever empty.
rm -f -- "/tmp/${bib:?}"/*
echo "DOWNLOAD CHAPTER INDEX"
wget -i "$bookfile"
# Stage 2: collect the chapter links from the downloaded chapter-index pages
# and download every chapter into /tmp/$bib (the current directory).
#
# A chapter link is recognised by a 1-3 digit number between '>' and '<'.
# Only the last href='...' on each matching line is kept. The targets are
# stored as plain URLs and handed to wget as data, so text scraped from the
# pages is never executed as a shell command.
grep -E -e ">[0-9]{1,3}<" -- * \
  | sed -n "s/^.*href='//p" \
  | sed "s/'.*$//" > "$versefile"
# echo "SHOW VERSE LINKS"
# cat "$versefile"
echo "REMOVE CHAPTER INDEX FILES"
# The index pages are no longer needed once their links are extracted.
# ${bib:?} refuses to expand to a bare /tmp/* if bib is ever empty.
rm -f -- "/tmp/${bib:?}"/*
echo "DOWNLOAD CHAPTERS"
wget -i "$versefile"
echo "REMOVE TEMPORARY FILES"
rm -f -- "$bookfile" "$versefile"
# Print the value of the single-quoted attribute $1 found on the first line
# of file $2 that mentions the body tag and carries that attribute.
# Prints nothing when no such line exists.
body_attr() {
  sed -n "/<body/s/^.*$1='\([^']*\)'.*\$/\1/p" "$2" | head -n 1
}

# Reduce one downloaded chapter page to its <article>...</article> part.
# Arguments: $1 - downloaded chapter page
#            $2 - output root; the result is written to
#                 $2/<book>/<NNN>.chap.html
# Returns:   non-zero when the page has no usable book name or the output
#            cannot be written.
prune_chapter() {
  local src=$1 destroot=$2
  local book chapter
  book=$(body_attr book "$src")
  chapter=$(body_attr chapter "$src")
  # The book name comes from downloaded content and becomes a path
  # component, so reject anything that could leave the output root.
  case $book in
    '' | */* | . | ..)
      echo "skipping $src: no usable book name" >&2
      return 1
      ;;
  esac
  # A missing or non-numeric chapter is numbered 000. Forcing base 10 keeps
  # values such as 08 from being parsed as invalid octal.
  [[ $chapter =~ ^[0-9]+$ ]] || chapter=0
  printf -v chapter '%03d' "$((10#$chapter))"
  mkdir -p -- "$destroot/$book" || return 1
  # Print from the line containing <article> through the line containing
  # </article>, inclusive.
  awk '/<article>/,/<\/article>/' "$src" > "$destroot/$book/$chapter.chap.html"
}

echo "PRUNE CHAPTERS"
for file in *; do
  # Only regular files are chapter pages; skip directories and an unmatched glob.
  [[ -f $file ]] || continue
  prune_chapter "$file" "/tmp/$bib"
done
# Concatenate the pruned chapters of each book directory into
# <book>/complete.html. The zero-padded chapter file names make the glob
# expand in chapter order.
echo "MERGE BOOKS"
for directory in */; do
  # When nothing matches, the glob stays a literal "*/"; skip it.
  [[ -d $directory ]] || continue
  chapters=("$directory"*.chap.html)
  # Skip directories without any chapter file instead of passing the
  # unexpanded pattern to pandoc.
  [[ -e ${chapters[0]} ]] || continue
  # $directory already ends in "/".
  pandoc "${chapters[@]}" --from=html --to=html -o "${directory}complete.html"
done
# Copy each merged book to NN-<book>.html, where NN is the book's position in
# the list below, so that a sorted glob later yields the books in this order.
echo "ORDERING BOOKS"
nm=1
for bb in gn ex lv nm dt js jz rt 1sm 2sm 1rs 2rs 1cr 2cr ed ne et jó sl pv ec ct is jr lm ez dn os jl am ob jn mq na hc sf ag zc ml mt mc lc jo atos rm 1co 2co gl ef fp cl 1ts 2ts 1tm 2tm tt fm hb tg 1pe 2pe 1jo 2jo 3jo jd ap; do
  # The book code is passed as data, not as part of the printf format string.
  printf -v target '%02d-%s.html' "$nm" "$bb"
  if [[ -f $bb/complete.html ]]; then
    cp -- "$bb/complete.html" "$target"
  else
    echo "warning: no merged file for book '$bb'" >&2
  fi
  # The counter advances for missing books too, so every book keeps its
  # fixed number.
  nm=$((nm + 1))
done
# Merge the numbered per-book files into one standalone document. Only files
# named NN-*.html are used, so other HTML files in the directory (raw
# downloads, an older final.html) are left out of the result.
echo "FINAL MERGE"
ordered=([0-9][0-9]-*.html)
if [[ -e ${ordered[0]} ]]; then
  pandoc -s "${ordered[@]}" --from=html --to=html -o final.html
else
  # The glob matched nothing and is still the literal pattern.
  echo "no ordered book files found; final.html not written" >&2
  exit 1
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment