Skip to content

Instantly share code, notes, and snippets.

@mrzapp
Last active January 19, 2017 13:06
Show Gist options
  • Save mrzapp/17685cded6af49b9b06030b54abafa8c to your computer and use it in GitHub Desktop.
Save mrzapp/17685cded6af49b9b06030b54abafa8c to your computer and use it in GitHub Desktop.
A script for downloading manga from mangafox.me. Example usage: ./mangafox-downloader.sh -t sket_dance
#!/bin/bash
####################
# Parse chapters
#
# Scans the HTML of a series overview page for chapter links and calls
# fetch_chapter once per link, in the order the links appear in the page.
#
# Globals:
#   BASE_URL (read) - series URL; interpolated into the link patterns
# Arguments:
#   $1 - HTML of the series overview page
# Outputs:
#   Whatever fetch_chapter writes
####################
function parse_chapters {
  local link_html_regex link_html volume chapter

  # Find all chapter links
  link_html_regex="<a href=\"$BASE_URL/v([0-9a-zA-Z]+)/c([0-9.]+)/([0-9]+).html\" title=\"(?:[a-zA-Z0-9.,?! -]+)?\" class=\"tips\">"

  # Loop through matches
  # TODO: Reverse this order
  #
  # The links are fed through process substitution on fd 3 instead of a
  # pipeline: the loop then runs in the current shell, so an `exit` inside
  # fetch_chapter really stops the script, and commands run by fetch_chapter
  # cannot swallow the remaining links from stdin.
  # `tr` folds the page onto one line (so a tag broken across lines still
  # matches) without exposing the HTML to word splitting or globbing.
  while IFS= read -r -u 3 link_html; do
    volume=$(grep -oP "href=\"$BASE_URL/v\K([0-9a-zA-Z]+)" <<<"$link_html")
    chapter=$(grep -oP "href=\"$BASE_URL/v[0-9a-zA-Z]+/c\K([0-9.]+)" <<<"$link_html")

    # Fetch chapter
    fetch_chapter "$volume" "$chapter"
  done 3< <(printf '%s\n' "$1" | tr -s ' \t\n' ' ' | grep -oP "$link_html_regex")
}
####################
# Fetch chapter
#
# Downloads every page image of one chapter into $TITLE/_img, combines the
# images into $TITLE/<volume>_<chapter>.pdf with `convert`, then deletes the
# downloaded images. Pages are walked starting at 1 for as long as the
# current page contains a "next_page" link to another numbered page.
#
# Globals:
#   BASE_URL (read) - series URL the page URLs are built from
#   TITLE    (read) - output directory; $TITLE/_img must already exist
# Arguments:
#   $1 - volume identifier as used in the URL (the part after "v")
#   $2 - chapter identifier as used in the URL (the part after "c")
# Outputs:
#   Progress on stdout, error messages on stderr
# Exits:
#   1 if no image URL is found on a page or an image download fails
####################
function fetch_chapter {
  local volume=$1
  local chapter=$2
  local page=1
  local next_page="2.html"
  local next_page_regex="[0-9].html"
  local base_filename="${volume}_${chapter}"
  local page_number chapter_url chapter_html flat_html img_path image_url

  echo "CHAPTER $volume/$chapter"
  echo "-------------------------------------------"
  echo ""
  printf 'Getting pages '

  while [[ "$next_page" =~ $next_page_regex ]]; do
    # Three-digit padding keeps the *.jpg glob below in page order for
    # chapters of up to 999 pages (two digits misplaces page 100 before 11).
    printf -v page_number '%03d' "$page"

    chapter_url="$BASE_URL/v$volume/c$chapter/$page.html"
    chapter_html=$(curl --silent --compressed "$chapter_url")
    # Fold the page onto one line so tags broken across lines still match,
    # without exposing the HTML to word splitting or globbing.
    flat_html=$(tr -s ' \t\n' ' ' <<<"$chapter_html")
    img_path="$TITLE/_img/${base_filename}_${page_number}.jpg"

    # Find image
    image_url=$(grep -m 1 -oP "http://h.mfcdn.net/store/manga/[0-9.]+/[0-9a-zA-Z.-]+/compressed/[a-zA-Z0-9._-]+.jpg.token=[a-z0-9]+.ttl=[0-9]+" <<<"$flat_html" | head -1)
    if [[ -z "$image_url" ]]; then
      echo "Image could not be found for URL $chapter_url" >&2
      exit 1
    fi

    printf '%s' "$page"

    # Download image
    if ! wget -q "$image_url" -O "$img_path"; then
      echo "Image could not be downloaded from $image_url" >&2
      exit 1
    fi

    # Find next link; \K drops the href prefix, so a hit starts with the
    # next page's file name (e.g. "2.html"), which the loop condition tests.
    next_page=$(grep -oP "<a href=\"\K([0-9]+).html\" class=\"btn next_page\">" <<<"$flat_html")
    if [[ "$next_page" =~ $next_page_regex ]]; then
      printf ', '
    fi

    page=$((page + 1))
  done

  echo ""

  # Create PDF
  echo "Creating PDF..."
  convert "$TITLE"/_img/*.jpg "$TITLE/$base_filename.pdf"

  # Remove temp files
  echo "Removing temporary files..."
  rm -f -- "$TITLE"/_img/*.jpg
  echo ""
}
# Print a message to stderr and abort the script with status 1.
function error {
  printf '%s\n' "$*" >&2
  exit 1
}

# Get options first
# NOTE: -d sets DRYRUN, but nothing in this section consults it afterwards.
DRYRUN=false
TITLE=''
# The leading ":" puts getopts in silent mode so that bad options are
# reported by the arms below. -s is declared with an argument but has no
# handler, so it falls through to the catch-all and is rejected.
while getopts ":ds:t:" FLAG; do
  case "$FLAG" in
    d) DRYRUN=true ;;
    t) TITLE=$OPTARG ;;
    :) error "Option -${OPTARG} requires an argument" ;;
    \?) error "Unexpected option -${OPTARG}" ;;
    *) error "Unexpected option ${FLAG}" ;;
  esac
done

if [[ -z "$TITLE" ]]; then
  error "Title not provided"
fi

# The title becomes a directory that is removed recursively below, so it
# must name a plain entry of the current directory and nothing else.
if [[ "$TITLE" == */* || "$TITLE" == "." || "$TITLE" == ".." ]]; then
  error "Invalid title: $TITLE"
fi

# Remove and create directory
rm -rf -- "./$TITLE"
mkdir -p "./$TITLE/_img" || error "Could not create directory ./$TITLE/_img"

# Generate URLs and start processing
BASE_URL="http://mangafox.me/manga/$TITLE"
BASE_HTML=$(curl --silent --compressed "$BASE_URL") \
  || error "Could not download $BASE_URL"
if [[ -z "$BASE_HTML" ]]; then
  error "Empty response from $BASE_URL"
fi

echo ""
echo "*********************************"
echo "*** Downloading $TITLE"
echo "*********************************"
echo ""

parse_chapters "$BASE_HTML"

rm -rf -- "./$TITLE/_img"
echo "DONE!"

# Exit
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment