Skip to content

Instantly share code, notes, and snippets.

@onesixromcom
Created November 20, 2023 23:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save onesixromcom/b20c6701d92bc1579ec94edf63d7bde5 to your computer and use it in GitHub Desktop.
Save onesixromcom/b20c6701d92bc1579ec94edf63d7bde5 to your computer and use it in GitHub Desktop.
booknet_ua FB2 downloader
#!/bin/bash
# booknet.ua FB2 downloader.
# Usage: pass the link of the book (or of its reader page) as the first
# argument, e.g. https://booknet.ua/reader/smttyar-b155659

# Store arguments in a special array
args=("$@")
# The first positional argument is the link to the page.
URL=${args[0]}
# Check if link to page is present. A missing link is a usage error, so it is
# reported on stderr and with a non-zero status that callers can detect.
if [[ -z "$URL" ]]; then
  echo "No url supplied. Please set collection name. (ex: https://booknet.ua/reader/smttyar-b155659)" >&2
  exit 1
fi
# Extract the text container of a saved reader page.
# Arguments: $1 - path to the saved HTML page
# Outputs:   the markup found inside div#texts, joined into one line, with
#            the <div> wrappers removed and no-break spaces turned into
#            ordinary spaces (stdout)
get_book_text() {
  # The page is read through a redirection so paths with whitespace work.
  # Only the <div ...> tag itself is removed; a greedy ".*" would also eat
  # any text that follows the tag on the same line.
  # The no-break space (U+00A0) is spelled as UTF-8 byte escapes because the
  # literal character is invisible and easily lost when the file is edited.
  hxnormalize -x -e -s < "$1" |
    hxselect -i div#texts |
    sed -e 's/<div[^>]*>//g' -e 's/<\/div>//g' |
    sed -e ':a;N;$!ba;s/\n//g' |
    sed -e $'s/\xc2\xa0/ /g'
}
# Download a reader page and extract its text container.
# Arguments: $1 - URL of the page
# Outputs:   the markup found inside div#texts, joined into one line, with
#            the <div> wrappers removed and no-break spaces turned into
#            ordinary spaces (stdout)
get_book_text_online() {
  # The URL is handed to wget as a quoted argument: an unquoted expansion
  # would let characters such as "?" be treated as a glob pattern.
  # Only the <div ...> tag itself is removed; a greedy ".*" would also eat
  # any text that follows the tag on the same line.
  # The no-break space (U+00A0) is spelled as UTF-8 byte escapes because the
  # literal character is invisible and easily lost when the file is edited.
  wget -O- --no-verbose --quiet "$1" |
    hxnormalize -x -e -s |
    hxselect -i div#texts |
    sed -e 's/<div[^>]*>//g' -e 's/<\/div>//g' |
    sed -e ':a;N;$!ba;s/\n//g' |
    sed -e $'s/\xc2\xa0/ /g'
}
# Get META content attr by property attr.
# Arguments: $1 - path to the HTML file
#            $2 - value of the property attribute (used inside a regex)
# Outputs:   the content value of every matching <meta> tag, one per line
get_meta_property() {
  # grep -o isolates just the matching tag prefix, so other tags or extra
  # attributes on the same line cannot leak into the result; sed then keeps
  # only the quoted content value.
  grep -o -e "<meta property=\"$2\" content=\"[^\"]*\"" -- "$1" |
    sed -e 's/.* content="\([^"]*\)"$/\1/'
}
# Get META content attr by name attr.
# Arguments: $1 - path to the HTML file
#            $2 - value of the name attribute (used inside a regex)
# Outputs:   the content value of every matching <meta> tag, one per line
get_meta_name() {
  # grep -o isolates just the matching tag prefix, so other tags or extra
  # attributes on the same line cannot leak into the result; sed then keeps
  # only the quoted content value.
  grep -o -e "<meta name=\"$2\" content=\"[^\"]*\"" -- "$1" |
    sed -e 's/.* content="\([^"]*\)"$/\1/'
}
# Get cookie from curl saved cookies file.
# Arguments: $1 - path to the cookies file (tab-separated columns)
#            $2 - cookie name
# Outputs:   the value (7th column) of every row whose name (6th column)
#            equals $2 exactly
get_cookie() {
  # The name column is compared exactly: a plain substring search over the
  # whole line would also pick rows that merely contain the name somewhere,
  # for example inside another cookie's value.
  awk -F '\t' -v name="$2" '$6 == name { print $7 }' "$1"
}
# Extract the author name from a saved book page.
# Arguments: $1 - path to the saved HTML page
# Outputs:   the text of the a.sa-name element(s) with all tags and the
#            first carriage return of each line removed (stdout)
get_book_author() {
  # The page is read through a redirection so paths with whitespace work.
  hxnormalize -x -e -s < "$1" |
    hxselect -i a.sa-name |
    sed -e 's/<[^>]*>//g' -e 's/\r//'
}
# List the genres shown on a saved book page.
# Arguments: $1 - path to the saved HTML page
# Outputs:   one line per non-blank text found in the links inside
#            div.col-md-12.jsAddTargetBlank (stdout)
get_book_genre() {
  # The page is read through a redirection so paths with whitespace work.
  # In the hxpipe output the text lines are the ones that start with "-";
  # sed keeps exactly those and strips the marker, which gives the same
  # result as splitting on an anchored "^-" field separator without relying
  # on how a particular awk treats anchors in FS.
  hxnormalize -x -e -s < "$1" |
    hxselect -i div.col-md-12.jsAddTargetBlank a |
    tr -d '\n' |
    hxpipe |
    sed -n 's/^-//p' |
    grep "\S"
}
# List the chapter values offered by the chapter selector of a saved page.
# Arguments: $1 - path to the saved HTML page
# Outputs:   whatever hxwls reports for the rewritten selector, i.e. the
#            former option values (stdout)
get_chapters_list() {
  # The page is read through a redirection so paths with whitespace work.
  # The <option value=...> elements are rewritten into <a href=...> links,
  # replacements to make hxwls work.
  hxnormalize -x -e -s < "$1" |
    hxselect -i select.js-chapter-change |
    sed -e 's/<option/<a/g' -e 's/option>/a>/g' -e 's/value=/href=/g' |
    hxwls
}
# List the chapter titles offered by the chapter selector of a saved page.
# Arguments: $1 - path to the saved HTML page
# Outputs:   one line per non-blank option text (stdout)
get_chapters_names() {
  # The page is read through a redirection so paths with whitespace work.
  # The opening <select ...> tag is dropped and the options are rewritten
  # into links before the markup is flattened to one line for hxpipe.
  # In the hxpipe output the text lines are the ones that start with "-".
  # Only that leading marker is stripped: splitting on every "-" would cut a
  # title such as "Chapter 1 - Intro" at its first hyphen.
  hxnormalize -x -e -s < "$1" |
    hxselect -i select.js-chapter-change |
    sed -e 's/<select[^>]*>//g' \
      -e 's/<option/<a/g' \
      -e 's/option>/a>/g' \
      -e 's/value=/href=/g' |
    tr -d '\n' |
    hxpipe |
    sed -n 's/^-//p' |
    grep "\S"
}
# Request one page of a chapter from the reader endpoint.
# Globals:   CSRF_COOKIE, CSRF_TOKEN (read) - sent with the request
#            AJAX_RESPONSE_FILE (read)      - file the response is saved to
# Arguments: $1 - URL
#            $2 - page url with chapter
#            $3 - page num
get_ajax_page() {
  local reader_url=$1
  local chapter_id=$2
  local page_no=$3

  # The whole curl command line is collected first, then run in one go.
  local -a request=(
    'https://booknet.ua/reader/get-page'
    -H 'accept: application/json, text/javascript, */*; q=0.01'
    -H 'content-type: application/x-www-form-urlencoded; charset=UTF-8'
    -H "cookie: _csrf=${CSRF_COOKIE};"
    -H "referer: ${reader_url}?c=${chapter_id}"
    -H "x-csrf-token: ${CSRF_TOKEN}"
    -o "${AJAX_RESPONSE_FILE}"
    -X POST
    --silent
    --data-raw "chapterId=${chapter_id}&page=${page_no}&_csrf=${CSRF_TOKEN}"
  )

  curl "${request[@]}"
}
# Simple solution to get json value by key.
# Arguments: $1 - path to the JSON file
#            $2 - property name; it is pasted into the PHP source, so it
#                 must come from a trusted caller
# Outputs:   the value of that property, or nothing when it is absent
get_json_val() {
  # The file is fed through a redirection so paths with whitespace work.
  php -r "echo json_decode(file_get_contents('php://stdin'))->$2 ?? '';" < "$1"
}
# Start a fresh output file containing only the XML prolog and the opening
# FictionBook tag.
# Globals: FILENAME (read) - path of the FB2 file being produced
write_fb2_header() {
  local -r opening='<?xml version="1.0" encoding="utf-8"?><FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">'

  # Throw away the result of an earlier run, if there is one.
  [[ ! -f "$FILENAME" ]] || rm "$FILENAME"
  touch "$FILENAME"
  printf '%s\n' "$opening" >"$FILENAME"
}
# Close the book body, embed the cover image when one is known, and close the
# FictionBook element.
# Globals: FILENAME (read) - path of the FB2 file being produced
#          IMG (read)      - URL of the cover image; empty means no cover
# Side effects: downloads the cover to ./cover.jpg
write_fb2_footer() {
  echo '</section></body>' >> "$FILENAME"
  if [[ -n "$IMG" ]]; then
    echo '<binary id="cover.jpg" content-type="image/jpeg">' >> "$FILENAME"
    # The URL is quoted so characters such as "?" or "*" are never globbed.
    if wget -O ./cover.jpg --no-verbose --quiet "$IMG"; then
      base64 ./cover.jpg >> "$FILENAME"
    else
      # The element is still closed below so the document stays well-formed.
      echo "Could not download cover image: $IMG" >&2
    fi
    echo '</binary>' >> "$FILENAME"
  fi
  echo '</FictionBook>' >> "$FILENAME"
}
# Append the <description> block and open the book body.
# Globals:   FILENAME (read) - path of the FB2 file being produced
#            IMG (read)      - when non-empty a cover page reference is added
# Arguments: $1 - book title
#            $2 - author (written as the first name)
#            $3 - genre text
#            $4 - annotation text
# The arguments are written as given; no XML escaping is applied here.
write_fb2_description() {
  # One grouped redirection instead of reopening the file for every line.
  {
    printf '%s\n' '<description><title-info>'
    printf '%s\n' "<book-title>$1</book-title>"
    printf '%s\n' "<genre>$3</genre>"
    printf '%s\n' "<author><first-name>$2</first-name><middle-name></middle-name><last-name></last-name></author>"
    printf '%s\n' '<from>Downloaded from booknet.ua. Converted by 16rom.com</from>'
    printf '%s\n' "<annotation><p>$4</p></annotation>"
    if [[ -n "$IMG" ]]; then
      printf '%s\n' '<coverpage><image l:href="#cover.jpg"></image></coverpage>'
    fi
    # "uk" is the language code for Ukrainian; "ua" is the country code.
    printf '%s\n' '<lang>uk</lang></title-info></description><body xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink"><section>'
  } >> "$FILENAME"
}
# Append one chunk of markup, followed by a newline, to the book file.
# Globals:   FILENAME (read) - path of the FB2 file being produced
# Arguments: $1 - text to append
write_fb2_text() {
  # printf writes the text verbatim; echo would treat a chunk such as "-n"
  # or "-e" as an option and drop it.
  printf '%s\n' "$1" >> "$FILENAME"
}
# Download the book page again and read fresh CSRF values from it.
# Globals: URL, PAGE_HTML, COOKIES_FILE (read)
#          CSRF_TOKEN (written)  - content of the "csrf-token" meta tag
#          CSRF_COOKIE (written) - value of the "_csrf" cookie
# Returns: 1 when the page could not be downloaded (the CSRF values are then
#          left as they were)
token_refresh() {
  # Every path and the URL are quoted so whitespace or glob characters in
  # them cannot split or expand the command line.
  if ! curl -o "$PAGE_HTML" --silent --no-verbose --cookie-jar "$COOKIES_FILE" "$URL"; then
    echo "Could not download $URL" >&2
    return 1
  fi
  CSRF_TOKEN=$(get_meta_name "$PAGE_HTML" "csrf-token")
  CSRF_COOKIE=$(get_cookie "$COOKIES_FILE" "_csrf")
}
#================== START ==================
# Main flow: fetch the reader page, read the book metadata and the chapter
# list from it, then download every chapter page by page and write the result
# as an FB2 file into the current directory. Fatal problems are reported on
# stderr and end the script with a non-zero status.
echo "booknet.ua downloader is starting..."
# Edit url to reader page.
URL=$(printf '%s\n' "$URL" | sed "s/\/book\//\/reader\//")
# The file name is the URL without the site prefix, without a ".html..." tail
# and without leading digits. The dots are escaped: an unescaped ".html" would
# also match "-html" inside the slug and cut the name short.
FILENAME=$(printf '%s\n' "$URL" | sed "s/https:\/\/booknet\.ua\/reader\///" | sed 's/\.html.*//' | sed -e 's/^[0-9]\+-*//g').fb2
COOKIES_FILE="./cookies.txt"
AJAX_RESPONSE_FILE="./ajax_response.json"
PAGE_HTML="./book_ua_page.html"
# Remove what an earlier run left behind.
if [[ -f "$PAGE_HTML" ]]; then rm "$PAGE_HTML"; fi
if [[ -f "./cover.jpg" ]]; then rm "./cover.jpg"; fi
token_refresh
echo "Book will be saved to $FILENAME"
IMG=$(get_meta_property "$PAGE_HTML" "og:image")
BOOK_TITLE=$(get_meta_property "$PAGE_HTML" "og:title")
BOOK_DESCRIPTION=$(get_meta_property "$PAGE_HTML" "og:description")
BOOK_AUTHOR=$(get_book_author "$PAGE_HTML")
printf '%s\n' "$BOOK_AUTHOR"
printf '%s\n' "$BOOK_TITLE"
printf '%s\n' "$BOOK_DESCRIPTION"
readarray -t BOOK_GENRE_TMP < <(get_book_genre "$PAGE_HTML")
# Join the genres with commas; IFS is changed only inside the subshell.
BOOK_GENRE=$(IFS=,; printf '%s' "${BOOK_GENRE_TMP[*]}")
printf '%s\n' "$BOOK_GENRE"
# Word splitting is intended here: every whitespace-separated item of the
# output becomes one chapter id.
# shellcheck disable=SC2207
CHAPTERS=($(get_chapters_list "$PAGE_HTML"))
readarray -t CHAPTERS_NAMES < <(get_chapters_names "$PAGE_HTML")
if (( ${#CHAPTERS[@]} == 0 )); then
  echo "No chapters were found! exit" >&2
  exit 1
fi
# Check if all book is available: request the first page of the last chapter.
CHAPTERS_TOTAL="${#CHAPTERS[@]}"
CHAPTERS_TOTAL=$((CHAPTERS_TOTAL-1))
get_ajax_page "$URL" "${CHAPTERS[$CHAPTERS_TOTAL]}" 1
TOTALPAGES=$(get_json_val "$AJAX_RESPONSE_FILE" totalPages)
if [[ -z "$TOTALPAGES" ]]; then
  echo "Complete book is not available. You should pay for it." >&2
  exit 1
fi
write_fb2_header
write_fb2_description "$BOOK_TITLE" "$BOOK_AUTHOR" "$BOOK_GENRE" "$BOOK_DESCRIPTION"
CHAPTER_NUM=0
echo "Processing ${#CHAPTERS[@]} chapters."
for chapter in "${CHAPTERS[@]}"; do
  echo "Process chapter: $CHAPTER_NUM - ${CHAPTERS_NAMES[$CHAPTER_NUM]}, page: 1"
  # Write section start.
  write_fb2_text "<section><title><p>${CHAPTERS_NAMES[$CHAPTER_NUM]}</p></title>"
  get_ajax_page "$URL" "$chapter" 1
  TOTALPAGES=$(get_json_val "$AJAX_RESPONSE_FILE" totalPages)
  if [[ -z "$TOTALPAGES" ]]; then
    # No page count in the answer: fetch new CSRF values and retry once.
    # NOTE(review): the long pause presumably lets a server-side limit
    # expire - confirm.
    echo 'Lets try to refresh csrf tokens'
    sleep 120
    token_refresh
    get_ajax_page "$URL" "$chapter" 1
    TOTALPAGES=$(get_json_val "$AJAX_RESPONSE_FILE" totalPages)
    if [[ -z "$TOTALPAGES" ]]; then
      echo "Total pages not found. Fatal error. Book could be not free. Exit." >&2
      exit 1
    fi
  fi
  # Write data text.
  write_fb2_text "$(get_json_val "$AJAX_RESPONSE_FILE" data)"
  if [ "$TOTALPAGES" -gt 1 ]; then
    echo "chapter has pages: $TOTALPAGES"
    for (( i = 2; i <= TOTALPAGES; ++i )); do
      echo "Process chapter: $CHAPTER_NUM page: $i"
      get_ajax_page "$URL" "$chapter" "$i"
      write_fb2_text "$(get_json_val "$AJAX_RESPONSE_FILE" data)"
      #sleep 2
    done
  fi
  # Write section end.
  CHAPTER_NUM=$((CHAPTER_NUM+1))
  write_fb2_text "</section>"
  #sleep 2
done
write_fb2_footer
echo "booknet.ua downloader finished."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment