Created
November 20, 2023 23:07
-
-
Save onesixromcom/b20c6701d92bc1579ec94edf63d7bde5 to your computer and use it in GitHub Desktop.
booknet_ua FB2 downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# booknet.ua FB2 downloader.
# Usage: pass the URL of a booknet.ua book/reader page as the first argument,
# e.g. https://booknet.ua/reader/smttyar-b155659

# Keep the positional parameters in an array; the first one is the page URL.
args=("$@")
URL=${args[0]:-}

# Abort early (with a failure status, on stderr) when no URL was given.
if [[ -z "$URL" ]]; then
  echo "No url supplied. Please set collection name. (ex: https://booknet.ua/reader/smttyar-b155659)" >&2
  exit 1
fi
# Extract the book text from a saved HTML page.
# Arguments: $1 - path to the HTML file
# Outputs:   the markup selected by div#texts, with all <div> tags removed
#            and all lines joined into one, on stdout
# Requires:  hxnormalize and hxselect on PATH
get_book_text()
{
  # <div[^>]*> removes only the tag itself; a greedy <div.*> would also
  # delete any text that follows the tag on the same line.
  hxnormalize -x -e -s < "$1" |
    hxselect -i 'div#texts' |
    sed -e 's/<div[^>]*>//g' -e 's/<\/div>//g' |
    sed -e ':a;N;$!ba;s/\n//g' |
    # NOTE(review): as written this replaces a space with a space; the
    # pattern presumably held a special whitespace character - confirm.
    sed -e $'s/ / /g'
}
# Download a page and extract the book text from it.
# Arguments: $1 - URL of the page
# Outputs:   the markup selected by div#texts, with all <div> tags removed
#            and all lines joined into one, on stdout
# Requires:  wget, hxnormalize and hxselect on PATH
get_book_text_online()
{
  # printf with a quoted argument hands the URL to wget unchanged; an
  # unquoted echo would word-split and glob-expand it first.
  printf '%s\n' "$1" |
    wget -O- -i- --no-verbose --quiet |
    hxnormalize -x -e -s |
    hxselect -i 'div#texts' |
    # <div[^>]*> removes only the tag, not the rest of the line.
    sed -e 's/<div[^>]*>//g' -e 's/<\/div>//g' |
    sed -e ':a;N;$!ba;s/\n//g' |
    # NOTE(review): as written this replaces a space with a space; the
    # pattern presumably held a special whitespace character - confirm.
    sed -e $'s/ / /g'
}
# Get the content attribute of a <meta property="..." content="..."> tag.
# Arguments: $1 - path to the HTML file
#            $2 - value of the property attribute (inserted into a sed
#                 regex as-is, so it must not contain "/" or regex
#                 metacharacters)
# Outputs:   one content value per matching line on stdout; nothing when
#            no tag matches
get_meta_property()
{
  # [^"]* stops at the closing quote of content, so attributes that follow
  # it on the same line are not captured.
  sed -n "s/.*<meta property=\"$2\" content=\"\([^\"]*\)\".*/\1/p" "$1"
}
# Get the content attribute of a <meta name="..." content="..."> tag.
# Arguments: $1 - path to the HTML file
#            $2 - value of the name attribute (inserted into a sed regex
#                 as-is, so it must not contain "/" or regex metacharacters)
# Outputs:   one content value per matching line on stdout; nothing when
#            no tag matches
get_meta_name()
{
  # [^"]* stops at the closing quote of content, so attributes that follow
  # it on the same line are not captured.
  sed -n "s/.*<meta name=\"$2\" content=\"\([^\"]*\)\".*/\1/p" "$1"
}
# Get a cookie value from a cookie file saved by curl.
# Arguments: $1 - path to the cookie file (tab-separated fields)
#            $2 - cookie name
# Outputs:   the 7th field (value) of every line whose 6th field equals
#            the cookie name, on stdout
get_cookie()
{
  # Compare the name field exactly: a substring match over the whole line
  # would also pick up other cookies whose name or value contains $2.
  awk -F '\t' -v name="$2" '$6 == name { print $7 }' "$1"
}
# Extract the author name from a saved HTML page.
# Arguments: $1 - path to the HTML file
# Outputs:   the text of the elements matching a.sa-name, with all tags
#            and the first carriage return of each line removed, on stdout
# Requires:  hxnormalize and hxselect on PATH
get_book_author()
{
  hxnormalize -x -e -s < "$1" |
    hxselect -i a.sa-name |
    sed -e 's/<[^>]*>//g' -e 's/\r//'
}
# List the genres of a book from a saved HTML page.
# Arguments: $1 - path to the HTML file
# Outputs:   one non-blank link text per line on stdout, taken from the
#            <a> elements inside div.col-md-12.jsAddTargetBlank
# Returns:   non-zero when no non-blank text was found (status of grep)
# Requires:  hxnormalize, hxselect and hxpipe on PATH
get_book_genre()
{
  hxnormalize -x -e -s < "$1" |
    hxselect -i div.col-md-12.jsAddTargetBlank a |
    tr -d '\n' |
    hxpipe |
    # Keep only the lines that start with "-" and strip that marker.
    # sed avoids depending on how awk treats "^" in a field separator.
    sed -n 's/^-//p' |
    grep '[^[:space:]]'
}
# List the chapter ids offered by the chapter selector of a saved page.
# Arguments: $1 - path to the HTML file
# Outputs:   whatever hxwls lists for the rewritten selector (the former
#            option value attributes), on stdout
# Requires:  hxnormalize, hxselect and hxwls on PATH
get_chapters_list()
{
  # hxwls only lists links, so each <option value=...> is rewritten into
  # an <a href=...> before being handed to it.
  hxnormalize -x -e -s < "$1" |
    hxselect -i select.js-chapter-change |
    sed -e 's/<option/<a/g' \
        -e 's/option>/a>/g' \
        -e 's/value=/href=/g' |
    hxwls
}
# List the chapter titles offered by the chapter selector of a saved page.
# Arguments: $1 - path to the HTML file
# Outputs:   one non-blank chapter title per line on stdout
# Returns:   non-zero when no non-blank title was found (status of grep)
# Requires:  hxnormalize, hxselect and hxpipe on PATH
get_chapters_names()
{
  hxnormalize -x -e -s < "$1" |
    hxselect -i select.js-chapter-change |
    sed -e 's/<select[^>]*>//g' \
        -e 's/<option/<a/g' \
        -e 's/option>/a>/g' \
        -e 's/value=/href=/g' |
    tr -d '\n' |
    hxpipe |
    # Keep only the lines that start with "-" and strip just that marker.
    # Splitting on every "-" would cut a title such as
    # "Chapter 1 - Start" at its first hyphen.
    sed -n 's/^-//p' |
    grep '[^[:space:]]'
}
# Request one page of a chapter and save the response.
# Globals:   CSRF_COOKIE, CSRF_TOKEN (read),
#            AJAX_RESPONSE_FILE (read; the file is replaced)
# Arguments: $1 - URL of the reader page (used for the referer header)
#            $2 - chapter id
#            $3 - page number
# Returns:   exit status of curl
get_ajax_page()
{
  local reader_url=$1 chapter_id=$2 page_num=$3

  # Remove the previous response first, so a failed request cannot be
  # mistaken for a fresh answer by whoever reads the file next.
  if [[ -n "${AJAX_RESPONSE_FILE:-}" ]]; then
    rm -f -- "$AJAX_RESPONSE_FILE"
  fi

  # --data-urlencode percent-encodes each value, so characters such as
  # "+", "=" or "&" in the token reach the server unchanged.
  curl 'https://booknet.ua/reader/get-page' \
    -H 'accept: application/json, text/javascript, */*; q=0.01' \
    -H 'content-type: application/x-www-form-urlencoded; charset=UTF-8' \
    -H "cookie: _csrf=$CSRF_COOKIE;" \
    -H "referer: $reader_url?c=$chapter_id" \
    -H "x-csrf-token: $CSRF_TOKEN" \
    -o "$AJAX_RESPONSE_FILE" \
    -X POST \
    --silent \
    --data-urlencode "chapterId=$chapter_id" \
    --data-urlencode "page=$page_num" \
    --data-urlencode "_csrf=$CSRF_TOKEN"
}
# Get a top-level value from a JSON file by key.
# Arguments: $1 - path to the JSON file
#            $2 - property name to read from the decoded JSON
# Outputs:   the value on stdout; nothing when the file is unreadable or
#            the property is missing
# Requires:  php on PATH
get_json_val()
{
  # An unreadable or missing file simply yields no value.
  [[ -r "$1" ]] || return 0

  # The key is passed as an argument ($argv[1]) rather than pasted into
  # the PHP source, so it can never be executed as code.
  php -r 'echo json_decode(file_get_contents("php://stdin"))->{$argv[1]} ?? "";' \
    -- "$2" < "$1"
}
# Start a new FB2 file containing only the XML declaration and the opening
# FictionBook tag.
# Globals:   FILENAME (read) - path of the output file; any existing file
#            at that path is replaced
write_fb2_header()
{
  # Remove first so an existing file is replaced, then let the redirection
  # create the new one; a separate existence test and touch are not needed.
  rm -f -- "$FILENAME"
  printf '%s\n' '<?xml version="1.0" encoding="utf-8"?><FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">' > "$FILENAME"
}
# Close the book body, embed the cover image (if any) and close the file.
# Globals:   FILENAME (read) - output file, appended to
#            IMG (read)      - cover image URL; empty means no cover
# Side effects: downloads the cover to ./cover.jpg when IMG is set
# Requires:  wget and base64 on PATH
write_fb2_footer()
{
  local cover_url=${IMG:-}

  if [[ -n "$cover_url" ]]; then
    # The URL is quoted so it reaches wget as a single, unexpanded word.
    wget -O ./cover.jpg --no-verbose --quiet "$cover_url" ||
      echo "warning: could not download cover image" >&2
  fi

  {
    printf '%s\n' '</section></body>'
    if [[ -n "$cover_url" ]]; then
      printf '%s\n' '<binary id="cover.jpg" content-type="image/jpeg">'
      base64 ./cover.jpg
      printf '%s\n' '</binary>'
    fi
    printf '%s\n' '</FictionBook>'
  } >> "$FILENAME"
}
# Append the <description> block and open the book body.
# Globals:   FILENAME (read) - output file, appended to
#            IMG (read)      - when non-empty a cover page reference is added
# Arguments: $1 - book title
#            $2 - author (written as first-name)
#            $3 - genre
#            $4 - annotation text
# The arguments are written as given; no XML escaping is applied.
write_fb2_description()
{
  {
    printf '%s\n' '<description><title-info>'
    printf '%s\n' "<book-title>$1</book-title>"
    printf '%s\n' "<genre>$3</genre>"
    printf '%s\n' "<author><first-name>$2</first-name><middle-name></middle-name><last-name></last-name></author>"
    printf '%s\n' '<from>Downloaded from booknet.ua. Converted by 16rom.com</from>'
    printf '%s\n' "<annotation><p>$4</p></annotation>"
    if [[ -n "${IMG:-}" ]]; then
      printf '%s\n' '<coverpage><image l:href="#cover.jpg"></image></coverpage>'
    fi
    # "uk" is the language code for Ukrainian; "ua" is the country code.
    printf '%s\n' '<lang>uk</lang></title-info></description><body xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:xlink="http://www.w3.org/1999/xlink"><section>'
  } >> "$FILENAME"
}
# Append one chunk of text, followed by a newline, to the output file.
# Globals:   FILENAME (read) - output file, appended to
# Arguments: $1 - text to append
write_fb2_text()
{
  # printf writes the text verbatim; echo would treat a chunk such as
  # "-n" or "-e" as an option instead of data.
  printf '%s\n' "$1" >> "$FILENAME"
}
# Re-download the page and refresh the CSRF token and cookie.
# Globals:   URL, PAGE_HTML, COOKIES_FILE (read)
#            CSRF_TOKEN  (written) - content of the csrf-token meta tag
#            CSRF_COOKIE (written) - value of the _csrf cookie
# Side effects: overwrites $PAGE_HTML and $COOKIES_FILE
# Requires:  curl on PATH
token_refresh()
{
  # Everything is quoted so a URL containing "?" or "*" is not glob-expanded
  # and paths containing spaces stay in one piece.
  curl -o "$PAGE_HTML" --silent --no-verbose --cookie-jar "$COOKIES_FILE" "$URL"
  CSRF_TOKEN=$(get_meta_name "$PAGE_HTML" "csrf-token")
  CSRF_COOKIE=$(get_cookie "$COOKIES_FILE" "_csrf")
}
#================== START ==================
# Main flow: fetch the reader page, read the book metadata and chapter
# list, then download every page of every chapter into one FB2 file.
# Every failure path exits with a non-zero status.
echo "booknet.ua downloader is starting..."

# A /book/ link is rewritten to its /reader/ counterpart.
URL=$(printf '%s\n' "$URL" | sed 's#/book/#/reader/#')

# Output name: the URL without the site prefix, without any ".html..." tail
# and without a leading numeric id. Dots are escaped so that "." matches
# only a literal dot.
FILENAME=$(printf '%s\n' "$URL" |
  sed -e 's#https://booknet\.ua/reader/##' \
      -e 's/\.html.*//' \
      -e 's/^[0-9]\{1,\}-*//').fb2

COOKIES_FILE="./cookies.txt"
AJAX_RESPONSE_FILE="./ajax_response.json"
PAGE_HTML="./book_ua_page.html"

# Drop leftovers of a previous run.
rm -f -- "$PAGE_HTML" "./cover.jpg"

token_refresh
echo "Book will be saved to $FILENAME"

IMG=$(get_meta_property "$PAGE_HTML" "og:image")
BOOK_TITLE=$(get_meta_property "$PAGE_HTML" "og:title")
BOOK_DESCRIPTION=$(get_meta_property "$PAGE_HTML" "og:description")
BOOK_AUTHOR=$(get_book_author "$PAGE_HTML")
printf '%s\n' "$BOOK_AUTHOR" "$BOOK_TITLE" "$BOOK_DESCRIPTION"

# Join the genre lines with commas; the IFS change stays inside the
# command substitution, so no eval is needed.
readarray -t BOOK_GENRE_TMP < <(get_book_genre "$PAGE_HTML")
BOOK_GENRE=$(IFS=,; printf '%s' "${BOOK_GENRE_TMP[*]}")
printf '%s\n' "$BOOK_GENRE"

# Split the chapter ids on whitespace without glob expansion. read returns
# non-zero at end of input even though the array has been filled.
CHAPTERS=()
read -r -d '' -a CHAPTERS < <(get_chapters_list "$PAGE_HTML") || true
readarray -t CHAPTERS_NAMES < <(get_chapters_names "$PAGE_HTML")
if [[ "${#CHAPTERS[@]}" -eq 0 ]]; then
  echo "No chapters were found! exit" >&2
  exit 1
fi

# Check if all book is available: the last chapter must be readable.
CHAPTERS_TOTAL="${#CHAPTERS[@]}"
CHAPTERS_TOTAL=$((CHAPTERS_TOTAL-1))
get_ajax_page "$URL" "${CHAPTERS[$CHAPTERS_TOTAL]}" 1
TOTALPAGES=$(get_json_val "$AJAX_RESPONSE_FILE" totalPages)
if [[ -z "$TOTALPAGES" ]]; then
  echo "Complete book is not available. You should pay for it." >&2
  exit 1
fi

write_fb2_header
write_fb2_description "$BOOK_TITLE" "$BOOK_AUTHOR" "$BOOK_GENRE" "$BOOK_DESCRIPTION"

CHAPTER_NUM=0
echo "Processing ${#CHAPTERS[@]} chapters."
for chapter in "${CHAPTERS[@]}"; do
  echo "Process chapter: $CHAPTER_NUM - ${CHAPTERS_NAMES[$CHAPTER_NUM]}, page: 1"
  # Write section start.
  write_fb2_text "<section><title><p>${CHAPTERS_NAMES[$CHAPTER_NUM]}</p></title>"

  get_ajax_page "$URL" "$chapter" 1
  TOTALPAGES=$(get_json_val "$AJAX_RESPONSE_FILE" totalPages)
  if [[ -z "$TOTALPAGES" ]]; then
    # No page count came back: wait, refresh the tokens and retry once.
    echo 'Lets try to refresh csrf tokens'
    sleep 120
    token_refresh
    get_ajax_page "$URL" "$chapter" 1
    TOTALPAGES=$(get_json_val "$AJAX_RESPONSE_FILE" totalPages)
    if [[ -z "$TOTALPAGES" ]]; then
      echo "Total pages not found. Fatal error. Book could be not free. Exit." >&2
      exit 1
    fi
  fi

  # Write data text.
  write_fb2_text "$(get_json_val "$AJAX_RESPONSE_FILE" data)"

  # "[" compares the values as plain integers, so text coming from the
  # server is never evaluated as an arithmetic expression.
  if [ "$TOTALPAGES" -gt 1 ]; then
    echo "chapter has pages: $TOTALPAGES"
    i=2
    while [ "$i" -le "$TOTALPAGES" ]; do
      echo "Process chapter: $CHAPTER_NUM page: $i"
      get_ajax_page "$URL" "$chapter" "$i"
      write_fb2_text "$(get_json_val "$AJAX_RESPONSE_FILE" data)"
      #sleep 2
      i=$((i+1))
    done
  fi

  # Write section end.
  CHAPTER_NUM=$((CHAPTER_NUM+1))
  write_fb2_text "</section>"
  #sleep 2
done

write_fb2_footer
echo "booknet.ua downloader finished."
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment