# yshalsager/albabtainlibrary.py

## albabtainlibrary.py
# pip install httpx parsel
from pathlib import Path

from httpx import get
from parsel import Selector

BOOKS_LIST = Path('books_list.txt')
DOWNLOAD_URLS = Path('download_urls.txt')


def extract_pdf_url(iframe_src: str) -> str:
    """Return the PDF URL carried by a viewer iframe ``src``.

    The embedded viewer receives the document through a ``file=`` query
    parameter, for example::

        .../pdfjslight/web/viewer.html?v=2&disabledownload=1&...&file=https://www.albabtainlibrary.org/wp-content/uploads/2023/08/Ibn-tamiya-2-final.pdf

    Args:
        iframe_src: Value of an ``<iframe>`` ``src`` attribute.

    Returns:
        Everything after the last ``file=`` in ``iframe_src``, or ``''`` when
        the marker is absent (so an unrelated iframe is never mistaken for a
        PDF link).
    """
    _, marker, pdf_url = iframe_src.rpartition('file=')
    return pdf_url if marker else ''


def find_pdf_url(page) -> str:
    """Return the first PDF URL found in any iframe of ``page``.

    Args:
        page: A ``parsel.Selector`` built from the book page HTML.

    Returns:
        The PDF URL of the first iframe whose ``src`` contains ``file=``,
        or ``''`` when no iframe on the page carries one.
    """
    for iframe_src in page.css('iframe::attr(src)').getall():
        pdf_url = extract_pdf_url(iframe_src)
        if pdf_url:
            return pdf_url
    return ''


def main(books_list: Path = BOOKS_LIST, output: Path = DOWNLOAD_URLS) -> None:
    """Collect the PDF URL of every listed book page into ``output``.

    Reads one book page URL per line from ``books_list``, fetches each page,
    prints the PDF URL found in its viewer iframe and writes it as one line
    of ``output``. Any previous content of ``output`` is discarded.

    Pages without a PDF iframe are reported on stderr and skipped instead of
    producing an empty line in ``output``.

    Args:
        books_list: Text file with one book page URL per line.
        output: File that receives one PDF URL per line.
    """
    import sys  # local import: only needed for the warning stream

    # Blank lines (e.g. a trailing newline) are not URLs; skip them.
    book_urls = [
        line.strip()
        for line in books_list.read_text().splitlines()
        if line.strip()
    ]

    # Mode 'w' truncates a previous run's file; a single handle is kept open
    # for the whole run and closed deterministically by the context manager.
    with output.open(mode='w') as out:
        for book_url in book_urls:
            page = Selector(text=get(book_url).text)
            pdf_url = find_pdf_url(page)
            if not pdf_url:
                print(f'No PDF iframe found: {book_url}', file=sys.stderr)
                continue
            print(pdf_url)
            out.write(pdf_url + '\n')


if __name__ == '__main__':
    main()