Skip to content

Instantly share code, notes, and snippets.

@aotantawy
Last active October 6, 2021 05:50
Show Gist options
  • Save aotantawy/73eccef4ddbd99a00baafc4cd3368f4b to your computer and use it in GitHub Desktop.
Save aotantawy/73eccef4ddbd99a00baafc4cd3368f4b to your computer and use it in GitHub Desktop.
A web-scraping script for downloading Quran files from quranbysubject.com
import os

import requests
from bs4 import BeautifulSoup
def removeSpacesFromfileName(fileName: str) -> str:
    """Return *fileName* with every space character replaced by a hyphen.

    Only the plain space character (" ") is replaced; other whitespace such
    as tabs or newlines is left untouched, and consecutive spaces each become
    their own hyphen.
    """
    return fileName.replace(" ", "-")
# Index page whose markup lists the categories and their sub-category links.
mainURL = "https://quranbysubject.com/categories.php"
# Endpoint that the tail of each sub-category link is appended to.
downloadURL = "https://quranbysubject.com/categorytext.php"
# Directory that receives one file per sub-category.
outputDir = "./quran-files/"
# Seconds to wait for the server; without a timeout requests can block forever.
requestTimeout = 30

# open() does not create missing directories, so make sure the target exists.
os.makedirs(outputDir, exist_ok=True)

page = requests.get(mainURL, timeout=requestTimeout)
# Nothing useful can be scraped from an error page, so fail loudly here.
page.raise_for_status()
soup = BeautifulSoup(page.content, "html.parser")
queryAllResults = soup.select("ul.large-block-grid-3 li")

for result in queryAllResults:
    # Only list items that contain an <h4> element are processed.
    if result.find("h4") is None:
        continue

    # Get all sub-categories in the main page
    subCategories = result.select("div ul.list li a")
    for subCategory in subCategories:
        href = subCategory.get("href")
        if not href:
            # An anchor without a target gives nothing to download.
            continue

        fileName = removeSpacesFromfileName(subCategory.text.strip())
        # A path separator in the link text would point outside outputDir.
        fileName = fileName.replace("/", "-").replace("\\", "-")
        if not fileName:
            # Empty link text would make the output path the directory itself.
            continue

        # The first 12 characters of the href are dropped and the remainder is
        # appended to downloadURL.
        # NOTE(review): presumably strips a "category.php" prefix so only the
        # query string remains -- confirm against the live markup.
        subCategoryURL = downloadURL + href[12:]
        response = requests.get(
            subCategoryURL, allow_redirects=True, timeout=requestTimeout
        )
        if not response.ok:
            # Skip instead of saving an HTTP error page as if it were data.
            print("skipped " + fileName + " (HTTP " + str(response.status_code) + ")")
            continue

        # The context manager closes the file even if the write fails.
        with open(os.path.join(outputDir, fileName), "wb") as outFile:
            outFile.write(response.content)

print("done")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment