Last active
December 19, 2015 06:59
-
-
Save aniruddha-adhikary/5915474 to your computer and use it in GitHub Desktop.
A scraper for collecting all surahs of the Quran into JSON files using BeautifulSoup.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
#-*- coding: utf-8 -*- | |
import json
import urllib2

from BeautifulSoup import BeautifulSoup
def main(): | |
linkslist = getAyahLinks() | |
sura_count = 0 | |
for each_link in linkslist: | |
sura_count = sura_count + 1 | |
writeAyah(each_link['href'], str(sura_count) + ".json", each_link.text) | |
print "Sura %d has been scraped (%s)." % (sura_count, each_link.text) | |
def getAyahLinks():
    """Return the list of <a> tags pointing at each sura's page.

    Fetches the site's index page and pulls every anchor out of the
    Arabic menu list.
    """
    index_url = "http://www.ourholyquran.com/index.php?option=com_content&view=article&id=53&Itemid=83"
    index_html = urllib2.urlopen(index_url).read()
    arabic_menu = BeautifulSoup(index_html).find("ul", {"class": "menu-arabic"})
    return arabic_menu.findAll("a")
def writeAyah(ayah_url, outfile_name, sura_name):
    """Fetch one sura's page and write its name and ayah texts as JSON.

    ayah_url     -- href path of the sura page, relative to the site root
    outfile_name -- path of the JSON file to write (overwritten if present)
    sura_name    -- human-readable sura title, stored under "name"
    """
    page = urllib2.urlopen("http://www.ourholyquran.com" + ayah_url).read()
    souptext = BeautifulSoup(page)
    ayahs = souptext.findAll("td", {"class": "sura-arabic-text"})
    sura = {"name": sura_name,
            "ayahs": [each_ayah.text for each_ayah in ayahs]}
    # json.dumps handles quote escaping and separators, replacing the
    # hand-built output that produced invalid JSON (a garbled quote
    # replacement and a trailing comma after the last ayah).
    # Mode 'w', not 'a': appending would corrupt the file on a re-run.
    outfile = open(outfile_name, 'w')
    try:
        outfile.write(json.dumps(sura, ensure_ascii=False, indent=1).encode('utf8'))
    finally:
        outfile.close()
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment