# CHARITH1995/get-data.py

## get-data.py
# Scrape every article linked from the "story-text" containers found in
# `all_div` (built earlier in the script; presumably the parsed listing page --
# confirm against the code above) and collect one row per article.
#
# Result: `courses_list` is a list of [title, date, contents] rows, where
# `title` and `contents` are UTF-8 encoded and `date` is the stripped text of
# the page's date stamp.

# Initialised once, before the loops, so rows accumulate across all articles
# instead of being reset for every link.
courses_list = []

# A single browser session is reused for every article and always closed in
# `finally`; launching a new Firefox per link leaks one browser process per
# article.
driver_ind = webdriver.Firefox(executable_path=r'D:\apps\anaconda\geckodriver.exe')
try:
    for link in all_div:
        for news in link.find_all("div", {"class": "story-text"}):
            for url in news.find_all("h2"):
                for end_point in url.find_all("a"):
                    href = end_point.get("href")
                    if not href:
                        # Anchor without a target: nothing to fetch.
                        continue

                    url_ind = "http://sinhala.adaderana.lk/" + href
                    driver_ind.get(url_ind)
                    # Fixed pause before reading the page source, giving the
                    # browser time to finish rendering the article.
                    time.sleep(5)
                    html = driver_ind.page_source
                    soup = BeautifulSoup(html, 'lxml')

                    # `find` returns None when the element is absent, which
                    # lets incomplete pages be skipped rather than crashing
                    # the whole run with an IndexError.
                    heading = soup.find("h1", {"class": "news-heading"})
                    date_tag = soup.find("p", {"class": "news-datestamp english-font"})
                    content = soup.find("div", {"class": "news-content"})
                    if heading is None or date_tag is None or content is None:
                        print("Skipping %s: expected article elements not found" % url_ind)
                        continue

                    title = heading.get_text().encode(encoding='UTF-8').strip()
                    # Kept as text; the date format is not parsed here.
                    date = str(date_tag.get_text().strip())
                    contents = content.get_text().encode(encoding='UTF-8').strip()

                    course = [title, date, contents]
                    courses_list.append(course)
finally:
    driver_ind.quit()