@CHARITH1995
Last active July 17, 2020 08:17
Extract the article links from the <h2> and <a> tags of each story block, then scrape the title, date, and body text of every linked article.
import time

from bs4 import BeautifulSoup
from selenium import webdriver
# from datetime import datetime  # only needed if the date string is parsed below

courses_list = []  # one [title, date, contents] row per scraped article

# all_div is the list of listing-page containers collected earlier in the scraper
for link in all_div:
    news_container = link.find_all("div", {"class": "story-text"})
    for news in news_container:
        h2_tags = news.find_all("h2")
        for url in h2_tags:
            a_tags = url.find_all("a")
            for end_point in a_tags:
                # Build the absolute URL of the individual news article
                # print(end_point.get("href"))
                url_ind = "http://sinhala.adaderana.lk/" + end_point.get("href")

                # Load the article page in a fresh Firefox session so that
                # JavaScript-rendered content is present in the page source
                driver_ind = webdriver.Firefox(executable_path=r'D:\apps\anaconda\geckodriver.exe')
                driver_ind.get(url_ind)
                time.sleep(5)  # give the page time to finish loading

                html = driver_ind.page_source
                soup = BeautifulSoup(html, 'lxml')
                driver_ind.quit()  # close the browser once the HTML is captured

                # Extract the heading, timestamp and body text
                heading = soup.find_all("h1", {"class": "news-heading"})
                title = heading[0].get_text().strip()

                date = soup.find_all("p", {"class": "news-datestamp english-font"})
                date = str(date[0].get_text().strip())
                # date = datetime.strptime(date, '%A, %d %B %Y - %I:%M %p')

                content = soup.find_all("div", {"class": "news-content"})
                contents = content[0].get_text().strip()

                # Append this article's data as a new row in courses_list
                course = [title, date, contents]
                courses_list.append(course)
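Once the loops finish, courses_list holds one [title, date, contents] row per article. A minimal sketch of persisting those rows, assuming pandas is installed; the column names and output file name are illustrative and not part of the gist:

import pandas as pd

# Build a DataFrame from the scraped rows and write it to CSV
df = pd.DataFrame(courses_list, columns=["title", "date", "content"])
df.to_csv("adaderana_news.csv", index=False, encoding="utf-8-sig")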