Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# import libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
# Build one conference URL per (year, month) pair: the April ('04') and
# October ('10') pages for every year from 1971 through 2018, ordered by
# year and then by month.
years = list(range(1971, 2019))
months = ['04', '10']
links = [
    'https://www.lds.org/general-conference/{}/{}?lang=eng'.format(yr, mo)
    for yr in years
    for mo in months
]
# define the function
def SessionInfo(link):
    """Scrape the tile titles and tile contents listed on one conference page.

    Starts a new Chrome browser, loads ``link``, reads the text of every
    element with class ``lumen-tile__title`` and every element with class
    ``lumen-tile__content``, and pairs them by position on the page.

    Args:
        link: URL of the conference page to load.

    Returns:
        A ``pandas.DataFrame`` with one row per title element and three
        positional columns: 0 = title text, 1 = content text (treated as
        the speaker), 2 = ``link`` repeated on every row. If the page has
        fewer content elements than title elements, the unmatched trailing
        rows have a missing value in column 1.
    """
    driver = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2")
    try:
        driver.get(link)
        title_items = driver.find_elements_by_class_name("lumen-tile__title")
        speaker_items = driver.find_elements_by_class_name("lumen-tile__content")
        # Element text must be read while the browser session is still alive.
        titles = [item.text for item in title_items]
        # Extra content elements beyond the number of titles are ignored;
        # a shortfall no longer raises IndexError.
        # NOTE(review): pairing is purely positional; confirm every title
        # tile on the page has a matching content tile.
        speakers = [item.text for item in speaker_items[:len(titles)]]
    finally:
        # Always release the browser, even when loading or scraping fails.
        # quit() ends the whole WebDriver session, not just the window.
        driver.quit()
    page_links = [link] * len(titles)
    # Three row-lists transposed into three columns.
    return pd.DataFrame([titles, speakers, page_links]).transpose()
# collect the data
# Gather each page's frame in a list and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0, and it copied the whole
# accumulated frame on every call.
session_frames = []
for link in links:
    time.sleep(3)  # wait three seconds before each page load
    session = SessionInfo(link)
    session_frames.append(session)
# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame, which is what the original accumulation produced with no links.
all_sessions = (
    pd.concat(session_frames, ignore_index=True)
    if session_frames
    else pd.DataFrame()
)
# export
all_sessions.to_csv("all-sessions.csv", index = False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.