Skip to content

Instantly share code, notes, and snippets.

@erikgregorywebb
Created October 8, 2018 00:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikgregorywebb/80182b53a15096e6cdd24a874450a1cb to your computer and use it in GitHub Desktop.
Save erikgregorywebb/80182b53a15096e6cdd24a874450a1cb to your computer and use it in GitHub Desktop.
# import libraries
import time

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# prepare the links
# One index page URL per (year, month) pair: months '04' and '10' for every
# year from 1971 through 2018, ordered by year and then by month.
years = list(range(1971, 2019))  # upper bound is exclusive, so this ends at 2018
months = ['04', '10']
links = [
    'https://www.lds.org/general-conference/{}/{}?lang=eng'.format(year, month)
    for year in years
    for month in months
]
# define the function
def SessionInfo(link, driver_path="/Users/erikgregorywebb/Downloads/chromedriver 2"):
    """Scrape one conference index page into a DataFrame.

    Opens ``link`` in a fresh Chrome session, reads the text of every element
    with class ``lumen-tile__title`` (stored as the title) and every element
    with class ``lumen-tile__content`` (stored as the speaker), and pairs them
    up by position.

    Args:
        link: URL of the page to load.
        driver_path: Filesystem path to the chromedriver executable. Defaults
            to the path the script was originally written against.

    Returns:
        A pandas DataFrame with one row per paired element and three
        positional columns (0, 1, 2): title text, speaker text, and ``link``.
        The frame has no rows when the page yields no matching elements.
    """
    driver = webdriver.Chrome(executable_path=driver_path)
    try:
        driver.get(link)
        title_items = driver.find_elements(By.CLASS_NAME, "lumen-tile__title")
        speaker_items = driver.find_elements(By.CLASS_NAME, "lumen-tile__content")
        titles = []
        speakers = []
        # Elements are matched purely by position. zip stops at the shorter
        # list, so unpaired trailing elements are dropped instead of raising
        # IndexError part-way through a page.
        for title_item, speaker_item in zip(title_items, speaker_items):
            # .text must be read while the browser session is still alive.
            titles.append(title_item.text)
            speakers.append(speaker_item.text)
    finally:
        # quit() ends the whole session (browser and chromedriver process),
        # and runs even when loading or parsing the page fails.
        driver.quit()
    # Repeat the source URL once per row so each talk records its origin page.
    page_links = [link] * len(titles)
    session = [titles, speakers, page_links]
    # Three equal-length lists become three rows; transpose to one row per talk.
    data = pd.DataFrame(session).transpose()
    return data
# collect the data
# Scrape every index page in turn, keeping the per-page frames in a list.
session_frames = []
for link in links:
    # Fixed pause before each page load (presumably to throttle requests to
    # the site -- confirm before changing the delay).
    time.sleep(3)
    session_frames.append(SessionInfo(link))
# Combine once at the end: DataFrame.append was removed in pandas 2.0 and
# copied the whole accumulated frame on every iteration. pd.concat raises on
# an empty list, so fall back to an empty frame when there was nothing to scrape.
if session_frames:
    all_sessions = pd.concat(session_frames, ignore_index=True)
else:
    all_sessions = pd.DataFrame()
# export
all_sessions.to_csv("all-sessions.csv", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment