# import libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
# prepare the links: one '<year>/<month>?lang=eng' path for every year
# 1971-2018 and each of the months '04' and '10'
years = list(range(1971, 2019))
months = ['04', '10']
# NOTE(review): these are relative paths with no scheme or host, so a browser
# cannot load them as-is -- confirm the base URL they should be joined to.
# Build the list directly; the previous loop formatted each link but never
# stored it, which left `links` empty.
links = [
    '{}/{}?lang=eng'.format(year, month)
    for year in years
    for month in months
]
# define the function
def SessionInfo(link):
    """Load ``link`` in Chrome and return its tile titles and contents as a table.

    Args:
        link: Address of the page to open in the browser.

    Returns:
        A ``pandas.DataFrame`` with one row per tile and three positional
        columns (0, 1, 2): the text of a ``lumen-tile__title`` element, the
        text of the ``lumen-tile__content`` element at the same position, and
        ``link`` itself. The frame has no rows when no tiles are found.
    """
    # NOTE(review): `executable_path` and `find_elements_by_class_name` are
    # Selenium 3-era APIs that later Selenium releases dropped -- confirm the
    # installed Selenium version before upgrading.
    driver = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2")
    try:
        # The page must be loaded before any element lookup can match.
        driver.get(link)
        title_items = driver.find_elements_by_class_name("lumen-tile__title")
        speaker_items = driver.find_elements_by_class_name("lumen-tile__content")
        # Read the text while the browser is still open. zip() pairs the
        # elements by position and ignores any unmatched extras, so the two
        # columns always have the same length.
        pairs = [
            (title_item.text, speaker_item.text)
            for title_item, speaker_item in zip(title_items, speaker_items)
        ]
    finally:
        # Always shut the browser down, otherwise every call leaks a Chrome
        # process.
        driver.quit()
    titles = [title for title, _ in pairs]
    speakers = [speaker for _, speaker in pairs]
    page_links = [link] * len(titles)
    # Three row-lists transposed into three columns, one row per tile.
    data = pd.DataFrame([titles, speakers, page_links]).transpose()
    return data
# collect the data
# Gather the per-link frames first and concatenate once: DataFrame.append was
# removed in pandas 2.0, and calling it in a loop re-copies the accumulated
# frame on every iteration.
sessions = [SessionInfo(link) for link in links]
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# (the same result the append-based loop produced when there were no links).
all_sessions = pd.concat(sessions, ignore_index=True) if sessions else pd.DataFrame()
# export
all_sessions.to_csv("all-sessions.csv", index=False)
