gitdagray/kanview_selenium3.py

## kanview_selenium3.py
# Local import keeps this snippet self-contained: newer pandas releases expect
# literal HTML to be wrapped in a file-like object instead of a raw string.
from io import StringIO

# Beautiful Soup grabs all Job Title links (anchors whose id starts with the
# job-title button prefix) from the already-parsed level-1 page.
for link in soup_level1.find_all('a', id=re.compile("^MainContent_uxLevel2_JobTitles_uxJobTitleBtn_")):

    # Selenium visits each Job Title page. The id is taken from the matched
    # anchor itself, so the lookup no longer depends on the ids being numbered
    # consecutively from the counter's starting value. find_element('id', ...)
    # is the generic locator call; the find_element_by_id shortcut is gone from
    # current Selenium releases.
    python_button = driver.find_element('id', link['id'])
    python_button.click()  # click link

    # Selenium hands off the source of the specific job page to Beautiful Soup
    soup_level2 = BeautifulSoup(driver.page_source, 'lxml')

    # Beautiful Soup grabs the first HTML table on the page
    # (raises IndexError if the page contains no <table>).
    table = soup_level2.find_all('table')[0]

    # Give the HTML table to pandas; read_html returns a list of dataframes,
    # with the first table row used as the column header.
    df = pd.read_html(StringIO(str(table)), header=0)

    # Store the (only) dataframe parsed from that table in the list
    datalist.append(df[0])

    # Ask Selenium to click the back button
    driver.execute_script("window.history.go(-1)")

    # Keep advancing the counter once per visited link. The loop itself no
    # longer reads it, but x is defined outside this snippet and later code
    # may still rely on its final value.
    x += 1
	# Local import keeps this snippet self-contained: newer pandas releases expect
	# literal HTML to be wrapped in a file-like object instead of a raw string.
	from io import StringIO

	# Beautiful Soup grabs all Job Title links (anchors whose id starts with the
	# job-title button prefix) from the already-parsed level-1 page.
	for link in soup_level1.find_all('a', id=re.compile("^MainContent_uxLevel2_JobTitles_uxJobTitleBtn_")):

		# Selenium visits each Job Title page. The id is taken from the matched
		# anchor itself, so the lookup does not depend on the ids being numbered
		# consecutively from the counter's starting value. find_element('id', ...)
		# is the generic locator call; the find_element_by_id shortcut is gone
		# from current Selenium releases.
		python_button = driver.find_element('id', link['id'])
		python_button.click()  # click link

		# Selenium hands off the source of the specific job page to Beautiful Soup
		soup_level2 = BeautifulSoup(driver.page_source, 'lxml')

		# Beautiful Soup grabs the first HTML table on the page
		# (raises IndexError if the page contains no <table>).
		table = soup_level2.find_all('table')[0]

		# Give the HTML table to pandas; read_html returns a list of dataframes,
		# with the first table row used as the column header.
		df = pd.read_html(StringIO(str(table)), header=0)

		# Store the (only) dataframe parsed from that table in the list
		datalist.append(df[0])

		# Ask Selenium to click the back button
		driver.execute_script("window.history.go(-1)")

		# Keep advancing the counter once per visited link. The loop itself does
		# not read it, but x is defined outside this snippet and later code may
		# still rely on its final value.
		x += 1