Skip to content

Instantly share code, notes, and snippets.

@rajacsp
Last active July 9, 2018 23:57
Show Gist options
  • Save rajacsp/d948fc8435851e53ab0ab94b03207763 to your computer and use it in GitHub Desktop.
Save rajacsp/d948fc8435851e53ab0ab94b03207763 to your computer and use it in GitHub Desktop.
Title Collector in Python
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def startpy(
    url='https://technet.microsoft.com/en-us/library/aa995718(v=exchg.65).aspx',
    phantomjs_path=r'C:\Phantom\phantomjs-2.1.1-windows\bin\phantomjs.exe',
):
    """Print the text of every ``<h1>`` element found on a web page.

    The page is loaded through a PhantomJS WebDriver session, its source is
    parsed with BeautifulSoup (``html5lib`` parser), and the text of each
    ``<h1>`` tag is written to standard output, one per line.

    Args:
        url: Address of the page to load. Defaults to the page the original
            script was written for.
        phantomjs_path: Filesystem path of the PhantomJS executable handed
            to ``webdriver.PhantomJS``.

    Returns:
        None. The titles are printed, not returned.
    """
    driver = webdriver.PhantomJS(phantomjs_path)
    try:
        driver.get(url)
        page_html = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page fails;
        # otherwise each run leaves a PhantomJS process behind.
        driver.quit()

    # Parsing happens after the driver is closed: only the captured HTML
    # string is needed from here on.
    soup = BeautifulSoup(page_html, "html5lib")
    for heading in soup.find_all('h1'):
        print(heading.get_text())
# Run the title collector only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    startpy()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment