@himan94
Forked from miraculixx/example.MD
Created June 9, 2020 14:45
Python multiprocess parallel Selenium web scraping with improved performance
beautifulsoup4==4.6.3
certifi==2018.10.15
chardet==3.0.4
idna==2.7
lxml==4.2.5
requests==2.20.1
selenium==3.141.0
urllib3==1.24.1
# answer to https://stackoverflow.com/q/53475578/890242
import threading
from urllib.parse import urljoin
from multiprocessing.pool import ThreadPool, Pool  # Pool allows a process-based variant (see sketch below)

import requests
from bs4 import BeautifulSoup
from selenium import webdriver


def get_links(link):
    # Collect the question URLs from the tag listing page with plain requests.
    res = requests.get(link)
    soup = BeautifulSoup(res.text, "lxml")
    titles = [urljoin(link, item.get("href"))
              for item in soup.select(".summary .question-hyperlink")]
    return titles


# One webdriver per thread, created lazily and reused for every URL that thread handles.
threadLocal = threading.local()


def get_driver():
    driver = getattr(threadLocal, 'driver', None)
    if driver is None:
        chromeOptions = webdriver.ChromeOptions()
        chromeOptions.add_argument("--headless")
        driver = webdriver.Chrome(options=chromeOptions)
        setattr(threadLocal, 'driver', driver)
    return driver


def get_title(url):
    # Reuse the calling thread's driver instead of starting a new Chrome per URL.
    driver = get_driver()
    driver.get(url)
    sauce = BeautifulSoup(driver.page_source, "lxml")
    item = sauce.select_one("h1 a").text
    print(item)
    return item


if __name__ == '__main__':
    url = "https://stackoverflow.com/questions/tagged/web-scraping"
    ThreadPool(5).map(get_title, get_links(url))
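
The description says multiprocess, and Pool is imported alongside ThreadPool but never used; a minimal sketch of the process-based variant, assuming the same get_links, get_title and get_driver definitions as above, only swaps the pool in the __main__ block (each worker process then creates and reuses its own headless Chrome, just as each thread does above; the pool size of 4 is an arbitrary choice):

# Process-based variant (sketch), reusing the functions defined above.
if __name__ == '__main__':
    url = "https://stackoverflow.com/questions/tagged/web-scraping"
    pool = Pool(4)
    try:
        pool.map(get_title, get_links(url))
    finally:
        pool.close()
        pool.join()

Both variants open one Chrome instance per worker; the process pool additionally sidesteps the GIL for any CPU-bound parsing, at the cost of higher startup overhead. Neither variant calls driver.quit(), so the per-worker browsers stay open until the program ends.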