Skip to content

Instantly share code, notes, and snippets.

@jkotra
Created November 25, 2019 07:11
Show Gist options
  • Save jkotra/9de90f173ffa8138633040542b004211 to your computer and use it in GitHub Desktop.
Save jkotra/9de90f173ffa8138633040542b004211 to your computer and use it in GitHub Desktop.
Medium Scraper
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from bs4 import BeautifulSoup
class Scraper:
    """Print the paragraph text of a web article rendered in headless Firefox.

    The page is loaded through Selenium so that JavaScript-rendered content
    is present in the HTML handed to BeautifulSoup. The browser session is
    shut down at the end of ``get``, so each instance can fetch one page.
    """

    def __init__(self):
        """Start a headless Firefox session."""
        self.options = Options()
        self.options.add_argument('--headless')
        self.driver = webdriver.Firefox(options=self.options)

    def get(self, link):
        """Load *link* and print the text of its headings and paragraphs.

        Args:
            link: URL of the page to scrape.

        Returns:
            None. The text of every selected ``<h1>``/``<p>`` element is
            written to standard output, one element per ``print`` call.
        """
        try:
            self.driver.get(link)
            # Take the DOM after scripts have run, not the raw response.
            data = self.driver.execute_script(
                'return document.body.innerHTML'
            )
        finally:
            # quit() ends the whole WebDriver session (close() only closes
            # the current window), and running it in ``finally`` prevents a
            # headless browser from being left behind when loading fails.
            self.driver.quit()

        soup = BeautifulSoup(data, "lxml")
        for section in soup.find_all("section"):
            # The first and last matches of each section are skipped;
            # presumably these are non-article elements (e.g. byline or
            # footer) -- TODO confirm against the current page layout.
            for paragraph in section.find_all(["h1", "p"])[1:-1]:
                print(paragraph.text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment