Last active
April 20, 2020 17:16
-
-
Save amard33p/5fcf9e5801c3ae9b07614c950876b6e4 to your computer and use it in GitHub Desktop.
Scrape all "Free" LeetCode Problems
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from webdriver_manager.chrome import ChromeDriverManager | |
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import Select | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
import atexit | |
# Difficulty filter; appended to the problem-set URLs and used in the output
# file name and heading.
DIFFICULTY = "Easy"

# Site root and the problem-set paths that are appended to it. Each path ends
# with "difficulty=" so that DIFFICULTY can be concatenated directly.
BASEURL = "https://leetcode.com"
TOP_PROBLEMSET_URL = "/problemset/top-interview-questions/?difficulty="
ALL_PROBLEMSET_URL = "/problemset/all/?difficulty="

# Selenium locators, stored as (strategy, expression) pairs so they can be
# unpacked straight into find_element(*LOCATOR).
# Table body containing one <tr> per problem.
TBODY_LOCATOR = (
    By.XPATH,
    '//*[@id="question-app"]/div/div[2]/div[2]/div[2]/table/tbody[1]',
)
# Rows-per-page <select> dropdown.
PAGINATION_LOCATOR = (By.XPATH, "//span/select[@class='form-control']")
# Lock icon, searched relative to a table cell (note the leading ".").
LOCK_ICON_LOCATOR = (By.XPATH, ".//div/span/span/i[contains(@class, 'fa-lock')]")
class LeetCodeParser:
    """Scrape LeetCode problem-set pages with Selenium and export Markdown.

    A Chrome session is started on construction and is closed automatically
    at interpreter exit via ``atexit``.
    """

    def __init__(self):
        """Start Chrome, open the site root and register browser cleanup."""
        # NOTE(review): passing the driver path positionally is the older
        # Selenium calling convention; newer releases expect a ``Service``
        # object instead -- confirm against the installed Selenium version.
        self.driver = webdriver.Chrome(ChromeDriverManager().install())
        self.driver.get(BASEURL)
        self.driver.maximize_window()
        atexit.register(self._quit)

    def _select_no_paginate(self):
        """Choose "all" in the rows-per-page dropdown.

        Waits up to 10 seconds for the dropdown to become visible.
        """
        # ``until`` returns the located element, so no second lookup is needed.
        dropdown = WebDriverWait(self.driver, 10).until(
            EC.visibility_of_element_located(PAGINATION_LOCATOR)
        )
        Select(dropdown).select_by_visible_text("all")

    def _load_rows(self, problemset_url):
        """Open a problem-set page unpaginated and return its ``<tr>`` rows.

        ``problemset_url`` is a path ending in ``difficulty=``; ``DIFFICULTY``
        is appended to it.
        """
        self.driver.get(f"{BASEURL}{problemset_url}{DIFFICULTY}")
        self._select_no_paginate()
        table_body = self.driver.find_element(*TBODY_LOCATOR)
        return table_body.find_elements(By.TAG_NAME, "tr")

    @staticmethod
    def _format_entry(number, title, href, highlight):
        """Return one Markdown bullet ``- [number: title](href)``.

        The link text is wrapped in ``**`` (bold) when ``highlight`` is true.
        The returned string ends with a newline.
        """
        label = f"{number}: {title}"
        if highlight:
            label = f"**{label}**"
        return f"- [{label}]({href})\n"

    def fetch_top_problems(self):
        """Collect identifiers from the top-interview-questions page.

        Returns:
            A set with the text of the second cell of every row (presumably
            the problem number -- confirm against the page layout).
        """
        identifiers = set()
        for row in self._load_rows(TOP_PROBLEMSET_URL):
            cells = row.find_elements(By.TAG_NAME, "td")
            # Skip rows too short to carry an identifier cell instead of
            # failing with IndexError.
            if len(cells) > 1:
                identifiers.add(cells[1].text)
        return identifiers

    def fetch_all_unlocked_problems(self):
        """Write every unlocked problem to ``leetcode_<difficulty>.md``.

        Rows whose third cell contains a lock icon are skipped. Problems that
        also appear on the top-interview-questions page are written in bold.
        """
        top_problems = self.fetch_top_problems()
        rows = self._load_rows(ALL_PROBLEMSET_URL)
        output_name = f"leetcode_{DIFFICULTY.lower()}.md"
        # Explicit UTF-8 so titles with non-ASCII characters are written the
        # same way on every platform.
        with open(output_name, "w", encoding="utf-8") as outfile:
            outfile.write(f"## LeetCode {DIFFICULTY} Problems\n\n")
            for row in rows:
                cells = row.find_elements(By.TAG_NAME, "td")
                if len(cells) < 3:
                    # Not a regular problem row; nothing to export.
                    continue
                title_cell = cells[2]
                if title_cell.find_elements(*LOCK_ICON_LOCATOR):
                    # Locked problem.
                    continue
                # Each property read is a browser round trip: fetch once.
                number = cells[1].text
                title = title_cell.get_attribute("value")
                link = title_cell.find_element(By.TAG_NAME, "a")
                outfile.write(
                    self._format_entry(
                        number,
                        title,
                        link.get_attribute("href"),
                        number in top_problems,
                    )
                )

    def _quit(self):
        """Close the browser and end the WebDriver session."""
        self.driver.quit()
if __name__ == "__main__":
    # Run the scrape only when executed as a script, so that importing this
    # module does not launch a browser.
    lc = LeetCodeParser()
    lc.fetch_all_unlocked_problems()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment