Skip to content

Instantly share code, notes, and snippets.

@FernandoCelmer
Last active March 24, 2023 16:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FernandoCelmer/7321913d59a5a9bf7249f57c4428430a to your computer and use it in GitHub Desktop.
Save FernandoCelmer/7321913d59a5a9bf7249f57c4428430a to your computer and use it in GitHub Desktop.
"""
selenium==4.6.0
webdriver-manager==3.8.4
packaging==21.3
"""
import json
import sys
from urllib.parse import quote_plus

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
def get_chrome_browser(headless: bool = False):
    """Create a Chrome WebDriver backed by a driver binary from webdriver-manager.

    Args:
        headless: When true, pass ``--headless`` so Chrome runs without a
            visible window.

    Returns:
        A ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()`` when done.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    if headless:
        chrome_options.add_argument("--headless")

    # Selenium 4 deprecates the ``executable_path=`` and ``chrome_options=``
    # keyword arguments of ``webdriver.Chrome``; the supported form is a
    # ``Service`` object for the driver binary plus ``options=``.
    service = Service(executable_path=ChromeDriverManager().install())
    return webdriver.Chrome(service=service, options=chrome_options)
def _collect_packages(driver, results):
    """Append each package snippet on the currently loaded page to *results*.

    Every element with class ``package-snippet`` becomes a dict with
    ``name`` and ``description`` keys; the name is also printed as progress
    output.
    """
    for snippet in driver.find_elements(By.CLASS_NAME, "package-snippet"):
        entry = {
            "name": snippet.find_element(
                By.CLASS_NAME, "package-snippet__name"
            ).text,
            "description": snippet.find_element(
                By.CLASS_NAME, "package-snippet__description"
            ).text,
        }
        results.append(entry)
        print(entry["name"])


def main():
    """Scrape PyPI search results for a package name into ``packages.json``.

    Prompts for a search term and a number of additional result pages to
    follow, collects name/description pairs from each page, and writes the
    accumulated list as indented JSON.

    Raises:
        ValueError: If the "Pages" answer is not an integer.
    """
    packages_list = []
    driver = get_chrome_browser(headless=True)
    try:
        package_name = input("Package: ")
        # Validate the page count before any browsing so bad input fails fast.
        pages = int(input("Pages: "))

        # Encode the user-supplied term so spaces, '&', '#', etc. cannot
        # corrupt the query string.
        driver.get(f"https://pypi.org/search/?q={quote_plus(package_name)}")
        _collect_packages(driver, packages_list)

        for _ in range(pages):
            try:
                next_page = driver.find_element(By.LINK_TEXT, "Next")
            except NoSuchElementException:
                # No "Next" link: the last result page has been reached.
                break
            try:
                next_page.click()
                _collect_packages(driver, packages_list)
            except WebDriverException as error:
                # Best effort: report the failure and keep what was gathered
                # so far instead of silently retrying on a broken page.
                print(f"Stopped paging early: {error}", file=sys.stderr)
                break
    finally:
        # Always shut the browser down, even when scraping fails midway.
        driver.quit()

    with open(file="packages.json", mode="w", encoding='utf8') as outfile:
        json.dump(packages_list, outfile, indent=4)


if __name__ == "__main__":
    main()
selenium==4.6.0
webdriver-manager==3.8.4
packaging==21.3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment