@ThePyProgrammer
Created February 27, 2024 12:59
Track down all Devpost Hackathon Projects via Participant List (when project gallery isn't released)
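
Requirements: the script drives a real Chrome session, so Chrome must be installed (with Selenium 4, a matching chromedriver is fetched automatically). The packages below are what the imports assume; openpyxl is the engine pandas uses to write .xlsx files:

pip install selenium beautifulsoup4 tqdm pandas openpyxl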
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from tqdm import tqdm
import pandas as pd

# Set up the Chrome WebDriver
driver = webdriver.Chrome()
# Open the hackathon's participant page
HACKATHON_URL = "https://hack4good-2024.devpost.com"  # placeholder; any Devpost hackathon URL works
URL = f"{HACKATHON_URL}/participants"
driver.get(URL)
# Scroll to the bottom to load all participants
# (tune `scroll_count` as needed to reach the end of the list)
scroll_count = 15
for _ in tqdm(range(scroll_count)):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # wait for the next batch of results to load
# Parse the fully loaded page for participant cards
soup = BeautifulSoup(driver.page_source, 'html.parser')
search_results = soup.find_all('div', class_='participant')
# Collect each participant's name and profile URL, skipping anyone with zero projects
users = []
for res in search_results:
    project_count = int(res.find(class_="participant-software-count").strong.get_text())
    name = res.h5.get_text()
    if project_count == 0:
        print(name, "had no projects, skipping")
        continue
    try:
        url = res.a.attrs['href']
        users.append({"name": name, "url": url})
    except Exception as e:
        print(name, e)
# Visit each participant's profile and collect project URLs
# (a set deduplicates projects shared by teammates)
project_set = set()
for user in tqdm(users):
    driver.get(user["url"])
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    for gallery_item in soup.find_all("div", class_="gallery-item"):
        project_set.add(gallery_item.a.attrs["href"])
    time.sleep(0.5)
# Visit each project page, keeping only projects that link back to this hackathon
project_urls = list(project_set)
projects = []
for i, url in enumerate(tqdm(project_urls)):
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    if soup.find_all("a", href=f"{HACKATHON_URL}/"):
        title = soup.find("h1", id="app-title").get_text()
        description = soup.find("p", class_="large").get_text().strip()
        projects.append({
            "title": title,
            "description": description,
            "url": url
        })
        print(i, title, url)
    time.sleep(0.5)
# Store all the data in an Excel file for easy access :)
pd.DataFrame(projects).to_excel("projects.xlsx", index=False)
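
A fixed scroll_count of 15 may stop short on a large hackathon or waste time on a small one. A minimal alternative sketch, assuming the participant list is a standard infinite-scroll page: keep scrolling until the page height stops growing, then stop.

last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # assumed load time; raise this if results still trickle in
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break  # height unchanged, so no new participants were appended
    last_height = new_height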
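
If a visible browser window isn't needed, the driver can be created headless instead. This is standard Selenium 4 Chrome configuration, not something the original script sets up:

options = webdriver.ChromeOptions()
options.add_argument("--headless=new")  # headless mode for Chrome 109+
driver = webdriver.Chrome(options=options)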