Created
February 27, 2024 12:59
-
-
Save ThePyProgrammer/c69bcca827c9509486256b081090abc3 to your computer and use it in GitHub Desktop.
Track down all Devpost Hackathon Projects via Participant List (when project gallery isn't released)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
from selenium import webdriver | |
from selenium.webdriver.chrome.service import Service as ChromeService | |
from bs4 import BeautifulSoup | |
from tqdm import tqdm | |
import pandas as pd | |
# Launch a Chrome browser session through Selenium.
driver = webdriver.Chrome()

# Point the browser at the hackathon's participant listing.
HACKATHON_URL = "https://hack4good-2024.devpost.com"  # placeholder hackathon; swap in your own
URL = f"{HACKATHON_URL}/participants"
driver.get(URL)

# The participant list lazy-loads as you scroll, so repeatedly jump to the
# bottom of the page to force more entries to render. Raise or lower the
# pass count until the full list is loaded for your hackathon's size.
scroll_count = 15
for _scroll_pass in tqdm(range(scroll_count)):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # give the newly revealed batch of participants time to load
# Parse the fully-scrolled participants page and pull out every participant card.
soup = BeautifulSoup(driver.page_source, 'html.parser')
search_results = soup.find_all('div', class_='participant')

# Build a list of {name, url} dicts for each participant who has at least
# one project attached to their profile.
users = []
for res in search_results:
    # The submission count lives in a <strong> inside the
    # "participant-software-count" element; guard both lookups so one
    # malformed card cannot crash the whole crawl.
    count_tag = res.find(class_="participant-software-count")
    if count_tag is None or count_tag.strong is None:
        continue  # card without a software count — nothing to follow
    project_count = int(count_tag.strong.get_text())
    name = res.h5.get_text() if res.h5 is not None else "<unnamed participant>"
    if project_count == 0:
        print(name, "had no projects, skipping")
        continue
    try:
        # First anchor on the card is the participant's profile link.
        url = res.a.attrs['href']
        users.append({"name": name, "url": url})
    except (AttributeError, KeyError) as e:
        # Best effort: a card without a usable profile link is reported and skipped.
        print(name, e)
# Visit each participant's profile and harvest the URL of every project in
# their gallery. A set de-duplicates projects shared by multiple teammates
# who all appear in the participant list.
project_set = set()
for user in tqdm(users):
    driver.get(user["url"])
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    for project in soup.find_all("div", class_="gallery-item"):
        link = project.a
        # Skip gallery tiles without an anchor instead of crashing on them.
        if link is not None and "href" in link.attrs:
            project_set.add(link.attrs["href"])
    time.sleep(0.5)  # throttle requests to be polite to Devpost
# De-duplicated list of candidate project URLs gathered from the profiles.
project_urls = list(project_set)

# Visit each project page and keep only the projects submitted to THIS
# hackathon — a participant's gallery lists all of their Devpost projects,
# not just the ones entered here.
projects = []
for i, url in enumerate(tqdm(project_urls)):
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # A submitted project's page links back to the hackathon it was entered in.
    if soup.find("a", href=f"{HACKATHON_URL}/") is not None:
        title_tag = soup.find("h1", id="app-title")
        desc_tag = soup.find("p", class_="large")
        if title_tag is None or desc_tag is None:
            # Malformed or unexpected page layout: report and move on rather
            # than letting one page abort the whole crawl.
            print(i, "skipping malformed project page:", url)
        else:
            title = title_tag.get_text()
            description = desc_tag.get_text().strip()
            projects.append({
                "title": title,
                "description": description,
                "url": url,
            })
            print(i, title, url)
    time.sleep(0.5)  # throttle requests between page loads

# Stores all the data to an excel file for easy access :)
pd.DataFrame(projects).to_excel("projects.xlsx", index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment