# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @grobbie
# Created August 10, 2023 13:48
import logging
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup
def get_stackoverflow_data(tag="linux-distribution", num_pages=3, timeout=10):
    """Scrape question titles and vote counts for a Stack Overflow tag.

    Requests the vote-sorted listing pages ``1..num_pages`` for *tag* and
    extracts one record per ``div.question-summary`` element found.

    Args:
        tag: Tag name to look up. It is percent-encoded before being placed
            in the URL path, so a tag such as ``c#`` is not cut off at the
            ``#`` (which would otherwise start the URL fragment).
        num_pages: Number of listing pages to request, starting at page 1.
            Values below 1 request nothing.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of ``{"title": str, "vote_count": int}`` dicts in the order
        they were encountered. Pages that do not answer with HTTP 200, and
        entries lacking a title or an integer vote count, are skipped with
        a logged warning instead of aborting the whole scrape.

    Raises:
        requests.RequestException: If a request fails at the transport
            level or exceeds *timeout*.
    """
    log = logging.getLogger(__name__)
    base_url = "https://stackoverflow.com/questions/tagged/"
    # "%" stays unescaped so an already percent-encoded tag is not
    # double-encoded; everything else unsafe in a path segment is escaped.
    tag_url = base_url + quote(tag, safe="%")
    data = []
    for page in range(1, num_pages + 1):
        # Without a timeout a stalled connection would block forever.
        response = requests.get(
            tag_url,
            params={"tab": "votes", "page": page},
            timeout=timeout,
        )
        if response.status_code != 200:
            log.warning(
                "Skipping page %d of tag %r: HTTP %d",
                page, tag, response.status_code,
            )
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        # NOTE(review): these selectors assume the "question-summary" /
        # "vote-count-post" markup; if the live page uses different class
        # names nothing will match -- confirm against the current HTML.
        questions = soup.find_all("div", class_="question-summary")
        if not questions:
            log.warning(
                "No question-summary elements on page %d of tag %r",
                page, tag,
            )
        for question in questions:
            heading = question.find("h3")
            votes = question.find("span", class_="vote-count-post")
            if heading is None or votes is None:
                log.warning("Skipping entry without title or vote count")
                continue
            try:
                vote_count = int(votes.get_text())
            except ValueError:
                log.warning(
                    "Skipping entry with non-integer vote count %r",
                    votes.get_text(),
                )
                continue
            data.append(
                {"title": heading.get_text().strip(), "vote_count": vote_count}
            )
    return data
if __name__ == "__main__":
    # Script entry point: scrape the default tag and save the records as CSV.
    scraped_data = get_stackoverflow_data(tag="linux-distribution", num_pages=3)
    # Pin the columns so the CSV always carries its header row; without this
    # an empty scrape result produces a frame with no columns at all.
    df = pd.DataFrame(scraped_data, columns=["title", "vote_count"])
    # index=False keeps the pandas row index out of the output file.
    df.to_csv("stack_overflow_data.csv", index=False)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment