Skip to content

Instantly share code, notes, and snippets.

Created November 30, 2022 14:36
Show Gist options
  • Save andfanilo/a82cc70910e3a3dd7279055ec89a3e7d to your computer and use it in GitHub Desktop.
Save andfanilo/a82cc70910e3a3dd7279055ec89a3e7d to your computer and use it in GitHub Desktop.
Scrape Components Tracker wiki
pip install beautifulsoup4 ghapi pandas requests streamlit
Add your GitHub API token as the value of `key` under the `[github]` section of .streamlit/secrets.toml
streamlit run <path-to-this-script>.py
from typing import Tuple
import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
from bs4.element import ResultSet
from ghapi.all import GhApi
from ghapi.all import paged
def load_api():
    """Build a GhApi client authenticated with the token from Streamlit secrets.

    The token is read from ``st.secrets["github"]["key"]``.

    Returns:
        A ``GhApi`` instance created with that token.
    """
    github_secrets = st.secrets["github"]
    return GhApi(token=github_secrets["key"])
def check_rate_limit():
    """Return the result of ``rate_limit.get()`` on a freshly loaded API client."""
    api = load_api()
    return api.rate_limit.get()
def scrape_tracker_wiki(timeout: float = 10.0) -> ResultSet:
    """Fetch the tracker wiki post and return its "GitHub" anchor tags.

    The endpoint is expected to return JSON whose
    ``post_stream.posts[0].cooked`` field holds the post's HTML. The links are
    taken from the element that directly follows the first ``<h3>`` heading.

    Args:
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``<a>`` tags whose text is exactly "GitHub".

    Raises:
        requests.HTTPError: If the response status is not 200.
        ValueError: If the HTML lacks the ``<h3>`` heading or the element
            after it.
    """
    # NOTE(review): the URL is empty in this copy of the script; fill in the
    # real topic endpoint before running.
    url = ""
    # A timeout keeps a stalled server from hanging the app forever.
    resp = requests.get(url, timeout=timeout)
    # Explicit check instead of ``assert``, which is stripped under
    # ``python -O``.
    if resp.status_code != 200:
        raise requests.HTTPError(
            f"Unexpected status code {resp.status_code} for {url!r}",
            response=resp,
        )
    data = resp.json()["post_stream"]["posts"][0]["cooked"]
    soup = BeautifulSoup(f"<html>{data}</html>", "html.parser")

    heading = soup.find("h3")
    if heading is None:
        raise ValueError("No <h3> heading found in the wiki post")
    published_soup = heading.find_next_sibling()
    if published_soup is None:
        raise ValueError("No element follows the first <h3> heading")
    # ``string=`` is the current name of the deprecated ``text=`` filter.
    return published_soup.find_all("a", string="GitHub")
def extract_info_from_link(soup_link: str) -> Tuple[str, str]:
    """Split a repository link into its ``(user, repo)`` parts.

    The last two ``/``-separated segments of the link are used. Surrounding
    whitespace and trailing slashes are removed first, so
    ``".../user/repo/"`` yields ``("user", "repo")`` rather than
    ``("repo", "")``.

    Args:
        soup_link: Link text ending in ``<user>/<repo>``.

    Returns:
        The ``(user, repo)`` pair.

    Raises:
        IndexError: If the link has fewer than two segments.
    """
    segments = soup_link.strip().rstrip("/").split("/")
    return (segments[-2], segments[-1])
def count_stargazers(link: str) -> Tuple[str, int]:
    """Return the ``"user/repo"`` name and star count for a repository link.

    Args:
        link: URL of the repository, ending in ``<user>/<repo>``.

    Returns:
        ``("<user>/<repo>", stars)``.
    """
    username, repository = extract_info_from_link(link)
    api = load_api()
    # The repository resource already carries the star total, so a single
    # request replaces downloading every page of stargazers just to count
    # them, which is far gentler on the API rate limit.
    repo_info = api.repos.get(username, repository)
    return (f"{username}/{repository}", int(repo_info["stargazers_count"]))
def main():
    """Scrape the tracker links and tabulate each repository's star count.

    Builds a DataFrame with ``repo`` and ``count`` columns from the
    ``href`` of every scraped link.
    """
    hrefs = (anchor["href"] for anchor in scrape_tracker_wiki())
    star_rows = list(map(count_stargazers, hrefs))
    # NOTE(review): the frame is neither returned nor displayed in the code
    # visible here -- confirm whether rendering it was intended.
    df = pd.DataFrame(star_rows, columns=["repo", "count"])
if __name__ == "__main__":
st.set_page_config(page_title="Components Tracker", page_icon=":book:")
st.title("All the Components")
with st.sidebar:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment