Skip to content

Instantly share code, notes, and snippets.

@andfanilo
Created November 30, 2022 14:36
Show Gist options
  • Save andfanilo/a82cc70910e3a3dd7279055ec89a3e7d to your computer and use it in GitHub Desktop.
Save andfanilo/a82cc70910e3a3dd7279055ec89a3e7d to your computer and use it in GitHub Desktop.
Scrape Components Tracker wiki
"""
pip install beautifulsoup4 ghapi requests streamlit
Add your GitHub API token as the `key` value under the `[github]` section of .streamlit/secrets.toml
streamlit run streamlit_app.py
"""
from typing import Tuple
import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
from bs4.element import ResultSet
from ghapi.all import GhApi
from ghapi.all import paged
@st.experimental_singleton
def load_api():
    """Build the GitHub API client used by the app.

    The access token is read from the ``key`` entry of the ``github``
    section of Streamlit's secrets and handed to ``GhApi``.

    NOTE(review): the function is wrapped in ``st.experimental_singleton``,
    presumably so one client is reused across reruns; newer Streamlit
    releases appear to favour ``st.cache_resource`` -- confirm against the
    Streamlit version this app targets.

    Returns:
        A ``GhApi`` instance created with the configured token.
    """
    return GhApi(token=st.secrets["github"]["key"])
def check_rate_limit():
    """Return the GitHub API rate-limit status for the configured token.

    Calls the ``rate_limit.get`` operation on the client returned by
    ``load_api`` and hands its response back unchanged.
    """
    api = load_api()
    return api.rate_limit.get()
def scrape_tracker_wiki() -> ResultSet:
    """Collect the "GitHub" links listed in the components tracker topic.

    Downloads the JSON of the Streamlit Components community tracker topic,
    parses the rendered HTML (the ``cooked`` field) of its first post, and
    searches the element that directly follows the first ``<h3>`` heading
    for anchors whose text is exactly ``GitHub``.

    Returns:
        The matching ``<a>`` tags as a BeautifulSoup ``ResultSet``.

    Raises:
        requests.HTTPError: If the forum does not answer with status 200.
        requests.Timeout: If the forum does not answer in time.
        RuntimeError: If the post no longer has the expected layout
            (no ``<h3>`` heading, or nothing after it).
    """
    url = "https://discuss.streamlit.io/t/streamlit-components-community-tracker/4634.json"
    # Without a timeout a stalled connection would block the app forever.
    resp = requests.get(url, timeout=30)
    # Explicit check rather than ``assert``, which is removed under ``-O``.
    if resp.status_code != 200:
        raise requests.HTTPError(
            f"Unexpected status {resp.status_code} while fetching {url}",
            response=resp,
        )
    data = resp.json()["post_stream"]["posts"][0]["cooked"]
    soup = BeautifulSoup(f"<html>{data}</html>", "html.parser")

    heading = soup.find("h3")
    published_soup = heading.find_next_sibling() if heading is not None else None
    if published_soup is None:
        raise RuntimeError(
            "Tracker post layout changed: no element found after the first <h3>"
        )
    # ``string=`` is the current spelling of the legacy ``text=`` filter.
    return published_soup.find_all("a", string="GitHub")
def extract_info_from_link(soup_link: str) -> Tuple[str, str]:
    """Split a GitHub repository link into its owner and repository name.

    The last two ``/``-separated segments of the link are taken as
    ``(user, repo)``. Surrounding whitespace, a query string or fragment,
    trailing slashes and a ``.git`` suffix are ignored, so
    ``https://github.com/user/repo/`` and
    ``https://github.com/user/repo.git`` both give ``("user", "repo")``.

    Args:
        soup_link: The ``href`` value of a repository link.

    Returns:
        A ``(user, repo)`` tuple.

    Raises:
        ValueError: If the link does not contain two non-empty trailing
            segments.
    """
    # Keep only the path part, then drop decorations that would otherwise
    # end up in (or shift) the repository segment.
    path = soup_link.strip().split("#", 1)[0].split("?", 1)[0].rstrip("/")
    if path.endswith(".git"):
        path = path[: -len(".git")]

    segments = path.split("/")
    if len(segments) < 2 or not segments[-2] or not segments[-1]:
        raise ValueError(f"Cannot extract user/repo from link: {soup_link!r}")
    return (segments[-2], segments[-1])
@st.experimental_memo
def count_stargazers(link: str) -> Tuple[str, int]:
    """Look up how many stars a GitHub repository has.

    Args:
        link: URL of the repository; its owner and name are obtained with
            ``extract_info_from_link``.

    Returns:
        A ``("<user>/<repo>", star_count)`` tuple.
    """
    username, repository = extract_info_from_link(link)
    api = load_api()
    # The repository resource already reports its star total, so a single
    # request replaces downloading every page of stargazers just to count
    # them (which costs one API call per page against the rate limit).
    repo_info = api.repos.get(username, repository)
    return (f"{username}/{repository}", int(repo_info.stargazers_count))
def main():
    """Render a table with the star count of every tracked component.

    Each link returned by ``scrape_tracker_wiki`` has its ``href`` passed to
    ``count_stargazers``; the resulting pairs are shown with
    ``st.dataframe`` under the columns ``repo`` and ``count``.
    """
    rows = []
    for anchor in scrape_tracker_wiki():
        rows.append(count_stargazers(anchor["href"]))
    table = pd.DataFrame(rows, columns=["repo", "count"])
    st.dataframe(table)
if __name__ == "__main__":
    # Page setup, then the main heading.
    st.set_page_config(page_title="Components Tracker", page_icon=":book:")
    st.title("All the Components")

    # Sidebar: a header followed by the GitHub rate-limit status.
    sidebar = st.sidebar
    sidebar.header("Configuration")
    sidebar.write(check_rate_limit())

    # Main area: the table of components and their star counts.
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment