Skip to content

Instantly share code, notes, and snippets.

@ashvardanian
Created November 11, 2023 06:21
Show Gist options
  • Save ashvardanian/76bae6147fe5581176529399ac0cb6bd to your computer and use it in GitHub Desktop.
Save ashvardanian/76bae6147fe5581176529399ac0cb6bd to your computer and use it in GitHub Desktop.
Intersect Stargazers in a Venn Diagram
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install matplotlib matplotlib-venn"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import time\n",
"\n",
"def get_stargazers(repo_owner, repo_name, token=None, delay=10, timeout=30):\n",
"    \"\"\"Fetches the list of users who have starred the given GitHub repository.\n",
"\n",
"    Pages through the stargazers endpoint 100 users at a time and collects\n",
"    each user's login. On a non-200 response the error is printed and the\n",
"    logins gathered so far are returned.\n",
"\n",
"    Args:\n",
"        repo_owner: Account or organization that owns the repository.\n",
"        repo_name: Name of the repository.\n",
"        token: Optional GitHub token sent in the Authorization header.\n",
"        delay: Seconds to sleep between page requests.\n",
"        timeout: Seconds to wait for each HTTP response.\n",
"\n",
"    Returns:\n",
"        A list of stargazer login names.\n",
"\n",
"    Raises:\n",
"        requests.exceptions.RequestException: If a request fails or times out.\n",
"    \"\"\"\n",
"    # Only send the Authorization header when a token was supplied.\n",
"    headers = {'Authorization': f'token {token}'} if token else {}\n",
"    results = []\n",
"    page = 1\n",
"    while True:\n",
"        url = f\"https://api.github.com/repos/{repo_owner}/{repo_name}/stargazers?page={page}&per_page=100\"\n",
"        # A timeout keeps a stalled connection from hanging the notebook.\n",
"        response = requests.get(url, headers=headers, timeout=timeout)\n",
"        if response.status_code != 200:\n",
"            print(f\"Error: {response.status_code} for url: {url}\")\n",
"            break  # Exit loop on error, keeping what was fetched so far\n",
"        stargazers = response.json()\n",
"        if not stargazers:\n",
"            break  # Exit loop if no more stargazers\n",
"        results.extend(user['login'] for user in stargazers)\n",
"        page += 1  # Increment page number for next iteration\n",
"        time.sleep(delay)  # Pause between pages to go easy on the rate limiter\n",
"    return results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"name_one = (\"unum-cloud\", \"usearch\")\n",
"name_two = (\"unum-cloud\", \"ucall\")\n",
"name_three = (\"ashvardanian\", \"stringzilla\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stars_one = get_stargazers(*name_one)\n",
"len(stars_one)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stars_two = get_stargazers(*name_two)\n",
"len(stars_two)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stars_three = get_stargazers(*name_three)\n",
"len(stars_three)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from matplotlib import pyplot as plt\n",
"from matplotlib_venn import venn3\n",
"\n",
"venn3([set(stars_one), set(stars_two), set(stars_three)], ['/'.join(name_one), '/'.join(name_two), '/'.join(name_three)])\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@ashvardanian
Copy link
Author

Be aware that the GitHub API applies an aggressive rate limiter to unauthenticated requests and also limits pagination to 400 pages.
This means that fetching more than 40K stargazers of a repository with this API is impossible. To authenticate, use your own developer token:

headers = {'Authorization': 'token your-long-token-here'}
response = requests.get(url, headers=headers)

Attached you can find some of the outputs.

stars-ai-apps-frameworks
stars-ai-frameworks
stars-ashvardanian-877
stars-databases
stars-datascience
stars-json-cpp
stars-llama
stars-python-backend
stars-vector-indexes
stars-vector-products
stars-web

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment