Skip to content

Instantly share code, notes, and snippets.

@immuntasir
Created October 3, 2020 13:44
Show Gist options
  • Save immuntasir/9e8f1eb5023a1d4aee84eb869f94e497 to your computer and use it in GitHub Desktop.
Save immuntasir/9e8f1eb5023a1d4aee84eb869f94e497 to your computer and use it in GitHub Desktop.
Exploring import statements that use numpy
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from tqdm import tqdm\n",
"import pandas as pd\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the GitHub token from a local file so it is never hard-coded in the notebook.\n",
"with open('../../api_keys/github.txt', \"r\") as f:\n",
"    # strip() removes surrounding whitespace such as a trailing newline; a newline\n",
"    # inside the value would make the Authorization header invalid.\n",
"    API_KEY = f.read().strip()\n",
"\n",
"# Sent with API requests so they are authenticated.\n",
"headers = {'Authorization': 'token %s' % API_KEY}\n",
"\n",
"# Search term and language filter used to build the repository search query.\n",
"LIBRARY = 'numpy'\n",
"LANGUAGE = 'python'\n",
"\n",
"# The URL deliberately ends with 'page=' so callers append the page number.\n",
"URL = 'https://api.github.com/search/repositories?q=%s+language:%s&sort=stars&order=desc&page=' % (LIBRARY, LANGUAGE)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dict_keys(['total_count', 'incomplete_results', 'items'])\n",
"Total Repositories: 10853\n",
"Total number of items in a page: 30\n",
"Keys in a item: dict_keys(['id', 'node_id', 'name', 'full_name', 'private', 'owner', 'html_url', 'description', 'fork', 'url', 'forks_url', 'keys_url', 'collaborators_url', 'teams_url', 'hooks_url', 'issue_events_url', 'events_url', 'assignees_url', 'branches_url', 'tags_url', 'blobs_url', 'git_tags_url', 'git_refs_url', 'trees_url', 'statuses_url', 'languages_url', 'stargazers_url', 'contributors_url', 'subscribers_url', 'subscription_url', 'commits_url', 'git_commits_url', 'comments_url', 'issue_comment_url', 'contents_url', 'compare_url', 'merges_url', 'archive_url', 'downloads_url', 'issues_url', 'pulls_url', 'milestones_url', 'notifications_url', 'labels_url', 'releases_url', 'deployments_url', 'created_at', 'updated_at', 'pushed_at', 'git_url', 'ssh_url', 'clone_url', 'svn_url', 'homepage', 'size', 'stargazers_count', 'watchers_count', 'language', 'has_issues', 'has_projects', 'has_downloads', 'has_wiki', 'has_pages', 'forks_count', 'mirror_url', 'archived', 'disabled', 'open_issues_count', 'license', 'forks', 'open_issues', 'watchers', 'default_branch', 'permissions', 'score'])\n"
]
}
],
"source": [
"# Request the first result page (authenticated) to inspect the response layout.\n",
"r = requests.get(URL + '1', headers=headers)\n",
"json_response = r.json()\n",
"\n",
"# Top-level keys of the response and the total match count.\n",
"print(json_response.keys())\n",
"print('Total Repositories:', json_response['total_count'])\n",
"\n",
"# Size of one page and the fields available on a single repository entry.\n",
"first_page_items = json_response['items']\n",
"print('Total number of items in a page:', len(first_page_items))\n",
"\n",
"print('Keys in a item:', first_page_items[0].keys())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 35/35 [08:17<00:00, 14.21s/it]\n"
]
}
],
"source": [
"# Repository fields to keep from each search result.\n",
"keys = ['name', 'full_name', 'html_url', 'clone_url', 'size', 'stargazers_count']\n",
"NUMBER_OF_PAGES_TO_ITERATE = 35\n",
"\n",
"# One list per selected field; the lists become the DataFrame columns later.\n",
"repo_dict = {key: [] for key in keys}\n",
"\n",
"# GitHub result pages are numbered from 1. Starting at 0 does not fetch an extra\n",
"# page; it appears to return page 1 again, which duplicates the first rows.\n",
"for page_num in tqdm(range(1, NUMBER_OF_PAGES_TO_ITERATE + 1)):\n",
"    # Send the token on every request so the crawl is authenticated like the probe request.\n",
"    r = requests.get(URL + str(page_num), headers=headers)\n",
"\n",
"    # The search API only exposes the first 1000 results; GitHub is expected to\n",
"    # reject pages beyond that with 422, so stop there instead of failing.\n",
"    if r.status_code == 422:\n",
"        break\n",
"    # Any other error (bad token, rate limit) should fail loudly here rather than\n",
"    # as a KeyError on 'items' below.\n",
"    r.raise_for_status()\n",
"    contents = r.json()\n",
"\n",
"    for item in contents['items']:\n",
"        for key in keys:\n",
"            repo_dict[key].append(item[key])\n",
"\n",
"    # Pause for a minute after every fifth page, presumably to stay under the\n",
"    # search rate limit.\n",
"    if page_num % 5 == 0:\n",
"        time.sleep(60)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>full_name</th>\n",
" <th>html_url</th>\n",
" <th>clone_url</th>\n",
" <th>size</th>\n",
" <th>stargazers_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>data-science-ipython-notebooks</td>\n",
" <td>donnemartin/data-science-ipython-notebooks</td>\n",
" <td>https://github.com/donnemartin/data-science-ip...</td>\n",
" <td>https://github.com/donnemartin/data-science-ip...</td>\n",
" <td>49025</td>\n",
" <td>19568</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ML-From-Scratch</td>\n",
" <td>eriklindernoren/ML-From-Scratch</td>\n",
" <td>https://github.com/eriklindernoren/ML-From-Scr...</td>\n",
" <td>https://github.com/eriklindernoren/ML-From-Scr...</td>\n",
" <td>553</td>\n",
" <td>16849</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>numpy</td>\n",
" <td>numpy/numpy</td>\n",
" <td>https://github.com/numpy/numpy</td>\n",
" <td>https://github.com/numpy/numpy.git</td>\n",
" <td>84293</td>\n",
" <td>15014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>jax</td>\n",
" <td>google/jax</td>\n",
" <td>https://github.com/google/jax</td>\n",
" <td>https://github.com/google/jax.git</td>\n",
" <td>28075</td>\n",
" <td>9795</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>numpy-ml</td>\n",
" <td>ddbourgin/numpy-ml</td>\n",
" <td>https://github.com/ddbourgin/numpy-ml</td>\n",
" <td>https://github.com/ddbourgin/numpy-ml.git</td>\n",
" <td>10416</td>\n",
" <td>8963</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name full_name \\\n",
"0 data-science-ipython-notebooks donnemartin/data-science-ipython-notebooks \n",
"1 ML-From-Scratch eriklindernoren/ML-From-Scratch \n",
"2 numpy numpy/numpy \n",
"3 jax google/jax \n",
"4 numpy-ml ddbourgin/numpy-ml \n",
"\n",
" html_url \\\n",
"0 https://github.com/donnemartin/data-science-ip... \n",
"1 https://github.com/eriklindernoren/ML-From-Scr... \n",
"2 https://github.com/numpy/numpy \n",
"3 https://github.com/google/jax \n",
"4 https://github.com/ddbourgin/numpy-ml \n",
"\n",
" clone_url size stargazers_count \n",
"0 https://github.com/donnemartin/data-science-ip... 49025 19568 \n",
"1 https://github.com/eriklindernoren/ML-From-Scr... 553 16849 \n",
"2 https://github.com/numpy/numpy.git 84293 15014 \n",
"3 https://github.com/google/jax.git 28075 9795 \n",
"4 https://github.com/ddbourgin/numpy-ml.git 10416 8963 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Turn the collected per-field lists into a table.\n",
"repo_df = pd.DataFrame(data=repo_dict)\n",
"\n",
"# Persist the table as CSV without writing the row index.\n",
"csv_path = '../../data/package_popularity/numpy/repo_info.csv'\n",
"repo_df.to_csv(csv_path, index=None)\n",
"\n",
"# Show the first five rows as the cell output.\n",
"repo_df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of repositories: 1050\n"
]
}
],
"source": [
"# Report how many repository rows were collected.\n",
"repo_count = len(repo_df)\n",
"print('Number of repositories:', repo_count)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment