immuntasir/Exploring Import Statements.ipynb

## Exploring Import Statements.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from tqdm import tqdm\n",
    "import pandas as pd\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The GitHub token is read from a local file rather than being hardcoded.\n",
    "with open('../../api_keys/github.txt', 'r') as f:\n",
    "    # strip() removes surrounding whitespace (e.g. a trailing newline in the\n",
    "    # key file) so it cannot end up inside the Authorization header value.\n",
    "    API_KEY = f.read().strip()\n",
    "\n",
    "headers = {'Authorization': 'token %s' % API_KEY}\n",
    "\n",
    "# Search parameters: repositories matching LIBRARY, filtered to LANGUAGE.\n",
    "LIBRARY = 'numpy'\n",
    "LANGUAGE = 'python'\n",
    "\n",
    "# Sorted by stars, descending; the page number is appended by each request.\n",
    "URL = 'https://api.github.com/search/repositories?q=%s+language:%s&sort=stars&order=desc&page=' % (LIBRARY, LANGUAGE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dict_keys(['total_count', 'incomplete_results', 'items'])\n",
      "Total Repositories: 10853\n",
      "Total number of items in a page: 30\n",
      "Keys in a item: dict_keys(['id', 'node_id', 'name', 'full_name', 'private', 'owner', 'html_url', 'description', 'fork', 'url', 'forks_url', 'keys_url', 'collaborators_url', 'teams_url', 'hooks_url', 'issue_events_url', 'events_url', 'assignees_url', 'branches_url', 'tags_url', 'blobs_url', 'git_tags_url', 'git_refs_url', 'trees_url', 'statuses_url', 'languages_url', 'stargazers_url', 'contributors_url', 'subscribers_url', 'subscription_url', 'commits_url', 'git_commits_url', 'comments_url', 'issue_comment_url', 'contents_url', 'compare_url', 'merges_url', 'archive_url', 'downloads_url', 'issues_url', 'pulls_url', 'milestones_url', 'notifications_url', 'labels_url', 'releases_url', 'deployments_url', 'created_at', 'updated_at', 'pushed_at', 'git_url', 'ssh_url', 'clone_url', 'svn_url', 'homepage', 'size', 'stargazers_count', 'watchers_count', 'language', 'has_issues', 'has_projects', 'has_downloads', 'has_wiki', 'has_pages', 'forks_count', 'mirror_url', 'archived', 'disabled', 'open_issues_count', 'license', 'forks', 'open_issues', 'watchers', 'default_branch', 'permissions', 'score'])\n"
     ]
    }
   ],
   "source": [
    "# Probe the first page to inspect the response layout before paginating.\n",
    "r = requests.get(URL + '1', headers=headers)\n",
    "# Surface HTTP errors (bad token, rate limit) here instead of as a KeyError below.\n",
    "r.raise_for_status()\n",
    "json_response = r.json()\n",
    "\n",
    "print(json_response.keys())\n",
    "print('Total Repositories:', json_response['total_count'])\n",
    "print('Total number of items in a page:', len(json_response['items']))\n",
    "\n",
    "# An empty result page has no first item to inspect.\n",
    "if json_response['items']:\n",
    "    print('Keys in a item:', json_response['items'][0].keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 35/35 [08:17<00:00, 14.21s/it]\n"
     ]
    }
   ],
   "source": [
    "# Fields copied from every repository item into the result table.\n",
    "keys = ['name', 'full_name', 'html_url', 'clone_url', 'size', 'stargazers_count']\n",
    "NUMBER_OF_PAGES_TO_ITERATE = 35\n",
    "\n",
    "# One list per field; the lists grow in lockstep, one entry per repository.\n",
    "repo_dict = {key: [] for key in keys}\n",
    "\n",
    "# GitHub result pages are numbered from 1, so walk 1..NUMBER_OF_PAGES_TO_ITERATE.\n",
    "for page_num in tqdm(range(1, NUMBER_OF_PAGES_TO_ITERATE + 1)):\n",
    "    # Send the token with every page request so the calls are authenticated.\n",
    "    r = requests.get(URL + str(page_num), headers=headers)\n",
    "    if r.status_code == 422 and page_num > 1:\n",
    "        # NOTE(review): GitHub documents a cap of 1,000 results per search and is\n",
    "        # expected to answer 422 for pages past it - confirm; treated as the end.\n",
    "        break\n",
    "    # Any other HTTP error (rate limit, bad token) stops the run with a clear message.\n",
    "    r.raise_for_status()\n",
    "    contents = r.json()\n",
    "\n",
    "    for item in contents['items']:\n",
    "        for key in keys:\n",
    "            repo_dict[key].append(item[key])\n",
    "\n",
    "    # Pause a minute after every fifth page (presumably to respect the search\n",
    "    # rate limit); no pause is needed once the last page has been fetched.\n",
    "    if page_num % 5 == 0 and page_num < NUMBER_OF_PAGES_TO_ITERATE:\n",
    "        time.sleep(60)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>full_name</th>\n",
       "      <th>html_url</th>\n",
       "      <th>clone_url</th>\n",
       "      <th>size</th>\n",
       "      <th>stargazers_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>data-science-ipython-notebooks</td>\n",
       "      <td>donnemartin/data-science-ipython-notebooks</td>\n",
       "      <td>https://github.com/donnemartin/data-science-ip...</td>\n",
       "      <td>https://github.com/donnemartin/data-science-ip...</td>\n",
       "      <td>49025</td>\n",
       "      <td>19568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ML-From-Scratch</td>\n",
       "      <td>eriklindernoren/ML-From-Scratch</td>\n",
       "      <td>https://github.com/eriklindernoren/ML-From-Scr...</td>\n",
       "      <td>https://github.com/eriklindernoren/ML-From-Scr...</td>\n",
       "      <td>553</td>\n",
       "      <td>16849</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>numpy</td>\n",
       "      <td>numpy/numpy</td>\n",
       "      <td>https://github.com/numpy/numpy</td>\n",
       "      <td>https://github.com/numpy/numpy.git</td>\n",
       "      <td>84293</td>\n",
       "      <td>15014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>jax</td>\n",
       "      <td>google/jax</td>\n",
       "      <td>https://github.com/google/jax</td>\n",
       "      <td>https://github.com/google/jax.git</td>\n",
       "      <td>28075</td>\n",
       "      <td>9795</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>numpy-ml</td>\n",
       "      <td>ddbourgin/numpy-ml</td>\n",
       "      <td>https://github.com/ddbourgin/numpy-ml</td>\n",
       "      <td>https://github.com/ddbourgin/numpy-ml.git</td>\n",
       "      <td>10416</td>\n",
       "      <td>8963</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                             name                                   full_name  \\\n",
       "0  data-science-ipython-notebooks  donnemartin/data-science-ipython-notebooks   \n",
       "1                 ML-From-Scratch             eriklindernoren/ML-From-Scratch   \n",
       "2                           numpy                                 numpy/numpy   \n",
       "3                             jax                                  google/jax   \n",
       "4                        numpy-ml                          ddbourgin/numpy-ml   \n",
       "\n",
       "                                            html_url  \\\n",
       "0  https://github.com/donnemartin/data-science-ip...   \n",
       "1  https://github.com/eriklindernoren/ML-From-Scr...   \n",
       "2                     https://github.com/numpy/numpy   \n",
       "3                      https://github.com/google/jax   \n",
       "4              https://github.com/ddbourgin/numpy-ml   \n",
       "\n",
       "                                           clone_url   size  stargazers_count  \n",
       "0  https://github.com/donnemartin/data-science-ip...  49025             19568  \n",
       "1  https://github.com/eriklindernoren/ML-From-Scr...    553             16849  \n",
       "2                 https://github.com/numpy/numpy.git  84293             15014  \n",
       "3                  https://github.com/google/jax.git  28075              9795  \n",
       "4          https://github.com/ddbourgin/numpy-ml.git  10416              8963  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Turn the per-field lists into a table with one column per field.\n",
    "repo_df = pd.DataFrame(repo_dict)\n",
    "# The output folder follows LIBRARY so the path matches the configured search.\n",
    "repo_df.to_csv('../../data/package_popularity/%s/repo_info.csv' % LIBRARY, index=False)\n",
    "\n",
    "repo_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of repositories: 1050\n"
     ]
    }
   ],
   "source": [
    "# Report how many rows the collected repository table holds.\n",
    "print('Number of repositories:', repo_df.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import requests\n",
	"from tqdm import tqdm\n",
	"import pandas as pd\n",
	"import time"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"# The GitHub token is read from a local file rather than being hardcoded.\n",
	"with open('../../api_keys/github.txt', 'r') as f:\n",
	"    # strip() removes surrounding whitespace (e.g. a trailing newline in the\n",
	"    # key file) so it cannot end up inside the Authorization header value.\n",
	"    API_KEY = f.read().strip()\n",
	"\n",
	"headers = {'Authorization': 'token %s' % API_KEY}\n",
	"\n",
	"# Search parameters: repositories matching LIBRARY, filtered to LANGUAGE.\n",
	"LIBRARY = 'numpy'\n",
	"LANGUAGE = 'python'\n",
	"\n",
	"# Sorted by stars, descending; the page number is appended by each request.\n",
	"URL = 'https://api.github.com/search/repositories?q=%s+language:%s&sort=stars&order=desc&page=' % (LIBRARY, LANGUAGE)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"dict_keys(['total_count', 'incomplete_results', 'items'])\n",
	"Total Repositories: 10853\n",
	"Total number of items in a page: 30\n",
	"Keys in a item: dict_keys(['id', 'node_id', 'name', 'full_name', 'private', 'owner', 'html_url', 'description', 'fork', 'url', 'forks_url', 'keys_url', 'collaborators_url', 'teams_url', 'hooks_url', 'issue_events_url', 'events_url', 'assignees_url', 'branches_url', 'tags_url', 'blobs_url', 'git_tags_url', 'git_refs_url', 'trees_url', 'statuses_url', 'languages_url', 'stargazers_url', 'contributors_url', 'subscribers_url', 'subscription_url', 'commits_url', 'git_commits_url', 'comments_url', 'issue_comment_url', 'contents_url', 'compare_url', 'merges_url', 'archive_url', 'downloads_url', 'issues_url', 'pulls_url', 'milestones_url', 'notifications_url', 'labels_url', 'releases_url', 'deployments_url', 'created_at', 'updated_at', 'pushed_at', 'git_url', 'ssh_url', 'clone_url', 'svn_url', 'homepage', 'size', 'stargazers_count', 'watchers_count', 'language', 'has_issues', 'has_projects', 'has_downloads', 'has_wiki', 'has_pages', 'forks_count', 'mirror_url', 'archived', 'disabled', 'open_issues_count', 'license', 'forks', 'open_issues', 'watchers', 'default_branch', 'permissions', 'score'])\n"
	]
	}
	],
	"source": [
	"# Probe the first page to inspect the response layout before paginating.\n",
	"r = requests.get(URL + '1', headers=headers)\n",
	"# Surface HTTP errors (bad token, rate limit) here instead of as a KeyError below.\n",
	"r.raise_for_status()\n",
	"json_response = r.json()\n",
	"\n",
	"print(json_response.keys())\n",
	"print('Total Repositories:', json_response['total_count'])\n",
	"print('Total number of items in a page:', len(json_response['items']))\n",
	"\n",
	"# An empty result page has no first item to inspect.\n",
	"if json_response['items']:\n",
	"    print('Keys in a item:', json_response['items'][0].keys())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"100%|██████████| 35/35 [08:17<00:00, 14.21s/it]\n"
	]
	}
	],
	"source": [
	"# Fields copied from every repository item into the result table.\n",
	"keys = ['name', 'full_name', 'html_url', 'clone_url', 'size', 'stargazers_count']\n",
	"NUMBER_OF_PAGES_TO_ITERATE = 35\n",
	"\n",
	"# One list per field; the lists grow in lockstep, one entry per repository.\n",
	"repo_dict = {key: [] for key in keys}\n",
	"\n",
	"# GitHub result pages are numbered from 1, so walk 1..NUMBER_OF_PAGES_TO_ITERATE.\n",
	"for page_num in tqdm(range(1, NUMBER_OF_PAGES_TO_ITERATE + 1)):\n",
	"    # Send the token with every page request so the calls are authenticated.\n",
	"    r = requests.get(URL + str(page_num), headers=headers)\n",
	"    if r.status_code == 422 and page_num > 1:\n",
	"        # NOTE(review): GitHub documents a cap of 1,000 results per search and is\n",
	"        # expected to answer 422 for pages past it - confirm; treated as the end.\n",
	"        break\n",
	"    # Any other HTTP error (rate limit, bad token) stops the run with a clear message.\n",
	"    r.raise_for_status()\n",
	"    contents = r.json()\n",
	"\n",
	"    for item in contents['items']:\n",
	"        for key in keys:\n",
	"            repo_dict[key].append(item[key])\n",
	"\n",
	"    # Pause a minute after every fifth page (presumably to respect the search\n",
	"    # rate limit); no pause is needed once the last page has been fetched.\n",
	"    if page_num % 5 == 0 and page_num < NUMBER_OF_PAGES_TO_ITERATE:\n",
	"        time.sleep(60)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>name</th>\n",
	" <th>full_name</th>\n",
	" <th>html_url</th>\n",
	" <th>clone_url</th>\n",
	" <th>size</th>\n",
	" <th>stargazers_count</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>data-science-ipython-notebooks</td>\n",
	" <td>donnemartin/data-science-ipython-notebooks</td>\n",
	" <td>https://github.com/donnemartin/data-science-ip...</td>\n",
	" <td>https://github.com/donnemartin/data-science-ip...</td>\n",
	" <td>49025</td>\n",
	" <td>19568</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>ML-From-Scratch</td>\n",
	" <td>eriklindernoren/ML-From-Scratch</td>\n",
	" <td>https://github.com/eriklindernoren/ML-From-Scr...</td>\n",
	" <td>https://github.com/eriklindernoren/ML-From-Scr...</td>\n",
	" <td>553</td>\n",
	" <td>16849</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>numpy</td>\n",
	" <td>numpy/numpy</td>\n",
	" <td>https://github.com/numpy/numpy</td>\n",
	" <td>https://github.com/numpy/numpy.git</td>\n",
	" <td>84293</td>\n",
	" <td>15014</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>jax</td>\n",
	" <td>google/jax</td>\n",
	" <td>https://github.com/google/jax</td>\n",
	" <td>https://github.com/google/jax.git</td>\n",
	" <td>28075</td>\n",
	" <td>9795</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>numpy-ml</td>\n",
	" <td>ddbourgin/numpy-ml</td>\n",
	" <td>https://github.com/ddbourgin/numpy-ml</td>\n",
	" <td>https://github.com/ddbourgin/numpy-ml.git</td>\n",
	" <td>10416</td>\n",
	" <td>8963</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" name full_name \\\n",
	"0 data-science-ipython-notebooks donnemartin/data-science-ipython-notebooks \n",
	"1 ML-From-Scratch eriklindernoren/ML-From-Scratch \n",
	"2 numpy numpy/numpy \n",
	"3 jax google/jax \n",
	"4 numpy-ml ddbourgin/numpy-ml \n",
	"\n",
	" html_url \\\n",
	"0 https://github.com/donnemartin/data-science-ip... \n",
	"1 https://github.com/eriklindernoren/ML-From-Scr... \n",
	"2 https://github.com/numpy/numpy \n",
	"3 https://github.com/google/jax \n",
	"4 https://github.com/ddbourgin/numpy-ml \n",
	"\n",
	" clone_url size stargazers_count \n",
	"0 https://github.com/donnemartin/data-science-ip... 49025 19568 \n",
	"1 https://github.com/eriklindernoren/ML-From-Scr... 553 16849 \n",
	"2 https://github.com/numpy/numpy.git 84293 15014 \n",
	"3 https://github.com/google/jax.git 28075 9795 \n",
	"4 https://github.com/ddbourgin/numpy-ml.git 10416 8963 "
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Turn the per-field lists into a table with one column per field.\n",
	"repo_df = pd.DataFrame(repo_dict)\n",
	"# The output folder follows LIBRARY so the path matches the configured search.\n",
	"repo_df.to_csv('../../data/package_popularity/%s/repo_info.csv' % LIBRARY, index=False)\n",
	"\n",
	"repo_df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Number of repositories: 1050\n"
	]
	}
	],
	"source": [
	"# Report how many rows the collected repository table holds.\n",
	"print('Number of repositories:', repo_df.shape[0])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}