parente/README.md

## README.md

      
    Raw
  

              README.md
            
          
## docker_stacks_stats.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Docker Stacks Stats\n",
    "\n",
    "Fetch how many times the [jupyter/docker-stacks](https://github.com/jupyter/docker-stacks) images have been pulled from and starred on Docker Hub, and compare the image sizes of two tags."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import pandas as pd\n",
    "import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Last run: 2019-05-12 00:17:28.378124 UTC\n"
     ]
    }
   ],
   "source": [
    "# datetime.utcnow() is deprecated in newer Python releases. Build the same naive UTC\n",
    "# timestamp from a timezone-aware one so the printed format stays unchanged.\n",
    "last_run = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)\n",
    "print('Last run:', last_run, 'UTC')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get all stack names from GitHub."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# List the top-level entries of the repository. The timeout keeps a stalled\n",
    "# connection from hanging the notebook, and raise_for_status() surfaces HTTP\n",
    "# errors (e.g. API rate limiting) here instead of as a confusing failure later.\n",
    "resp = requests.get('https://api.github.com/repos/jupyter/docker-stacks/contents/', timeout=30)\n",
    "resp.raise_for_status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "contents = resp.json()\n",
    "# The stack filter iterates over `contents` and reads each item's 'name', so it\n",
    "# needs a list of entries; an error payload from the API would be a dict instead.\n",
    "assert isinstance(contents, list), contents"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Filter out folders that aren't for stacks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the entries whose names end in '-notebook' or '-kernel'.\n",
    "stacks = [\n",
    "    entry['name']\n",
    "    for entry in contents\n",
    "    if entry['name'].endswith(('-notebook', '-kernel'))\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Interest"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get Docker Hub stats."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Docker Hub repository metadata for each stack, keyed by stack name.\n",
    "hub = {}\n",
    "for stack in stacks:\n",
    "    # Use a dedicated name so the GitHub response held in `resp` is not overwritten;\n",
    "    # re-running the cell that parses it would otherwise read Docker Hub data.\n",
    "    hub_resp = requests.get(f'https://hub.docker.com/v2/repositories/jupyter/{stack}/', timeout=30)\n",
    "    # Fail loudly rather than storing an error payload in place of the metadata.\n",
    "    hub_resp.raise_for_status()\n",
    "    hub[stack] = hub_resp.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pull_count</th>\n",
       "      <th>star_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>datascience-notebook</th>\n",
       "      <td>4872772</td>\n",
       "      <td>468</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>all-spark-notebook</th>\n",
       "      <td>3012406</td>\n",
       "      <td>231</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>scipy-notebook</th>\n",
       "      <td>2482236</td>\n",
       "      <td>163</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tensorflow-notebook</th>\n",
       "      <td>1031086</td>\n",
       "      <td>136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>minimal-notebook</th>\n",
       "      <td>861240</td>\n",
       "      <td>76</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>base-notebook</th>\n",
       "      <td>413683</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pyspark-notebook</th>\n",
       "      <td>173862</td>\n",
       "      <td>101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r-notebook</th>\n",
       "      <td>76728</td>\n",
       "      <td>22</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     pull_count star_count\n",
       "datascience-notebook    4872772        468\n",
       "all-spark-notebook      3012406        231\n",
       "scipy-notebook          2482236        163\n",
       "tensorflow-notebook     1031086        136\n",
       "minimal-notebook         861240         76\n",
       "base-notebook            413683         64\n",
       "pyspark-notebook         173862        101\n",
       "r-notebook                76728         22"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(hub).T[['pull_count', 'star_count']].sort_values(by='pull_count', ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sizes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def full_size_for_tag(tag):\n",
    "    \"\"\"Return the Docker Hub `full_size` of every stack image for one tag.\n",
    "\n",
    "    Looks up `tag` for each name in the notebook-level `stacks` list.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    tag : str\n",
    "        Image tag to look up, e.g. 'latest'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        One row per stack (indexed by stack name) with a single `full_size` column.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    requests.HTTPError\n",
    "        If Docker Hub answers with an error status, e.g. when the tag does not\n",
    "        exist for one of the stacks.\n",
    "    \"\"\"\n",
    "    image = {}\n",
    "    for stack in stacks:\n",
    "        resp = requests.get(\n",
    "            f'https://hub.docker.com/v2/repositories/jupyter/{stack}/tags/{tag}',\n",
    "            timeout=30,\n",
    "        )\n",
    "        # Without this check an error payload would be stored and only surface\n",
    "        # later as a missing or NaN `full_size` value.\n",
    "        resp.raise_for_status()\n",
    "        image[stack] = resp.json()\n",
    "    return pd.DataFrame(image).T[['full_size']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same lookup for the 'latest' tag and for the tag used as the old baseline. The\n",
    "# columns are named *_mb already, but the values are only divided by 1e6 in a later cell.\n",
    "df_new = full_size_for_tag('latest')\n",
    "df_new = df_new.rename(columns={'full_size': 'new_size_mb'})\n",
    "df_old = full_size_for_tag('2662627f26e0')\n",
    "df_old = df_old.rename(columns={'full_size': 'old_size_mb'})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Treat one storage megabyte as 1,000,000 bytes (decimal MB, not MiB)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The transposed frames hold generic Python objects, so cast to float before\n",
    "# scaling; the result then has a numeric dtype and displays consistently.\n",
    "df = pd.merge(df_new, df_old, left_index=True, right_index=True).astype(float) / 1e6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A positive delta means the new image is larger than the old one.\n",
    "df['delta_mb'] = df['new_size_mb'] - df['old_size_mb']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>new_size_mb</th>\n",
       "      <th>old_size_mb</th>\n",
       "      <th>delta_mb</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>all-spark-notebook</th>\n",
       "      <td>2145.16</td>\n",
       "      <td>2145.16</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>base-notebook</th>\n",
       "      <td>216.035</td>\n",
       "      <td>259.499</td>\n",
       "      <td>-43.4638</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>datascience-notebook</th>\n",
       "      <td>2047.63</td>\n",
       "      <td>2115.08</td>\n",
       "      <td>-67.4586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>minimal-notebook</th>\n",
       "      <td>1040.8</td>\n",
       "      <td>1083.59</td>\n",
       "      <td>-42.7885</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pyspark-notebook</th>\n",
       "      <td>1809.2</td>\n",
       "      <td>1809.2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>r-notebook</th>\n",
       "      <td>1472.44</td>\n",
       "      <td>1530.85</td>\n",
       "      <td>-58.4122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>scipy-notebook</th>\n",
       "      <td>1384.29</td>\n",
       "      <td>1444.94</td>\n",
       "      <td>-60.6465</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tensorflow-notebook</th>\n",
       "      <td>1629.26</td>\n",
       "      <td>1558.54</td>\n",
       "      <td>70.7198</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     new_size_mb old_size_mb delta_mb\n",
       "all-spark-notebook       2145.16     2145.16        0\n",
       "base-notebook            216.035     259.499 -43.4638\n",
       "datascience-notebook     2047.63     2115.08 -67.4586\n",
       "minimal-notebook          1040.8     1083.59 -42.7885\n",
       "pyspark-notebook          1809.2      1809.2        0\n",
       "r-notebook               1472.44     1530.85 -58.4122\n",
       "scipy-notebook           1384.29     1444.94 -60.6465\n",
       "tensorflow-notebook      1629.26     1558.54  70.7198"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  }
 ],
 "metadata": {
  "gist_id": "316d5c242aeb484484c8",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

## requirements.txt
pandas
requests
tabulate
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# Docker Stacks Stats\n",
	"\n",
	"Fetch how many times the [jupyter/docker-stacks](https://github.com/jupyter/docker-stacks) images have been pulled from and starred on Docker Hub, and compare the image sizes of two tags."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import requests\n",
	"import pandas as pd\n",
	"import datetime"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Last run: 2019-05-12 00:17:28.378124 UTC\n"
	]
	}
	],
	"source": [
	"print('Last run:', datetime.datetime.utcnow(), 'UTC')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Get all stack names from GitHub."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"resp = requests.get('https://api.github.com/repos/jupyter/docker-stacks/contents/')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"contents = resp.json()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Filter out folders that aren't for stacks."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"stacks = [content['name'] for content in contents \n",
	" if content['name'].endswith('-notebook') or content['name'].endswith('-kernel')] "
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Interest"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Get Docker Hub stats."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Docker Hub repository metadata for each stack, keyed by stack name.\n",
	"hub = {}\n",
	"for stack in stacks:\n",
	"    # Use a dedicated name so the GitHub response held in `resp` is not overwritten;\n",
	"    # re-running the cell that parses it would otherwise read Docker Hub data.\n",
	"    hub_resp = requests.get(f'https://hub.docker.com/v2/repositories/jupyter/{stack}/', timeout=30)\n",
	"    # Fail loudly rather than storing an error payload in place of the metadata.\n",
	"    hub_resp.raise_for_status()\n",
	"    hub[stack] = hub_resp.json()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {
	"scrolled": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>pull_count</th>\n",
	" <th>star_count</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>datascience-notebook</th>\n",
	" <td>4872772</td>\n",
	" <td>468</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>all-spark-notebook</th>\n",
	" <td>3012406</td>\n",
	" <td>231</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>scipy-notebook</th>\n",
	" <td>2482236</td>\n",
	" <td>163</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>tensorflow-notebook</th>\n",
	" <td>1031086</td>\n",
	" <td>136</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>minimal-notebook</th>\n",
	" <td>861240</td>\n",
	" <td>76</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>base-notebook</th>\n",
	" <td>413683</td>\n",
	" <td>64</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>pyspark-notebook</th>\n",
	" <td>173862</td>\n",
	" <td>101</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>r-notebook</th>\n",
	" <td>76728</td>\n",
	" <td>22</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" pull_count star_count\n",
	"datascience-notebook 4872772 468\n",
	"all-spark-notebook 3012406 231\n",
	"scipy-notebook 2482236 163\n",
	"tensorflow-notebook 1031086 136\n",
	"minimal-notebook 861240 76\n",
	"base-notebook 413683 64\n",
	"pyspark-notebook 173862 101\n",
	"r-notebook 76728 22"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"pd.DataFrame(hub).T[['pull_count', 'star_count']].sort_values(by='pull_count', ascending=False)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Sizes"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"def full_size_for_tag(tag):\n",
	"    \"\"\"Return the Docker Hub `full_size` of every stack image for one tag.\n",
	"\n",
	"    Looks up `tag` for each name in the notebook-level `stacks` list.\n",
	"\n",
	"    Parameters\n",
	"    ----------\n",
	"    tag : str\n",
	"        Image tag to look up, e.g. 'latest'.\n",
	"\n",
	"    Returns\n",
	"    -------\n",
	"    pandas.DataFrame\n",
	"        One row per stack (indexed by stack name) with a single `full_size` column.\n",
	"\n",
	"    Raises\n",
	"    ------\n",
	"    requests.HTTPError\n",
	"        If Docker Hub answers with an error status, e.g. when the tag does not\n",
	"        exist for one of the stacks.\n",
	"    \"\"\"\n",
	"    image = {}\n",
	"    for stack in stacks:\n",
	"        resp = requests.get(\n",
	"            f'https://hub.docker.com/v2/repositories/jupyter/{stack}/tags/{tag}',\n",
	"            timeout=30,\n",
	"        )\n",
	"        # Without this check an error payload would be stored and only surface\n",
	"        # later as a missing or NaN `full_size` value.\n",
	"        resp.raise_for_status()\n",
	"        image[stack] = resp.json()\n",
	"    return pd.DataFrame(image).T[['full_size']]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [],
	"source": [
	"df_new = full_size_for_tag('latest').rename(columns={'full_size': 'new_size_mb'})\n",
	"df_old = full_size_for_tag('2662627f26e0').rename(columns={'full_size': 'old_size_mb'})"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Treat one storage megabyte as 1,000,000 bytes (decimal MB, not MiB)."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"df = pd.merge(df_new, df_old, left_index=True, right_index=True) / 1e6"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"df['delta_mb'] = df.new_size_mb - df.old_size_mb"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>new_size_mb</th>\n",
	" <th>old_size_mb</th>\n",
	" <th>delta_mb</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>all-spark-notebook</th>\n",
	" <td>2145.16</td>\n",
	" <td>2145.16</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>base-notebook</th>\n",
	" <td>216.035</td>\n",
	" <td>259.499</td>\n",
	" <td>-43.4638</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>datascience-notebook</th>\n",
	" <td>2047.63</td>\n",
	" <td>2115.08</td>\n",
	" <td>-67.4586</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>minimal-notebook</th>\n",
	" <td>1040.8</td>\n",
	" <td>1083.59</td>\n",
	" <td>-42.7885</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>pyspark-notebook</th>\n",
	" <td>1809.2</td>\n",
	" <td>1809.2</td>\n",
	" <td>0</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>r-notebook</th>\n",
	" <td>1472.44</td>\n",
	" <td>1530.85</td>\n",
	" <td>-58.4122</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>scipy-notebook</th>\n",
	" <td>1384.29</td>\n",
	" <td>1444.94</td>\n",
	" <td>-60.6465</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>tensorflow-notebook</th>\n",
	" <td>1629.26</td>\n",
	" <td>1558.54</td>\n",
	" <td>70.7198</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" new_size_mb old_size_mb delta_mb\n",
	"all-spark-notebook 2145.16 2145.16 0\n",
	"base-notebook 216.035 259.499 -43.4638\n",
	"datascience-notebook 2047.63 2115.08 -67.4586\n",
	"minimal-notebook 1040.8 1083.59 -42.7885\n",
	"pyspark-notebook 1809.2 1809.2 0\n",
	"r-notebook 1472.44 1530.85 -58.4122\n",
	"scipy-notebook 1384.29 1444.94 -60.6465\n",
	"tensorflow-notebook 1629.26 1558.54 70.7198"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df"
	]
	}
	],
	"metadata": {
	"gist_id": "316d5c242aeb484484c8",
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}