Skip to content

Instantly share code, notes, and snippets.

@parente
Last active May 12, 2019 00:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save parente/316d5c242aeb484484c8 to your computer and use it in GitHub Desktop.
Save parente/316d5c242aeb484484c8 to your computer and use it in GitHub Desktop.
Docker Stacks Stats
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Docker Stacks Stats\n",
"\n",
"Fetch the number of times the [jupyter/docker-stacks](https://github.com/jupyter/docker-stacks) images have been pulled from and starred on Docker Hub."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import pandas as pd\n",
"import datetime"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Last run: 2019-05-12 00:17:28.378124 UTC\n"
]
}
],
"source": [
"# Record when the notebook was last executed, in UTC.\n",
"# datetime.utcnow() is deprecated since Python 3.12, so take an aware UTC time\n",
"# and drop tzinfo to keep the printed timestamp in the same format as before.\n",
"print(\n",
"    'Last run:',\n",
"    datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None),\n",
"    'UTC',\n",
")"
]
},
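{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the top-level contents of the jupyter/docker-stacks repository on GitHub; the stack folders are picked out of this listing in the next step."
]
},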
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Without a timeout, requests can wait forever on a stalled connection.\n",
"resp = requests.get('https://api.github.com/repos/jupyter/docker-stacks/contents/', timeout=30)\n",
"# On an error (e.g. rate limiting) GitHub answers with a JSON object instead of the\n",
"# directory list; fail here rather than with a confusing TypeError further down.\n",
"resp.raise_for_status()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"contents = resp.json()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Keep only the entries whose names end in `-notebook` or `-kernel`; everything else in the repository root is not a stack."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# str.endswith accepts a tuple of suffixes, so one call covers both naming schemes.\n",
"stacks = [\n",
"    entry['name']\n",
"    for entry in contents\n",
"    if entry['name'].endswith(('-notebook', '-kernel'))\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Interest"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get Docker Hub stats."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Collect the Docker Hub repository record for each stack, keyed by stack name.\n",
"hub = {}\n",
"for stack in stacks:\n",
"    # Bound the wait so one stalled connection cannot hang the whole run.\n",
"    resp = requests.get(f'https://hub.docker.com/v2/repositories/jupyter/{stack}/', timeout=30)\n",
"    hub[stack] = resp.json()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>pull_count</th>\n",
" <th>star_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>datascience-notebook</th>\n",
" <td>4872772</td>\n",
" <td>468</td>\n",
" </tr>\n",
" <tr>\n",
" <th>all-spark-notebook</th>\n",
" <td>3012406</td>\n",
" <td>231</td>\n",
" </tr>\n",
" <tr>\n",
" <th>scipy-notebook</th>\n",
" <td>2482236</td>\n",
" <td>163</td>\n",
" </tr>\n",
" <tr>\n",
" <th>tensorflow-notebook</th>\n",
" <td>1031086</td>\n",
" <td>136</td>\n",
" </tr>\n",
" <tr>\n",
" <th>minimal-notebook</th>\n",
" <td>861240</td>\n",
" <td>76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>base-notebook</th>\n",
" <td>413683</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pyspark-notebook</th>\n",
" <td>173862</td>\n",
" <td>101</td>\n",
" </tr>\n",
" <tr>\n",
" <th>r-notebook</th>\n",
" <td>76728</td>\n",
" <td>22</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" pull_count star_count\n",
"datascience-notebook 4872772 468\n",
"all-spark-notebook 3012406 231\n",
"scipy-notebook 2482236 163\n",
"tensorflow-notebook 1031086 136\n",
"minimal-notebook 861240 76\n",
"base-notebook 413683 64\n",
"pyspark-notebook 173862 101\n",
"r-notebook 76728 22"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(hub).T[['pull_count', 'star_count']].sort_values(by='pull_count', ascending=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Sizes"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def full_size_for_tag(tag):\n",
"    \"\"\"Return the Docker Hub `full_size` of `tag` for every stack.\n",
"\n",
"    Queries the Docker Hub v2 tag endpoint once per name in the global\n",
"    `stacks` list.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    tag : str\n",
"        Image tag to look up, e.g. 'latest'.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        One row per stack, indexed by stack name, with a single float64\n",
"        `full_size` column (presumably bytes, given the later division by\n",
"        1e6 -- confirm against the Docker Hub API).\n",
"    \"\"\"\n",
"    image = {}\n",
"    for stack in stacks:\n",
"        # Bound the wait so one stalled connection cannot hang the whole run.\n",
"        resp = requests.get(f'https://hub.docker.com/v2/repositories/jupyter/{stack}/tags/{tag}', timeout=30)\n",
"        image[stack] = resp.json()\n",
"    # Transposing a frame of mixed JSON fields leaves every column as object dtype;\n",
"    # cast so later arithmetic and display operate on real floats.\n",
"    return pd.DataFrame(image).T[['full_size']].astype('float64')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"df_new = full_size_for_tag('latest').rename(columns={'full_size': 'new_size_mb'})\n",
"df_old = full_size_for_tag('2662627f26e0').rename(columns={'full_size': 'old_size_mb'})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Convert the sizes to megabytes, treating one megabyte as 1 000 000 bytes."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"df = pd.merge(df_new, df_old, left_index=True, right_index=True) / 1e6"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"df['delta_mb'] = df.new_size_mb - df.old_size_mb"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>new_size_mb</th>\n",
" <th>old_size_mb</th>\n",
" <th>delta_mb</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>all-spark-notebook</th>\n",
" <td>2145.16</td>\n",
" <td>2145.16</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>base-notebook</th>\n",
" <td>216.035</td>\n",
" <td>259.499</td>\n",
" <td>-43.4638</td>\n",
" </tr>\n",
" <tr>\n",
" <th>datascience-notebook</th>\n",
" <td>2047.63</td>\n",
" <td>2115.08</td>\n",
" <td>-67.4586</td>\n",
" </tr>\n",
" <tr>\n",
" <th>minimal-notebook</th>\n",
" <td>1040.8</td>\n",
" <td>1083.59</td>\n",
" <td>-42.7885</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pyspark-notebook</th>\n",
" <td>1809.2</td>\n",
" <td>1809.2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>r-notebook</th>\n",
" <td>1472.44</td>\n",
" <td>1530.85</td>\n",
" <td>-58.4122</td>\n",
" </tr>\n",
" <tr>\n",
" <th>scipy-notebook</th>\n",
" <td>1384.29</td>\n",
" <td>1444.94</td>\n",
" <td>-60.6465</td>\n",
" </tr>\n",
" <tr>\n",
" <th>tensorflow-notebook</th>\n",
" <td>1629.26</td>\n",
" <td>1558.54</td>\n",
" <td>70.7198</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" new_size_mb old_size_mb delta_mb\n",
"all-spark-notebook 2145.16 2145.16 0\n",
"base-notebook 216.035 259.499 -43.4638\n",
"datascience-notebook 2047.63 2115.08 -67.4586\n",
"minimal-notebook 1040.8 1083.59 -42.7885\n",
"pyspark-notebook 1809.2 1809.2 0\n",
"r-notebook 1472.44 1530.85 -58.4122\n",
"scipy-notebook 1384.29 1444.94 -60.6465\n",
"tensorflow-notebook 1629.26 1558.54 70.7198"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
}
],
"metadata": {
"gist_id": "316d5c242aeb484484c8",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
pandas
requests
tabulate
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment