Last active
May 12, 2019 00:20
-
-
Save parente/316d5c242aeb484484c8 to your computer and use it in GitHub Desktop.
Docker Stacks Stats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Docker Stacks Stats\n", | |
"\n", | |
"Fetch the number of times the [jupyter/docker-stacks](https://github.com/jupyter/docker-stacks) images have been pulled from and starred on Docker Hub." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"import pandas as pd\n", | |
"import datetime" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Last run: 2019-05-12 00:17:28.378124 UTC\n" | |
] | |
} | |
], | |
"source": [ | |
"print('Last run:', datetime.datetime.utcnow(), 'UTC')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Get all stack names from GitHub." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"resp = requests.get('https://api.github.com/repos/jupyter/docker-stacks/contents/')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"contents = resp.json()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Filter out folders that aren't for stacks." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"stacks = [content['name'] for content in contents \n", | |
" if content['name'].endswith('-notebook') or content['name'].endswith('-kernel')] " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Interest" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Get Docker Hub stats." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"hub = {}\n", | |
"for stack in stacks:\n", | |
" resp = requests.get('https://hub.docker.com/v2/repositories/jupyter/{}/'.format(stack))\n", | |
" hub[stack] = resp.json()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>pull_count</th>\n", | |
" <th>star_count</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>datascience-notebook</th>\n", | |
" <td>4872772</td>\n", | |
" <td>468</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>all-spark-notebook</th>\n", | |
" <td>3012406</td>\n", | |
" <td>231</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>scipy-notebook</th>\n", | |
" <td>2482236</td>\n", | |
" <td>163</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>tensorflow-notebook</th>\n", | |
" <td>1031086</td>\n", | |
" <td>136</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>minimal-notebook</th>\n", | |
" <td>861240</td>\n", | |
" <td>76</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>base-notebook</th>\n", | |
" <td>413683</td>\n", | |
" <td>64</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>pyspark-notebook</th>\n", | |
" <td>173862</td>\n", | |
" <td>101</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>r-notebook</th>\n", | |
" <td>76728</td>\n", | |
" <td>22</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" pull_count star_count\n", | |
"datascience-notebook 4872772 468\n", | |
"all-spark-notebook 3012406 231\n", | |
"scipy-notebook 2482236 163\n", | |
"tensorflow-notebook 1031086 136\n", | |
"minimal-notebook 861240 76\n", | |
"base-notebook 413683 64\n", | |
"pyspark-notebook 173862 101\n", | |
"r-notebook 76728 22" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.DataFrame(hub).T[['pull_count', 'star_count']].sort_values(by='pull_count', ascending=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Sizes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def full_size_for_tag(tag):\n", | |
" image = {}\n", | |
" for stack in stacks:\n", | |
" resp = requests.get(f'https://hub.docker.com/v2/repositories/jupyter/{stack}/tags/{tag}')\n", | |
" image[stack] = resp.json()\n", | |
" return pd.DataFrame(image).T[['full_size']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df_new = full_size_for_tag('latest').rename(columns={'full_size': 'new_size_mb'})\n", | |
"df_old = full_size_for_tag('2662627f26e0').rename(columns={'full_size': 'old_size_mb'})" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Treating storage megabyte as 1 000 000." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.merge(df_new, df_old, left_index=True, right_index=True) / 1e6" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df['delta_mb'] = df.new_size_mb - df.old_size_mb" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>new_size_mb</th>\n", | |
" <th>old_size_mb</th>\n", | |
" <th>delta_mb</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>all-spark-notebook</th>\n", | |
" <td>2145.16</td>\n", | |
" <td>2145.16</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>base-notebook</th>\n", | |
" <td>216.035</td>\n", | |
" <td>259.499</td>\n", | |
" <td>-43.4638</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>datascience-notebook</th>\n", | |
" <td>2047.63</td>\n", | |
" <td>2115.08</td>\n", | |
" <td>-67.4586</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>minimal-notebook</th>\n", | |
" <td>1040.8</td>\n", | |
" <td>1083.59</td>\n", | |
" <td>-42.7885</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>pyspark-notebook</th>\n", | |
" <td>1809.2</td>\n", | |
" <td>1809.2</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>r-notebook</th>\n", | |
" <td>1472.44</td>\n", | |
" <td>1530.85</td>\n", | |
" <td>-58.4122</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>scipy-notebook</th>\n", | |
" <td>1384.29</td>\n", | |
" <td>1444.94</td>\n", | |
" <td>-60.6465</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>tensorflow-notebook</th>\n", | |
" <td>1629.26</td>\n", | |
" <td>1558.54</td>\n", | |
" <td>70.7198</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" new_size_mb old_size_mb delta_mb\n", | |
"all-spark-notebook 2145.16 2145.16 0\n", | |
"base-notebook 216.035 259.499 -43.4638\n", | |
"datascience-notebook 2047.63 2115.08 -67.4586\n", | |
"minimal-notebook 1040.8 1083.59 -42.7885\n", | |
"pyspark-notebook 1809.2 1809.2 0\n", | |
"r-notebook 1472.44 1530.85 -58.4122\n", | |
"scipy-notebook 1384.29 1444.94 -60.6465\n", | |
"tensorflow-notebook 1629.26 1558.54 70.7198" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
} | |
], | |
"metadata": { | |
"gist_id": "316d5c242aeb484484c8", | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pandas | |
requests | |
tabulate | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment