Skip to content

Instantly share code, notes, and snippets.

@ahartikainen
Last active January 26, 2019 23:00
Show Gist options
  • Save ahartikainen/d290c1c87c66590dadd2abe33e76048b to your computer and use it in GitHub Desktop.
Save ahartikainen/d290c1c87c66590dadd2abe33e76048b to your computer and use it in GitHub Desktop.
Get stats for PyStan
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import sys\n",
"\n",
"# Show pandas_gbq's debug-level log messages in the notebook output.\n",
"logger = logging.getLogger('pandas_gbq')\n",
"logger.setLevel(logging.DEBUG)\n",
"# Attach the stdout handler only once: re-running this cell would otherwise\n",
"# add another handler and print every log line multiple times.\n",
"if not any(getattr(handler, 'stream', None) is sys.stdout for handler in logger.handlers):\n",
"    logger.addHandler(logging.StreamHandler(stream=sys.stdout))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Package"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# PyPI project name; substituted into the query's file.project filter.\n",
"package = 'pystan'"
]
},
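{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Query\n",
"\n",
"See the Python Packaging User Guide page on analyzing PyPI package downloads for background on the dataset queried below: https://packaging.python.org/guides/analyzing-pypi-package-downloads/"
]
},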
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Date window to query and the number of separate BigQuery requests to split it into.\n",
"HISTORY_START = \"2013-07-23\"\n",
"HISTORY_END = \"2019-01-26\"\n",
"N_CHUNKS = 10\n",
"\n",
"all_days = pd.date_range(start=HISTORY_START, end=HISTORY_END)\n",
"\n",
"# Partition the days into N_CHUNKS consecutive slices whose sizes differ by at most\n",
"# one day (the first `extra` slices get the additional day). Plain slicing is used\n",
"# instead of the `pd.np` alias, which newer pandas releases no longer provide.\n",
"base_size, extra = divmod(len(all_days), N_CHUNKS)\n",
"query_ranges = []\n",
"chunk_start = 0\n",
"for chunk_index in range(N_CHUNKS):\n",
"    chunk_stop = chunk_start + base_size + (1 if chunk_index < extra else 0)\n",
"    chunk = all_days[chunk_start:chunk_stop]\n",
"    # Each entry is (first day, last day, number of days) for one query.\n",
"    query_ranges.append((chunk.min(), chunk.max(), len(chunk)))\n",
"    chunk_start = chunk_stop"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Query template; {package}, {date_start} and {date_end} are filled in per date range below.\n",
"query_text = \"\"\"\n",
"SELECT \n",
" timestamp,\n",
" country_code,\n",
" file.filename,\n",
" file.version,\n",
" details.installer.version,\n",
" details.python,\n",
" details.implementation.name,\n",
" details.implementation.version,\n",
" details.system.name,\n",
" details.system.release,\n",
" details.cpu,\n",
"FROM \n",
" TABLE_DATE_RANGE(\n",
" [the-psf:pypi.downloads],\n",
" TIMESTAMP(\"{date_start}\"),\n",
" TIMESTAMP(\"{date_end}\")\n",
" )\n",
"WHERE \n",
" file.project=\"{package}\"\n",
"\"\"\".strip()\n",
"\n",
"# One query string per (first day, last day, n days) entry of query_ranges.\n",
"queries = [\n",
"    query_text.format(\n",
"        package=package,\n",
"        date_start=str(first_day.date()),\n",
"        date_end=str(last_day.date()),\n",
"    )\n",
"    for first_day, last_day, _ in query_ranges\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Accumulates (query, exception) pairs for queries that fail in the download loop.\n",
"exceptions = list()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Settings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Fill in the placeholder values marked with _ before running the cells below.\n",
"\n",
"# PyPI project name. It must be a string literal: the bare name pystan is not\n",
"# defined in this notebook and would raise a NameError.\n",
"# NOTE: the query strings are built in an earlier cell, so re-run that cell after\n",
"# changing this value.\n",
"package = \"pystan\"\n",
"\n",
"# Project id passed to pd.read_gbq as project_id.\n",
"project_id = \"_\"\n",
"\n",
"# Path to the JSON key file passed to pd.read_gbq as private_key.\n",
"private_key = \"./_.json\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Google BigQuery"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Run every prepared query in turn and keep the resulting frames in data_list.\n",
"data_list = []\n",
"stime = pd.datetime.now()\n",
"for i, query in enumerate(queries):\n",
"    print(\"Query {}: ...\".format(i), end='')\n",
"    try:\n",
"        data = pd.read_gbq(\n",
"            query=query,\n",
"            project_id=project_id,\n",
"            private_key=private_key,\n",
"        )\n",
"    except Exception as e:\n",
"        # Best effort: report the failure, remember it with its query, and move on.\n",
"        print(e)\n",
"        exceptions.append((query, e))\n",
"    else:\n",
"        data_list.append(data)\n",
"        print(\" Done!\")\n",
"etime = pd.datetime.now()\n",
"print(\" Duration: \", etime-stime)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Save output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine the per-query frames into one table (timed with %time).\n",
"%time df = pd.concat(data_list)\n",
"# Name the output after the configured package instead of hard-coding it;\n",
"# for package = \"pystan\" this is still pystan_pypi_download_stats.csv.\n",
"df.to_csv(\"{}_pypi_download_stats.csv\".format(package))\n",
"df.shape"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment