Skip to content

Instantly share code, notes, and snippets.

@kamakazikamikaze
Last active July 13, 2017 13:26
Show Gist options
  • Save kamakazikamikaze/e7874317c0553eb8cbf432efcd6ca514 to your computer and use it in GitHub Desktop.
Save kamakazikamikaze/e7874317c0553eb8cbf432efcd6ca514 to your computer and use it in GitHub Desktop.
select sum(battles), stats.tank_id, short_name, tier, is_premium from stats, tanks where tanks.tank_id=stats.tank_id group by stats.tank_id;
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import itertools\n",
"from matplotlib import dates as mdates\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib import ticker\n",
"from matplotlib.dates import date2num\n",
"from collections import defaultdict\n",
"from datetime import datetime\n",
"from copy import deepcopy"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"~\\miniconda3\\envs\\wargaming\\lib\\site-packages\\IPython\\core\\interactiveshell.py:2718: DtypeWarning: Columns (0,1,2,5,6,7,8,9,10,11,12,13,14,15,17,18,19,21,23,26,27,28,29,30,31,32,34,35,36) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" interactivity=interactivity, compiler=compiler, result=result)\n"
]
}
],
"source": [
"data = pd.read_csv('../data/playerinfo xbox.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"activity = []\n",
"for created, last_battle in itertools.izip(data['created_at'], data['last_battle_time']):\n",
" activity.append((created, last_battle))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Remove players who have not played any matches"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"activity = filter(lambda x: x[1] != '1970-01-01', activity)\n",
"del activity[activity.index(('created_at', 'last_battle_time'))]"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total accounts in database: 11250901\n",
"Total valid accounts: 6924241\n",
"Total invalid accounts: 4326660\n"
]
}
],
"source": [
"print 'Total accounts in database:', len(data['created_at'])\n",
"print 'Total valid accounts:', len(activity)\n",
"print 'Total invalid accounts:', len(data['created_at']) - len(activity)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# joined = sorted([x[0] for x in activity if not any(t in x[0] for t in ('2017-04', '2017-05', '2017-06', '2017-07'))])\n",
"# left = sorted([x[1] for x in activity if not any(t in x[1] for t in ('2017-04', '2017-05', '2017-06', '2017-07'))])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"total_new = defaultdict(int)\n",
"\n",
"total_inactive = defaultdict(int)\n",
"\n",
"for player in activity:\n",
" joined = player[0].rsplit('-', 1)\n",
" total_new[joined[0]] += 1\n",
" last_active = player[1].rsplit('-', 1)\n",
" total_inactive[last_active[0]] += 1"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"total_size = defaultdict(int)\n",
"lastmonth = None\n",
"for month in sorted(total_new.keys()):\n",
" total_size[month] += total_new[month] - total_inactive[month]\n",
" if lastmonth:\n",
" total_size[month] += total_size[lastmonth]\n",
" lastmonth = month"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"gain = deepcopy(total_new)\n",
"loss = deepcopy(total_inactive)\n",
"active = deepcopy(total_size)\n",
"\n",
"with open('output/activity.csv', 'w') as f:\n",
" keys = sorted(total_new.keys())\n",
" f.write('Month,new,inactive,community size\\n')\n",
" for key in keys:\n",
" f.write(key + ',' + str(total_new[key]) + ',' + str(total_inactive[key]) + ',' + str(total_size[key]) + '\\n')\n",
"\n",
"del gain['2017-07']\n",
"del gain['2017-06']\n",
"del gain['2017-05']\n",
"del gain['2017-04']\n",
"del loss['2017-07']\n",
"del loss['2017-06']\n",
"del loss['2017-05']\n",
"del loss['2017-04']\n",
"del active['2017-07']\n",
"del active['2017-06']\n",
"del active['2017-05']\n",
"del active['2017-04']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Graphing community size"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# indent = np.arange(len(total_size.keys()))\n",
"xticks = list(sorted(total_size.keys()))[0:-4]\n",
"indent = date2num([datetime.strptime(x, '%Y-%m') for x in sorted(gain.keys())])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"width = 10\n",
"gap = 0.5\n",
"fig = plt.figure()\n",
"fig.set_size_inches(22,13)\n",
"fig.suptitle('Xbox playerbase growth and decay')\n",
"ax = fig.add_subplot(111)\n",
"ax.xaxis.set_major_locator(mdates.MonthLocator())\n",
"ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))\n",
"ax.yaxis.set_major_locator(ticker.MultipleLocator(25000))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"bar_gain = ax.bar(\n",
" #indent,\n",
" # [datetime.strptime(x, '%Y-%m') for x in xticks],\n",
" indent - 5,\n",
" [val for _, val in sorted(gain.iteritems())],\n",
" width=width,\n",
"# color='green',\n",
" color='#009e73',\n",
" align='center'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"bar_loss = ax.bar(\n",
" #indent + width,\n",
" # [datetime.strptime(x, '%Y-%m') for x in xticks],\n",
" indent + 5,\n",
" [val for _, val in sorted(loss.iteritems())],\n",
" width=width,\n",
"# color='red',\n",
" color='#d55e00',\n",
" align='center'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"line_active = ax.plot(\n",
" indent,\n",
" [val for _, val in sorted(active.iteritems())],\n",
"# color='c',\n",
" color='#0072b2',\n",
" marker='o'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.legend.Legend at 0x186ee2dd8>"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fig.legend((bar_gain, bar_loss), ('New Accounts', 'Gone inactive'))\n",
"# ax.xaxis.set_ticklabels(xticks)\n",
"# _, labels = plt.xticks()\n",
"# plt.setp(labels, rotation=45)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"fig.autofmt_xdate()\n",
"fig.savefig('output/xbox playerbase activity.png', dpi=600)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"del fig\n",
"plt.close()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### New accounts vs short-lived accounts"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"lived_1 = defaultdict(int)\n",
"lived_7 = defaultdict(int)\n",
"lived_30 = defaultdict(int)\n",
"lived_90 = defaultdict(int)\n",
"lived_long = defaultdict(int)\n",
"for player in activity:\n",
" active_for = abs((datetime.strptime(player[1], '%Y-%m-%d') - datetime.strptime(player[0], '%Y-%m-%d')).days)\n",
" if active_for <= 1:\n",
" lived_1[player[0].rsplit('-', 1)[0]] += 1\n",
" elif active_for <= 7:\n",
" lived_7[player[0].rsplit('-', 1)[0]] += 1\n",
" elif active_for <= 30:\n",
" lived_30[player[0].rsplit('-', 1)[0]] += 1\n",
" elif active_for <= 90:\n",
" lived_90[player[0].rsplit('-', 1)[0]] += 1\n",
" else:\n",
" lived_long[player[0].rsplit('-', 1)[0]] += 1"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"with open('output/short lives.csv', 'w') as f:\n",
" keys = sorted(lived_1.keys())\n",
" f.write('Month,New Accounts,1 Day,7 Days,30 Days,90 Days,Long-term\\n')\n",
" for key in keys:\n",
" f.write(key + ',' + str(total_new[key]) + ',' + str(lived_1[key]) + ',' + str(lived_7[key]) + ',' + str(lived_30[key]) + ',' + str(lived_90[key]) + ',' + str(lived_long[key]) + '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Battles vs age"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
"battle_age = []\n",
"with open('output/battles over time.csv', 'w') as f:\n",
" f.write('Age (Days),Battles\\n')\n",
" for _, row in data.iterrows():\n",
" try:\n",
" if datetime.utcfromtimestamp(row['last_battle_time_raw']).year == 1970:\n",
" continue\n",
" battle_age.append((abs((datetime.utcfromtimestamp(row['last_battle_time_raw']) - datetime.utcfromtimestamp(row['created_at_raw'])).days), row['battles']))\n",
" f.write(str(abs((datetime.utcfromtimestamp(row['last_battle_time_raw']) - datetime.utcfromtimestamp(row['created_at_raw'])).days)) + ',' + str(row['battles']) + '\\n')\n",
" except TypeError:\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"fig = plt.figure()\n",
"fig.set_size_inches(22,13)\n",
"fig.suptitle('Xbox battles vs account age')\n",
"ax = fig.add_subplot(111)\n",
"plt.xlabel('Account age (days)')\n",
"plt.ylabel('Battle count')"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x186d21d30>"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ax.scatter(\n",
" [ba[0] for ba in battle_age],\n",
" [ba[1] for ba in battle_age],\n",
" c=np.random.rand(len(battle_age)),\n",
" alpha=0.05\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"fig.savefig('output/battle vs age.png', dpi=600)"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"del fig\n",
"plt.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment