Data used for this session can be downloaded from: https://drive.google.com/open?id=0B1qZrP_GY8KXeHh4WjZCSmdIQkE
A combined listing of graphs that can be had at Plot.ly
select sum(battles), stats.tank_id, short_name, tier, is_premium from stats, tanks where tanks.tank_id=stats.tank_id group by stats.tank_id; |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import itertools\n", | |
"from matplotlib import dates as mdates\n", | |
"from matplotlib import pyplot as plt\n", | |
"from matplotlib import ticker\n", | |
"from matplotlib.dates import date2num\n", | |
"from collections import defaultdict\n", | |
"from datetime import datetime\n", | |
"from copy import deepcopy" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"~\\miniconda3\\envs\\wargaming\\lib\\site-packages\\IPython\\core\\interactiveshell.py:2718: DtypeWarning: Columns (0,1,2,5,6,7,8,9,10,11,12,13,14,15,17,18,19,21,23,26,27,28,29,30,31,32,34,35,36) have mixed types. Specify dtype option on import or set low_memory=False.\n", | |
" interactivity=interactivity, compiler=compiler, result=result)\n" | |
] | |
} | |
], | |
"source": [ | |
"data = pd.read_csv('../data/playerinfo xbox.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"activity = []\n", | |
"for created, last_battle in itertools.izip(data['created_at'], data['last_battle_time']):\n", | |
" activity.append((created, last_battle))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Remove players who have not played any matches" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"activity = filter(lambda x: x[1] != '1970-01-01', activity)\n", | |
"del activity[activity.index(('created_at', 'last_battle_time'))]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Total accounts in database: 11250901\n", | |
"Total valid accounts: 6924241\n", | |
"Total invalid accounts: 4326660\n" | |
] | |
} | |
], | |
"source": [ | |
"print 'Total accounts in database:', len(data['created_at'])\n", | |
"print 'Total valid accounts:', len(activity)\n", | |
"print 'Total invalid accounts:', len(data['created_at']) - len(activity)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# joined = sorted([x[0] for x in activity if not any(t in x[0] for t in ('2017-04', '2017-05', '2017-06', '2017-07'))])\n", | |
"# left = sorted([x[1] for x in activity if not any(t in x[1] for t in ('2017-04', '2017-05', '2017-06', '2017-07'))])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"total_new = defaultdict(int)\n", | |
"\n", | |
"total_inactive = defaultdict(int)\n", | |
"\n", | |
"for player in activity:\n", | |
" joined = player[0].rsplit('-', 1)\n", | |
" total_new[joined[0]] += 1\n", | |
" last_active = player[1].rsplit('-', 1)\n", | |
" total_inactive[last_active[0]] += 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"total_size = defaultdict(int)\n", | |
"lastmonth = None\n", | |
"for month in sorted(total_new.keys()):\n", | |
" total_size[month] += total_new[month] - total_inactive[month]\n", | |
" if lastmonth:\n", | |
" total_size[month] += total_size[lastmonth]\n", | |
" lastmonth = month" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"gain = deepcopy(total_new)\n", | |
"loss = deepcopy(total_inactive)\n", | |
"active = deepcopy(total_size)\n", | |
"\n", | |
"with open('output/activity.csv', 'w') as f:\n", | |
" keys = sorted(total_new.keys())\n", | |
" f.write('Month,new,inactive,community size\\n')\n", | |
" for key in keys:\n", | |
" f.write(key + ',' + str(total_new[key]) + ',' + str(total_inactive[key]) + ',' + str(total_size[key]) + '\\n')\n", | |
"\n", | |
"del gain['2017-07']\n", | |
"del gain['2017-06']\n", | |
"del gain['2017-05']\n", | |
"del gain['2017-04']\n", | |
"del loss['2017-07']\n", | |
"del loss['2017-06']\n", | |
"del loss['2017-05']\n", | |
"del loss['2017-04']\n", | |
"del active['2017-07']\n", | |
"del active['2017-06']\n", | |
"del active['2017-05']\n", | |
"del active['2017-04']" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Graphing community size" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# indent = np.arange(len(total_size.keys()))\n", | |
"xticks = list(sorted(total_size.keys()))[0:-4]\n", | |
"indent = date2num([datetime.strptime(x, '%Y-%m') for x in sorted(gain.keys())])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"width = 10\n", | |
"gap = 0.5\n", | |
"fig = plt.figure()\n", | |
"fig.set_size_inches(22,13)\n", | |
"fig.suptitle('Xbox playerbase growth and decay')\n", | |
"ax = fig.add_subplot(111)\n", | |
"ax.xaxis.set_major_locator(mdates.MonthLocator())\n", | |
"ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))\n", | |
"ax.yaxis.set_major_locator(ticker.MultipleLocator(25000))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"bar_gain = ax.bar(\n", | |
" #indent,\n", | |
" # [datetime.strptime(x, '%Y-%m') for x in xticks],\n", | |
" indent - 5,\n", | |
" [val for _, val in sorted(gain.iteritems())],\n", | |
" width=width,\n", | |
"# color='green',\n", | |
" color='#009e73',\n", | |
" align='center'\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"bar_loss = ax.bar(\n", | |
" #indent + width,\n", | |
" # [datetime.strptime(x, '%Y-%m') for x in xticks],\n", | |
" indent + 5,\n", | |
" [val for _, val in sorted(loss.iteritems())],\n", | |
" width=width,\n", | |
"# color='red',\n", | |
" color='#d55e00',\n", | |
" align='center'\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"line_active = ax.plot(\n", | |
" indent,\n", | |
" [val for _, val in sorted(active.iteritems())],\n", | |
"# color='c',\n", | |
" color='#0072b2',\n", | |
" marker='o'\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<matplotlib.legend.Legend at 0x186ee2dd8>" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"fig.legend((bar_gain, bar_loss), ('New Accounts', 'Gone inactive'))\n", | |
"# ax.xaxis.set_ticklabels(xticks)\n", | |
"# _, labels = plt.xticks()\n", | |
"# plt.setp(labels, rotation=45)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fig.autofmt_xdate()\n", | |
"fig.savefig('output/xbox playerbase activity.png', dpi=600)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"del fig\n", | |
"plt.close()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### New accounts vs short-lived accounts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"lived_1 = defaultdict(int)\n", | |
"lived_7 = defaultdict(int)\n", | |
"lived_30 = defaultdict(int)\n", | |
"lived_90 = defaultdict(int)\n", | |
"lived_long = defaultdict(int)\n", | |
"for player in activity:\n", | |
" active_for = abs((datetime.strptime(player[1], '%Y-%m-%d') - datetime.strptime(player[0], '%Y-%m-%d')).days)\n", | |
" if active_for <= 1:\n", | |
" lived_1[player[0].rsplit('-', 1)[0]] += 1\n", | |
" elif active_for <= 7:\n", | |
" lived_7[player[0].rsplit('-', 1)[0]] += 1\n", | |
" elif active_for <= 30:\n", | |
" lived_30[player[0].rsplit('-', 1)[0]] += 1\n", | |
" elif active_for <= 90:\n", | |
" lived_90[player[0].rsplit('-', 1)[0]] += 1\n", | |
" else:\n", | |
" lived_long[player[0].rsplit('-', 1)[0]] += 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [], | |
"source": [ | |
"with open('output/short lives.csv', 'w') as f:\n", | |
" keys = sorted(lived_1.keys())\n", | |
" f.write('Month,New Accounts,1 Day,7 Days,30 Days,90 Days,Long-term\\n')\n", | |
" for key in keys:\n", | |
" f.write(key + ',' + str(total_new[key]) + ',' + str(lived_1[key]) + ',' + str(lived_7[key]) + ',' + str(lived_30[key]) + ',' + str(lived_90[key]) + ',' + str(lived_long[key]) + '\\n')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Battles vs age" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"battle_age = []\n", | |
"with open('output/battles over time.csv', 'w') as f:\n", | |
" f.write('Age (Days),Battles\\n')\n", | |
" for _, row in data.iterrows():\n", | |
" try:\n", | |
" if datetime.utcfromtimestamp(row['last_battle_time_raw']).year == 1970:\n", | |
" continue\n", | |
" battle_age.append((abs((datetime.utcfromtimestamp(row['last_battle_time_raw']) - datetime.utcfromtimestamp(row['created_at_raw'])).days), row['battles']))\n", | |
" f.write(str(abs((datetime.utcfromtimestamp(row['last_battle_time_raw']) - datetime.utcfromtimestamp(row['created_at_raw'])).days)) + ',' + str(row['battles']) + '\\n')\n", | |
" except TypeError:\n", | |
" pass" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 98, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"fig = plt.figure()\n", | |
"fig.set_size_inches(22,13)\n", | |
"fig.suptitle('Xbox battles vs account age')\n", | |
"ax = fig.add_subplot(111)\n", | |
"plt.xlabel('Account age (days)')\n", | |
"plt.ylabel('Battle count')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 99, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<matplotlib.collections.PathCollection at 0x186d21d30>" | |
] | |
}, | |
"execution_count": 99, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ax.scatter(\n", | |
" [ba[0] for ba in battle_age],\n", | |
" [ba[1] for ba in battle_age],\n", | |
" c=np.random.rand(len(battle_age)),\n", | |
" alpha=0.05\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 102, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"fig.savefig('output/battle vs age.png', dpi=600)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 97, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"del fig\n", | |
"plt.close()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Data used for this session can be downloaded from: https://drive.google.com/open?id=0B1qZrP_GY8KXeHh4WjZCSmdIQkE
A combined listing of graphs that can be had at Plot.ly