-
-
Save kynan/26e76632ef07bae08265 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 74, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from glob import glob\n", | |
"import numpy as np\n", | |
"from os import path\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 47, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def get_data(fname):\n", | |
"    \"\"\"Read one header-less CSV file into a DataFrame tagged with its user.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    fname : str\n", | |
"        Path to a CSV file whose four columns are read as artist, album,\n", | |
"        track and timestamp. The file is assumed to have no header row.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    pandas.DataFrame\n", | |
"        One row per CSV record, with `timestamp` parsed as dates and an\n", | |
"        extra `user` column holding the file name without directory or\n", | |
"        extension.\n", | |
"    \"\"\"\n", | |
"    # header=None keeps the first record as data; a bool is not a valid\n", | |
"    # value for `header` and is rejected by current pandas.\n", | |
"    df = pd.read_csv(fname, header=None,\n", | |
"                     names=['artist', 'album', 'track', 'timestamp'],\n", | |
"                     parse_dates=['timestamp'])\n", | |
"    # basename first, so a path such as data/alice.csv still yields 'alice'.\n", | |
"    df['user'] = path.splitext(path.basename(fname))[0]\n", | |
"    return df\n", | |
"\n", | |
"# for c in ['artist', 'album']:\n", | |
"# df[c + '_count'] = df.groupby([c])[c].transform(len)\n", | |
"\n", | |
"# return df.sort(['artist_count', 'album_count'], ascending=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Combine every CSV in the working directory into one frame.\n", | |
"# sorted(): glob order is platform-dependent, so sort for a reproducible row order.\n", | |
"# ignore_index=True: renumber rows so the combined index has no duplicate labels.\n", | |
"df = pd.concat([get_data(f) for f in sorted(glob('*.csv'))],\n", | |
"               ignore_index=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Constant 1 on every row, so that summing this column within a group\n", | |
"# gives the number of rows in that group. Adds the column to df in place.\n", | |
"df['count'] = 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 72, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Artist-by-user table of summed 'count' values; missing pairs become 0.\n", | |
"# aggfunc is given by name: passing the builtin `sum` makes recent pandas\n", | |
"# emit a FutureWarning, while 'sum' selects the same pandas reduction.\n", | |
"# NOTE(review): margins=True appends an 'All' row and column of totals, and\n", | |
"# that 'All' row is treated as one more observation by any later .corr()\n", | |
"# call on this table -- confirm that is intended.\n", | |
"t = pd.pivot_table(df, values=['count'], index=['artist'], columns=['user'],\n", | |
"                   aggfunc='sum', margins=True).fillna(0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 77, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Element-wise square root of (t + 0.5), applied to every cell of the pivot\n", | |
"# table, margins included.\n", | |
"# NOTE(review): presumably a variance-stabilising transform for count data so\n", | |
"# that the largest counts dominate the correlation less -- confirm the\n", | |
"# choice of the 0.5 offset.\n", | |
"ts = np.sqrt(t + 0.5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 71, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th colspan=\"4\" halign=\"left\">count</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>user</th>\n", | |
" <th>kynanmaclachlan</th>\n", | |
" <th>olorton</th>\n", | |
" <th>papanoa</th>\n", | |
" <th>All</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>user</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">count</th>\n", | |
" <th>kynanmaclachlan</th>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.988175</td>\n", | |
" <td> 0.943288</td>\n", | |
" <td> 0.993855</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>olorton</th>\n", | |
" <td> 0.988175</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.999065</td>\n", | |
" <td> 0.998225</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>papanoa</th>\n", | |
" <td> 0.943288</td>\n", | |
" <td> 0.999065</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.941066</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>All</th>\n", | |
" <td> 0.993855</td>\n", | |
" <td> 0.998225</td>\n", | |
" <td> 0.941066</td>\n", | |
" <td> 1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count \n", | |
"user kynanmaclachlan olorton papanoa All\n", | |
" user \n", | |
"count kynanmaclachlan 1.000000 0.988175 0.943288 0.993855\n", | |
" olorton 0.988175 1.000000 0.999065 0.998225\n", | |
" papanoa 0.943288 0.999065 1.000000 0.941066\n", | |
" All 0.993855 0.998225 0.941066 1.000000" | |
] | |
}, | |
"execution_count": 71, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"t.corr()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 78, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th colspan=\"4\" halign=\"left\">count</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>user</th>\n", | |
" <th>kynanmaclachlan</th>\n", | |
" <th>olorton</th>\n", | |
" <th>papanoa</th>\n", | |
" <th>All</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th></th>\n", | |
" <th>user</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"4\" valign=\"top\">count</th>\n", | |
" <th>kynanmaclachlan</th>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.585364</td>\n", | |
" <td> 0.609860</td>\n", | |
" <td> 0.771564</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>olorton</th>\n", | |
" <td> 0.585364</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.563277</td>\n", | |
" <td> 0.949762</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>papanoa</th>\n", | |
" <td> 0.609860</td>\n", | |
" <td> 0.563277</td>\n", | |
" <td> 1.000000</td>\n", | |
" <td> 0.671921</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>All</th>\n", | |
" <td> 0.771564</td>\n", | |
" <td> 0.949762</td>\n", | |
" <td> 0.671921</td>\n", | |
" <td> 1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count \n", | |
"user kynanmaclachlan olorton papanoa All\n", | |
" user \n", | |
"count kynanmaclachlan 1.000000 0.585364 0.609860 0.771564\n", | |
" olorton 0.585364 1.000000 0.563277 0.949762\n", | |
" papanoa 0.609860 0.563277 1.000000 0.671921\n", | |
" All 0.771564 0.949762 0.671921 1.000000" | |
] | |
}, | |
"execution_count": 78, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ts.corr()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment