Skip to content

Instantly share code, notes, and snippets.

@kynan
Created August 22, 2015 14:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kynan/26e76632ef07bae08265 to your computer and use it in GitHub Desktop.
Save kynan/26e76632ef07bae08265 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from glob import glob\n",
"import numpy as np\n",
"from os import path\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def get_data(fname):\n",
" df = pd.read_csv(fname, header=None,\n",
" names=['artist', 'album', 'track', 'timestamp'],\n",
" parse_dates=['timestamp'])\n",
" df['user'] = path.splitext(fname)[0]\n",
" return df\n",
"\n",
"# for c in ['artist', 'album']:\n",
"# df[c + '_count'] = df.groupby([c])[c].transform(len)\n",
"\n",
"# return df.sort(['artist_count', 'album_count'], ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = pd.concat([get_data(f) for f in glob('*.csv')])"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['count'] = 1"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"t = pd.pivot_table(df, values=['count'], index=['artist'], columns=['user'],\n",
" aggfunc='sum', margins=True).fillna(0)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"ts = np.sqrt(t + 0.5)"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th colspan=\"4\" halign=\"left\">count</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>user</th>\n",
" <th>kynanmaclachlan</th>\n",
" <th>olorton</th>\n",
" <th>papanoa</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>user</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">count</th>\n",
" <th>kynanmaclachlan</th>\n",
" <td> 1.000000</td>\n",
" <td> 0.988175</td>\n",
" <td> 0.943288</td>\n",
" <td> 0.993855</td>\n",
" </tr>\n",
" <tr>\n",
" <th>olorton</th>\n",
" <td> 0.988175</td>\n",
" <td> 1.000000</td>\n",
" <td> 0.999065</td>\n",
" <td> 0.998225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>papanoa</th>\n",
" <td> 0.943288</td>\n",
" <td> 0.999065</td>\n",
" <td> 1.000000</td>\n",
" <td> 0.941066</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td> 0.993855</td>\n",
" <td> 0.998225</td>\n",
" <td> 0.941066</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count \n",
"user kynanmaclachlan olorton papanoa All\n",
" user \n",
"count kynanmaclachlan 1.000000 0.988175 0.943288 0.993855\n",
" olorton 0.988175 1.000000 0.999065 0.998225\n",
" papanoa 0.943288 0.999065 1.000000 0.941066\n",
" All 0.993855 0.998225 0.941066 1.000000"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t.corr()"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th colspan=\"4\" halign=\"left\">count</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>user</th>\n",
" <th>kynanmaclachlan</th>\n",
" <th>olorton</th>\n",
" <th>papanoa</th>\n",
" <th>All</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>user</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">count</th>\n",
" <th>kynanmaclachlan</th>\n",
" <td> 1.000000</td>\n",
" <td> 0.585364</td>\n",
" <td> 0.609860</td>\n",
" <td> 0.771564</td>\n",
" </tr>\n",
" <tr>\n",
" <th>olorton</th>\n",
" <td> 0.585364</td>\n",
" <td> 1.000000</td>\n",
" <td> 0.563277</td>\n",
" <td> 0.949762</td>\n",
" </tr>\n",
" <tr>\n",
" <th>papanoa</th>\n",
" <td> 0.609860</td>\n",
" <td> 0.563277</td>\n",
" <td> 1.000000</td>\n",
" <td> 0.671921</td>\n",
" </tr>\n",
" <tr>\n",
" <th>All</th>\n",
" <td> 0.771564</td>\n",
" <td> 0.949762</td>\n",
" <td> 0.671921</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count \n",
"user kynanmaclachlan olorton papanoa All\n",
" user \n",
"count kynanmaclachlan 1.000000 0.585364 0.609860 0.771564\n",
" olorton 0.585364 1.000000 0.563277 0.949762\n",
" papanoa 0.609860 0.563277 1.000000 0.671921\n",
" All 0.771564 0.949762 0.671921 1.000000"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ts.corr()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment