Skip to content

Instantly share code, notes, and snippets.

@alastair
Created February 14, 2018 16:07
Show Gist options
  • Save alastair/fbd86f2a243cf04393e8f7c04cbe0657 to your computer and use it in GitHub Desktop.
Save alastair/fbd86f2a243cf04393e8f7c04cbe0657 to your computer and use it in GitHub Desktop.
Checking if people apply the 'classic rock' tag to newer music each year
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext sql\n",
"%config SqlMagic.feedback=False"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Connected: musicbrainzro@musicbrainz'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Connect to the MusicBrainz database. Use the 'postgresql://' scheme:\n",
"# SQLAlchemy 1.4+ no longer accepts the old 'postgres://' alias.\n",
"%sql postgresql://musicbrainzro@asplab-db.s.upf.edu/musicbrainz"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# recording_tag.tag value being studied -- presumably the id of the\n",
"# 'classic rock' tag named in the plot title; verify before reuse.\n",
"CLASSIC_ROCK_TAG_ID = 987\n",
"# The id is passed as a bind variable so it is defined in exactly one place.\n",
"result = %sql SELECT * from recording_tag where tag = :CLASSIC_ROCK_TAG_ID;"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# One record per tagged recording: its id plus the year taken from the\n",
"# row's last_updated timestamp (used later as the year the tag was applied).\n",
"data = [\n",
"    {'recording': tag_row['recording'], 'tag_date': tag_row['last_updated'].year}\n",
"    for tag_row in result\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Look up the first-release years of every tagged recording with ONE query\n",
"# instead of one round trip per recording. The Python list is bound to\n",
"# :recording_ids and compared with = ANY(...).\n",
"recording_ids = [row['recording'] for row in data]\n",
"year_rows = %sql select track.recording, first_release_date_year from release_group_meta, release, medium, track where track.recording = ANY(:recording_ids) and track.medium = medium.id and medium.release = release.id and release_group_meta.id = release.release_group;\n",
"\n",
"# Keep the earliest non-empty year seen for each recording.\n",
"earliest_year = {}\n",
"for r in year_rows:\n",
"    rec, year = r[0], r[1]\n",
"    if year and (rec not in earliest_year or year < earliest_year[rec]):\n",
"        earliest_year[rec] = year\n",
"\n",
"# Annotate the rows in place; 0 marks a recording with no known release year.\n",
"for row in data:\n",
"    row['release_date'] = earliest_year.get(row['recording'], 0)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Bucket the known release years by the year of the tag, in a single pass.\n",
"tag_dates = {row['tag_date'] for row in data}\n",
"tag_date_to_release = {tag_year: [] for tag_year in tag_dates}\n",
"for row in data:\n",
"    # a falsy release_date (0) means the year is unknown, so leave it out\n",
"    if row['release_date']:\n",
"        tag_date_to_release[row['tag_date']].append(row['release_date'])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2011: mean 1988.69 total 532\n",
"2012: mean 1989.18 total 238\n",
"2013: mean 1991.58 total 273\n",
"2014: mean 1991.87 total 297\n",
"2015: mean 1991.74 total 861\n",
"2016: mean 1981.92 total 721\n",
"2017: mean 1980.06 total 1425\n",
"2018: mean 1977.32 total 151\n"
]
}
],
"source": [
"# For each tag year (ascending): mean release year and number of recordings.\n",
"# `totals` collects the per-year counts for the next cell.\n",
"totals = []\n",
"for tag_year in sorted(tag_dates):\n",
"    release_years = tag_date_to_release[tag_year]\n",
"    count = len(release_years)\n",
"    totals.append(count)\n",
"    print(\"%s: mean %.2f total %s\" % (tag_year, numpy.mean(release_years), count))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"562.25"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Average number of tagged recordings (with a known release year) per tag year.\n",
"numpy.asarray(totals).mean()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Tag years in ascending order, each paired with its list of release years.\n",
"by_tag_year = sorted(tag_date_to_release.items(), key=lambda item: item[0])\n",
"x_es = [tag_year for tag_year, _ in by_tag_year]\n",
"y_es = [release_years for _, release_years in by_tag_year]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([<matplotlib.axis.XTick at 0x7fcfebaa86d8>,\n",
" <matplotlib.axis.XTick at 0x7fcfebaad6d8>,\n",
" <matplotlib.axis.XTick at 0x7fcfe99d3940>,\n",
" <matplotlib.axis.XTick at 0x7fcfe996ccc0>,\n",
" <matplotlib.axis.XTick at 0x7fcfe9972400>,\n",
" <matplotlib.axis.XTick at 0x7fcfe9972b00>,\n",
" <matplotlib.axis.XTick at 0x7fcfe9978240>,\n",
" <matplotlib.axis.XTick at 0x7fcfe9978940>],\n",
" <a list of 8 Text xticklabel objects>)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fd014053f28>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# One box per tag year showing the spread of release years of the recordings\n",
"# tagged that year.\n",
"sns.set(rc={\"figure.figsize\": (15, 8)})\n",
"ax = sns.boxplot(data=y_es, width=.5)\n",
"ax.set_xlabel('Year that tag was added')\n",
"# Each box is a distribution of release years, not an average.\n",
"ax.set_ylabel('Release year')\n",
"ax.set_title(\"'classic rock' recording tags in MusicBrainz\")\n",
"# Relabel the existing tick positions with the tag years; the trailing ';'\n",
"# keeps the returned tick objects out of the cell output.\n",
"plt.xticks(plt.xticks()[0], x_es);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment