Skip to content

Instantly share code, notes, and snippets.

@muhammadhafiz
Created March 24, 2014 20:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save muhammadhafiz/062a21c9caa3cae5cb2a to your computer and use it in GitHub Desktop.
Save muhammadhafiz/062a21c9caa3cae5cb2a to your computer and use it in GitHub Desktop.
MAT240E-Final II [Raag Analysis]
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Muhammad Hafiz Wan Rosli\n",
"#MAT240E-Final II [Raag Analysis]\n",
"#Raag Analysis\n",
"#Source: Master of Sitar- 01 Raag Alahya Bilawal (Early Morning Raag).mp3\n",
"#Ground Truth:\n",
"#Alap: 00:00 - 00:42\n",
"#Gat: 00:42 - 18:46 (1080+46 = 1126 secs)\n",
"#Jhala: 18:46 - end (check spectral difference 1 minute before & after)\n",
"#Gat-Jhala transition: 18:45 - 18:46\n",
"\n",
"#Code written in python : https://www.python.org/\n",
"#Interactive computing ipython : http://ipython.org/\n",
"#Audio Analysis & MIR Library : http://essentia.upf.edu/"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"from essentia import *\n",
"from essentia.streaming import *\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib.widgets import Slider, Button, RadioButtons, CheckButtons\n",
"plt.rcParams['figure.figsize'] = (16,9)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Analysis parameters shared by the loader and the framing stage\n",
"sr = 44100\n",
"frameSize = 2048\n",
"hopSize = 1024\n",
"\n",
"# Front end: audio loader -> frame cutter -> Hann window -> spectrum\n",
"audioPath = '/Users/muhammadhafiz/Documents/work/ComputationalEthno/samples/RaagAlahyaBilawal0_3minutes.mp3'\n",
"loader = MonoLoader(filename=audioPath, sampleRate=sr)\n",
"frameCutter = FrameCutter(frameSize=frameSize, hopSize=hopSize)\n",
"w = Windowing(type='hann')\n",
"spec = Spectrum()\n",
"\n",
"# Descriptors fed straight from the frame cutter in the wiring cell\n",
"zcr = ZeroCrossingRate()\n",
"rms = RMS()\n",
"\n",
"# Descriptors fed from the spectrum in the wiring cell\n",
"pitchyin = PitchYinFFT()\n",
"mfcc = MFCC()\n",
"centroid = Centroid()\n",
"rolloff = RollOff()\n",
"flux = Flux()\n",
"erbBands = ERBBands()\n",
"# custom band edges tried earlier: [141, 142, 213, 282, 284, 286, 355, 358, 428]\n",
"frequencyBands = FrequencyBands()\n",
"pitchSalience = PitchSalience()\n",
"spectralComplexity = SpectralComplexity()\n",
"spectralContrast = SpectralContrast()\n",
"spectralPeaks = SpectralPeaks()\n",
"\n",
"# Algorithms fed with the loader's audio stream directly\n",
"onsetDetector = OnsetRate()\n",
"beatsLoudness = BeatsLoudness()\n",
"predominantMelody = PredominantMelody()\n",
"\n",
"# Instantiated for experiments; their connections are commented out in this notebook\n",
"centralMoments = CentralMoments()\n",
"derivativeSFX = DerivativeSFX()\n",
"envelope = Envelope()\n",
"pitchSalienceFunction = PitchSalienceFunction()\n",
"pitchSalienceFunctionPeaks = PitchSalienceFunctionPeaks()\n",
"SBIC = SBic()\n",
"\n",
"# Considered but left disabled: DistributionShape, HarmonicPeaks,\n",
"# OddToEvenHarmonicEnergyRatio, PitchContours, PitchContoursMelody, TCToTotal"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Wire the streaming network (sources on the left, sinks on the right).\n",
"# Connections are made in loops so the cell does not end on a bare expression,\n",
"# which would store a _StreamConnector repr as the cell output.\n",
"\n",
"# Consumers of the loader's audio stream\n",
"for sink in (onsetDetector.signal, beatsLoudness.signal, predominantMelody.signal, frameCutter.signal):\n",
"    loader.audio >> sink\n",
"\n",
"# Frames go through the window into the spectrum; zcr and rms read the frames directly\n",
"frameCutter.frame >> w.frame >> spec.frame\n",
"for sink in (zcr.signal, rms.array):\n",
"    frameCutter.frame >> sink\n",
"\n",
"# Every spectral descriptor reads the same spectrum stream\n",
"spectrumSinks = (\n",
"    centroid.array,\n",
"    erbBands.spectrum,\n",
"    flux.spectrum,\n",
"    frequencyBands.spectrum,\n",
"    mfcc.spectrum,\n",
"    pitchSalience.spectrum,\n",
"    pitchyin.spectrum,\n",
"    rolloff.spectrum,\n",
"    spectralComplexity.spectrum,\n",
"    spectralContrast.spectrum,\n",
"    spectralPeaks.spectrum,\n",
")\n",
"for sink in spectrumSinks:\n",
"    spec.spectrum >> sink\n",
"\n",
"# Disabled experiments, kept for reference:\n",
"#loader.audio >> envelope.signal\n",
"#envelope.signal >> derivativeSFX.envelope\n",
"#w.frame >> centralMoments.array\n",
"#spectralPeaks.frequencies >> pitchSalienceFunction.frequencies\n",
"#spectralPeaks.magnitudes >> pitchSalienceFunction.magnitudes\n",
"#pitchSalienceFunction.salienceFunction >> pitchSalienceFunctionPeaks.salienceFunction"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Collect every connected algorithm output in a single pool\n",
"pool = essentia.Pool()\n",
"\n",
"# (algorithm output, pool key) pairs, connected in the order listed\n",
"poolRoutes = [\n",
"    (onsetDetector.onsetTimes, 'onset.onsetTimes'),\n",
"    (onsetDetector.onsetRate, 'onset.onsetRate'),\n",
"    (beatsLoudness.loudness, 'beatsLoudness.loudness'),\n",
"    (beatsLoudness.loudnessBandRatio, 'beatsLoudness.loudnessBandRatio'),\n",
"    (erbBands.bands, 'erbBands.bands'),\n",
"    (frequencyBands.bands, 'frequencyBands.bands'),\n",
"    (pitchSalience.pitchSalience, 'pitchSalience.pitchSalience'),\n",
"    (predominantMelody.pitch, 'predominantMelody.pitch'),\n",
"    (predominantMelody.pitchConfidence, 'predominantMelody.pitchConfidence'),\n",
"    (spectralComplexity.spectralComplexity, 'spectralComplexity.spectralComplexity'),\n",
"    (spectralContrast.spectralContrast, 'spectralContrast.spectralContrast'),\n",
"    (spectralContrast.spectralValley, 'spectralContrast.spectralValley'),\n",
"    (spectralPeaks.frequencies, 'spectralPeaks.frequencies'),\n",
"    (spectralPeaks.magnitudes, 'spectralPeaks.magnitudes'),\n",
"    (pitchyin.pitch, 'pitchyin.pitch'),\n",
"    (pitchyin.pitchConfidence, 'pitchyin.pitchConfidence'),\n",
"    (spec.spectrum, 'lowlevel.spectrum'),\n",
"    (mfcc.bands, 'lowlevel.mfcc_bands'),\n",
"    (mfcc.mfcc, 'lowlevel.mfcc'),\n",
"    (centroid.centroid, 'lowlevel.centroid'),\n",
"    (rolloff.rollOff, 'lowlevel.rolloff'),\n",
"    (flux.flux, 'lowlevel.flux'),\n",
"    (zcr.zeroCrossingRate, 'lowlevel.zcr'),\n",
"    (rms.rms, 'lowlevel.rms'),\n",
"]\n",
"for source, key in poolRoutes:\n",
"    source >> (pool, key)\n",
"\n",
"# Not pooled (their algorithms are not wired in): derivativeSFX.derAvAfterMax,\n",
"# derivativeSFX.maxDerBeforeMax, pitchSalienceFunction.salienceFunction,\n",
"# pitchSalienceFunctionPeaks.salienceBins, pitchSalienceFunctionPeaks.salienceValues"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Reset the network rooted at the loader before running it,\n",
"# so this cell can be re-executed without rebuilding the network.\n",
"essentia.reset(loader)\n",
"essentia.run(loader)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# The feature pools hold one value per analysis frame, so seconds are\n",
"# converted to frame counts with the frame rate derived from sr and hopSize.\n",
"timeConversion = sr / float(hopSize)  # frames per second\n",
"groundTruthAlap = 42   # secs\n",
"analysisLength = 60    # secs, length of the excerpt that is normalized and plotted\n",
"peakSearchStart = 10   # secs, the peak of each feature is searched from here on\n",
"\n",
"# NumPy only accepts integer slice bounds, so convert to whole frames once.\n",
"analysisFrames = int(analysisLength * timeConversion)\n",
"peakStartFrame = int(peakSearchStart * timeConversion)\n",
"\n",
"# (checkbox label, pool key, draw a peak marker?) in display order\n",
"features = [\n",
"    ('pitch', 'pitchyin.pitch', False),\n",
"    ('pitch confidence', 'pitchyin.pitchConfidence', False),\n",
"    ('spectral centroid', 'lowlevel.centroid', True),\n",
"    ('spectral rolloff', 'lowlevel.rolloff', True),\n",
"    ('spectral flux', 'lowlevel.flux', True),\n",
"    ('ZCR', 'lowlevel.zcr', True),\n",
"    ('RMS', 'lowlevel.rms', True),\n",
"    ('pitch salience', 'pitchSalience.pitchSalience', True),\n",
"    ('spectral complexity', 'spectralComplexity.spectralComplexity', True),\n",
"]\n",
"\n",
"\n",
"def normalizeToPercent(values):\n",
"    '''Scale values so their maximum becomes 100; an all-zero or empty input stays as is.'''\n",
"    values = np.asarray(values, dtype=float)\n",
"    peak = values.max() if len(values) else 0.0\n",
"    if peak == 0:\n",
"        # avoid dividing by zero on silent input\n",
"        return np.zeros_like(values)\n",
"    return values / peak * 100\n",
"\n",
"\n",
"#plot values\n",
"fig, ax1 = plt.subplots()\n",
"ax1.vlines(groundTruthAlap, 0, 100, lw=3, alpha=0.5, color='k')\n",
"\n",
"normalizedPools = {}  # pool key -> values in percent of the excerpt maximum\n",
"artists = {}          # checkbox label -> artists shown/hidden together\n",
"for label, key, markPeak in features:\n",
"    percent = normalizeToPercent(pool[key][:analysisFrames])\n",
"    normalizedPools[key] = percent\n",
"    # Frame i is placed at i * hopSize / sr seconds, which also holds for\n",
"    # recordings shorter than analysisLength.\n",
"    times = np.arange(len(percent)) / timeConversion\n",
"    curve, = ax1.plot(times, percent, visible=False)\n",
"    artists[label] = [curve]\n",
"    if markPeak and len(percent) > peakStartFrame:\n",
"        # red marker at the maximum found after the first peakSearchStart seconds\n",
"        peakFrame = peakStartFrame + int(np.argmax(percent[peakStartFrame:]))\n",
"        marker = ax1.vlines(peakFrame / timeConversion, 0, percent[peakFrame], color='r', visible=False)\n",
"        artists[label].append(marker)\n",
"\n",
"fig.suptitle('Raag Analysis: Alahya Bilawal [Ravi Shankar - Master of Sitar]', fontsize=14, fontweight='bold')\n",
"ax1.set_title('Ground truth- Alap: %d seconds' % groundTruthAlap)\n",
"ax1.set_ylabel('percentage to maximum in analysis section')\n",
"ax1.set_xlabel('time (seconds)')\n",
"fig.subplots_adjust(left=0.3)\n",
"rax = fig.add_axes([0.025, 0.65, 0.22, 0.25])  # left, bottom, width, height\n",
"labels = [feature[0] for feature in features]\n",
"check = CheckButtons(rax, labels, [False] * len(labels))\n",
"\n",
"\n",
"#checkbox controls\n",
"def displayMode(label):\n",
"    '''Toggle the curve (and its peak marker, if any) that belongs to a checkbox label.'''\n",
"    for artist in artists.get(label, []):\n",
"        artist.set_visible(not artist.get_visible())\n",
"    plt.draw()\n",
"\n",
"\n",
"check.on_clicked(displayMode)\n",
"plt.show()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment