Skip to content

Instantly share code, notes, and snippets.

@JonathanReeve
Last active August 29, 2015 14:08
Show Gist options
  • Save JonathanReeve/5cd08522c05cc557398a to your computer and use it in GitHub Desktop.
Save JonathanReeve/5cd08522c05cc557398a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"worksheets": [
{
"cells": [
{
"metadata": {},
"cell_type": "code",
"input": "%matplotlib inline",
"prompt_number": 1,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "def _pca_fit(data, dims_rescaled_data=2):\n    \"\"\"\n    Shared PCA computation behind PCA() and test_PCA().\n    returns: (projected data, eigenvalues, eigenvectors, column means);\n    eigenvalues are sorted largest first, eigenvectors are the matching columns\n    pass in: data as 2D NumPy array (rows = observations, columns = variables)\n    \"\"\"\n    import numpy as NP\n    from scipy import linalg as LA\n    # float version of the input; the centring below builds a new array, so\n    # the caller's data is never modified and integer input works too\n    data = NP.asarray(data, dtype=float)\n    mean = data.mean(axis=0)\n    # mean center the data\n    centered = data - mean\n    # calculate the covariance matrix\n    # TODO: use the correlation matrix instead\n    R = NP.atleast_2d(NP.cov(centered, rowvar=False))\n    # calculate eigenvectors & eigenvalues of the covariance matrix\n    # use 'eigh' rather than 'eig' since R is symmetric,\n    # the performance gain is substantial\n    evals, evecs = LA.eigh(R)\n    # sort eigenvalues in decreasing order, eigenvectors by the same index\n    idx = NP.argsort(evals)[::-1]\n    evecs = evecs[:, idx]\n    evals = evals[idx]\n    # keep only the first dims_rescaled_data eigenvectors / eigenvalues\n    evecs = evecs[:, :dims_rescaled_data]\n    evals = evals[:dims_rescaled_data]\n    # project the centred data onto the retained eigenvectors\n    return NP.dot(centered, evecs), evals, evecs, mean\n\n\ndef PCA(data, dims_rescaled_data=2):\n    \"\"\"\n    returns: data transformed in dims_rescaled_data dims/columns\n    pass in: data as 2D NumPy array (rows = observations, columns = variables)\n    The array passed in is left unmodified.\n    \"\"\"\n    return _pca_fit(data, dims_rescaled_data)[0]\n\n\ndef test_PCA(data, dims_rescaled_data=2):\n    '''\n    test by attempting to recover original data array from\n    the eigenvectors of its covariance matrix & comparing that\n    'recovered' array with the original data.\n    An exact round trip needs every component, so the recovery uses all of\n    them; dims_rescaled_data is only used to check the shape of the\n    reduced projection.\n    '''\n    import numpy as NP\n    data = NP.asarray(data, dtype=float)\n    n_vars = data.shape[1]\n    projected, _, eigenvectors, mean = _pca_fit(data, dims_rescaled_data=n_vars)\n    # undo the rotation, then add back the mean removed before projecting\n    data_recovered = NP.dot(projected, eigenvectors.T) + mean\n    assert NP.allclose(data, data_recovered)\n    reduced = PCA(data, dims_rescaled_data)\n    assert reduced.shape == (data.shape[0], min(dims_rescaled_data, n_vars))\n\n\ndef plot_pca(data):\n    \"\"\"Scatter-plot data projected onto its first two principal components.\"\"\"\n    from matplotlib import pyplot as MPL\n    clr1 = '#2026B2'\n    fig = MPL.figure()\n    ax1 = fig.add_subplot(111)\n    data_resc = PCA(data)\n    ax1.plot(data_resc[:, 0], data_resc[:, 1], '.', mfc=clr1, mec=clr1)\n    ax1.set_xlabel('PC 1')\n    ax1.set_ylabel('PC 2')\n    MPL.show()",
"prompt_number": 2,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "import numpy as NP\ndf='/home/jon/Dropbox/Research/stylometry-experiments/iris.csv' \ndata = NP.loadtxt(df, delimiter=',')",
"prompt_number": 3,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "data.shape",
"prompt_number": 4,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "(150, 4)",
"prompt_number": 4
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "plot_pca(data)",
"prompt_number": 7,
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEACAYAAAC9Gb03AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF3lJREFUeJzt3XuMXNV9wPGvjRdsh6wJgtpgu7VKSAIOpAbkkAcwLSE1\nbgVFCg1IqCGVSJQ0pFVCShyMbIRa0xSaYAVaqoaKogbSBEQdjEtIwmCKEhMCbGjAPFw7BRJewmAb\nSlkb9487Y4bxPO7MfZx773w/0mpnd+/MPbt797fn/M7vnAuSJEmSJEmSJEmSJEmSJElK2bXAs8BD\nXb5eA14GHmi8Lc+nWZKktJwALKJ3oF+TW2skSW8xNYXXuBvY2ueYKSmcR5I0hDQCfT+7gQ8CE8Bt\nwJE5nFOSlLIFdE/dvB2Y2Xh8KvBYHg2SJEWm5XCO7S2P1wFXAwcCL7YedNhhh+3etGlTDs2RpErZ\nBLyz1wF5pG5m82aOfnHj8YvtB23atIndu3cX/m3FihXB21CFNtpO21n0t7K0EzisXxBOo0d/A3AS\ncBDwJLACGGt87RrgY8BngJ3Aq8BZKZxTkhRTGoH+7D5fv6rxJkkKII/UTaXUarXQTeirDG0E25k2\n25musrQzjiLVt+9u5JskSTFNmTIF+sRye/SSVHEGekmqOAO9JFWcgV6SKs5AL0kVZ6CXpIoz0EtS\nxRnoJaniDPSSVHEGekmquDz2o5dysWz5BJu37GD6jGmsvuIYxsfH+j9JGgH26FUZm7fsYMNPX+Su\n9c+x7OKJ0M2RCsNAr8qYPiMaoB591CxWXfq+wK2RisPdK1UZ27ZNsuziCVZd+j7TNhoZcXavNNBL\nUom5TbEkyaobKQQrhJQne/RSAFYIKU8GeikAK4SUJydjpQCsEFJarLqRpIqz6kaSZKCXpKoz0EtS\nxRnoJaniDPSSVHFpBPprgWeBh3ocsxp4HJgAFqVwTklSTGkE+n8GlvT4+lLgncDhwKeAv0/hnJKk\nmNII9HcDW3t8/TTgusbjDcABwOwUzitJiiGPTc3mAk+2fPwUMI8o3SPF5kZg0nDy2r2yfdWWS2AV\nWzPAb3x0Gy9v2xl97uIJrrryuMAtk8ohj0D/NDC/5eN5jc/tZeXKlXse12o1arValu1SSTR3emxy\nIzCNsnq9Tr1eH+g5ae11swD4HnBUh68tBT7XeH888PXG+3budaOOzj1vA3etf46FR45z6KEzuHzV\nItM2UkNem5rdAJwEHESUd18BNP8Kr2m8/wZRZc4rwCeB+zu8joFeHbnTo9Sdu1dKUsXFCfTeSjAF\nVoNIKjK3QEiBt4WTVGQG+hR4WzhJRWaOPgVOFkoKxclYSao4byUoSTLQS1LVGeglqeKso5cScA2F\nysAevZSAayhUBgZ6KQHXUKgMLK+UEij7GgpTT+VnHb2kns465549e/0vXXKIN3MpITc1U2nYs8xX\n8+f92BPbAVNPVWeOXoXgpGa+mj/vrVsnmTN7Otdf+wH/uVaYgV6F4KRmvlp/3rffWjPIV5w5ehVC\nUSc1q5pSKurPW4NzMlYaQmtw37H9dX72wEuAk5UqJidjpSE089cABx+0H2BKSeVmoNdI65Saac1f\nX3Xlcaz66sMdUxyDpnWqmgZS8TkZq5HWqdpn9RXHsHTJIVx/7QeYN3cmV115XMegPGilkJVFCsUe\nvUZap2qf8fGxWLn4QSuFrCxSKE7GaqQlqT4Z9LlWuigLVt1IUsVZdSMJcCJ41DkZK40AJ4JHmz16\nKYay94jjTASfvORHPP/8a0wbm8qam05k3tyZeTZRGbJHn8Cy5ROcdc49nHveBrZtmwzdHGWo7D3i\n1pLRbv+knn/+Nbbv2MXWrZOcefY9ObdQWbJHn0DrCsplF0+4PL7C4vSIi9zrj1MyOm1sKrCLGTOm\n8p0bPpRPw5SLNHr0S4CNwOPAhR2+XgNeBh5ovC1P4Z
yFYF306IjTIw7R609zVLnmphOZM3s631/7\nu3ulbRy9llvS8sp9gEeBjwBPAz8FzgYeaTmmBnwBOK3Pa5WuvNK6aLU697wN3LX+OY4+alZu+7vn\ndYco70RVXHHKK5P26BcDTwBbgEngRuD0Tm1JeJ5Cag6HDfKCeL3+tOU1qnT0Wm5JA/DHgN8Hzmt8\nfA7wfuD8lmNOAm4GniLq9V8APNzhtYL16IucW1V55XFd5TWqjHMe/47CyGPBVJzIfD8wH3gVOBW4\nBXhXpwNXrly553GtVqNWqyVsXjxxJ1W9kDWIPK6ruPvyJBXnPBYn5KNer1Ov1wd6TtJA/zRREG+a\nT9Rzb7W95fE64GrgQODF9hdrDfR5ijss9ULWIEbtujK9k4/2TvAll1zS9zlJc/T3AYcDC4B9gY8D\na9qOmc2bw4rFjcd7BfmQ4uZWvZA1iFG7rkLMUSieNCZJTwW+TlSB801gFfDpxteuAf4M+Aywkyh9\n8wXgJx1ep/BVN1bZKAteV0rC3SslqeLcvVIqkZCT/RYaVJt73UgpSGPlaMj9dMq+l496M9BLKUgj\nUIaclK3KhLA6M9BLKUgjUIasWul2bve4qQYnYzUyssxDZ105EyqH7h43xedkrNQiy4VJrStHkwTl\nbs8NtajKlE41mLoZgMPYcssraCXJ13d7bqiA6yKoajB1MwCHseWWZXqltSe+c/IN7vnxC0NtV9xt\nq+O02245ZXW4YCplcfcb94+o+tp/x5/67L17OgGnnDybsbGpQwXlvFbJ2mmpDnP0KVt9xTGx/gir\nskmVumv/HbemVi6/bNHQQTqv3SjNvY8WA/0A4v4R+kdUfZ1+x506AUUd3bV3WoraTqXD1E0G3KSq\n+tp/x90CZVlSJGVpp/Zm6iaQvIbfSscwvdn233G3dF1ZRndlaaeGY3llRizFLI8sty8oS3likdvp\n31Jypm5S1Noz3LFjkp/dvxVwKFx0caupejFdlx3TSr1ZXpmTZoDf+Nh2Xn456nEcfNC+PP/C64mC\nh/KRRZAe1cnNYb/vXs9L4x9xlcUJ9KZuUtAc+jeD/NFHzeLmfzuhsENhvVUz357m76ls2/6mlR4Z\n5vtetnyCteue7vq8IqeVysLJ2BQ087MLjxzn0ENm7Kmjbh1iNnssv3zyVeYeOoP99x8bqZ7eqCnb\n5GZaaz+G+b43b9nB9h27AJg1PrbX8yxuSM5An4I4C6la/5CeeeY1wMVUVRZ3cV0SaaZJ0vrH1Ov7\n7tbe5rlnjU/j1ltOtPOTAXP0GWm/qD//xfu5a/1z7L//NHbs2Gm+UQPpFCTjTlL22q6h+bw8JpO7\ntdeJ7GTM0QfUnqts5hnXrTnJfKMG1in3HbcX3v7cTs/LYp6iXbf25nHuUWfqJoFeQ+f2i7o1z5jG\nvuUaLZ2CZNz0UNztGpqyui7zSGepM1M3CfQaOscZjlofrLh6XU/9AvOgqRGvy3JxC4SM9Ro6N3vw\nRx+7jlde3cnUqVNYc/MJHPHuWbGeL7XqVXnSr2Km03MHGY2q/MzRJxCnvveVV3fyxhuwc+duzjjz\nPwd+vqopzWX9w5Y0Wrc+OkzdpKRbD+nwhbeyc+dupkyBtf9+4lt69BpdaaZHuqVmXG06Gqy6yVG3\nHtKam09gv/2mGuRHUK9ee5rpkW5VK/ba1WSgT0m3P9wj3j2LjT//A4P8CAodaOPMIRnkR0MagX4J\nsBF4HLiwyzGrG1+fABalcM7CsYekdqEDbZWuSbcqTiZpjn4f4FHgI8DTwE+Bs4FHWo5ZCnyu8f79\nwJXA8R1eq9Q5+rRZY19+Wa34HMVrw5LP7vLI0S8GngC2AJPAjcDpbcecBlzXeLwBOACYnfC8lVe2\n3Q+1t6x67aN4bfSb07DH31vSQD8XeLLl46can+t3zLyE5608a5nVzSheG/3SUKP4z28QSRdMxc21\ntA8rOj5v5cqVex
7XajVqtdpQjaoCl4urm1G8NvptVTxK//zq9Tr1en2g5yTN0R8PrCSakAVYBrwB\n/E3LMf8A1InSOhBN3J4EPNv2WpXP0Y9iblXxeX0Mb5R3wMwjR38fcDiwANgX+Diwpu2YNcCfNB4f\nD7zE3kF+JDi8VC9Vvj6yzqFbLtpb0tTNTqKKmtuJKnC+SVRx8+nG168BbiOquHkCeAX4ZMJzltYo\nDS81uCpeH53up+wNd/LnFgg5GuXhpfqr4vXRWhYJuOVCBuKkbgz0CZlXVRHlfV12O19zT532+ykr\nPe51k4Mq51VVXnlfl93O1yyL/NZ1H+Qfr15skA/E/egTyjqv6ohBw8g739/vNoEKy9RNQknu/BOH\nS781jLzz/d3Od/KSH/H8868xbWwqa246kXlzZ2bellFjjj6wuEHafcNVVUcfexvbd+wCYM7s6fx4\n/SmBW1Q93kpwAFmkSOIOn3vdCm4UV0GqOim7aWNTgV3MmDGV79zwoa7HVeX7LSonYxuymLyKu01s\n6O1sVTxVmeRfc9OJzJk9ne+v/d2eaZuqfL9FZY++IYvJq9aJqF49FnvtaleVxVPz5s6Mla6pyvdb\nVOboG7KevHJSVYOo0uKpOGmZrIsaqszJ2AKJM6nqBa1uynxtJO3k2EnqzQVTBRInX2+eUt2U+dqI\nm5bptvGZaZ3kDPQ5iTOp6gWtbpIGy5DiFiX0W11refHwTN3krNcQvEp5WaUr7rUxaJpj2JRQFqkk\n14wMxxx9AZlvVJYGDZbDXo9ZXMd2dN4q7j9Tc/QB9Bs6m55RlgZNc8S5Hjtd01mWIxvkI2nOy9ij\nT1m/no69FuUhbm8wzvXY6Zou4nVc5sqkTuKOzkzdBGCeUUWQZmqlLNd01dKicf+ZmroJwAoBFUGa\nqZWiXdOjUoaZZirLHn3Gli2f4Id3PsNLL08yY/o+HPXeA7h6tXlIJTeqFVzdeu5V/p57MXUTUKeb\nIjdVYVipsJYtn2Dtuqf3bAFcxWuq3+0Ji55Kyoupm4CaM+btQX7hkeOVGFYqrM1bduwJ8rPGp1Xy\nmkqygGqYhWNFXGyWFnevzEgzX7jwyHEOPmg/mAJjY1O5fJU3R1Zyzetr1vgYt95yYiWvqSS3J+x1\nj4c0n1MWBvqM9Np6uGplYMpfVba2Hnb77n5/Q3HXB7S+RtUmc1uZow+gamVg0rCyWpk7zPqAVZe+\nr5T/PL2VYEENskGVPX9V2bC96H7Pi5PeaX+NOM8pK3v0AXTrbbQH9k999l57/kpNqI5DFmWgaZRS\nVqUc0/LKkmkfSr7y6i7LyJSaUClDU5XZMnVTYJ16OZ2Go+09DtM5GlaoycYqT3KWRZIe/YHAt4Hf\nArYAfwy81OG4LcA2YBcwCSzu8noj1aMfdqMoe0caVqhURVVSJEll1UnLOnXzVeCFxvsLgXcAX+5w\n3GbgWODFPq83UoF+2NV9rgpU0yiO7sr8PWfVSct6ZexpwHWNx9cBf9SrLQnOU0nDbhRVtA2mFE6Z\n7yM7rDJ/zyFTWEkC/Wzg2cbjZxsfd7Ib+AFwH3BegvNVyrA703lzBjWNYu67zN9zyE5av572HcCc\nDp+/iKgX/46Wz71IlLdvdwjwa+DgxuudD9zd4bjdK1as2PNBrVajVqv1aV55lXkIqmLIKvdd5GvT\nfD/U63Xq9fqejy+55BLIMEe/EagBzxAF8zuB9/R5zgpgB3BFh6+NRI5+z66Wj27j5W07ASdVVSxO\n+JdL1jn6NcAnGo8/AdzS4ZiZwNsbj98GfBR4KME5S2/PrpaNIF/GIaiqrczpEXWWJNBfBpwCPAb8\nXuNjgEOBtY3Hc4jSNA8CG4Bbge8nOGfpte5qecpHZjupqsJxwj+5om15XKRqmJFI3ZhjlLIXep4h\nz/SXNx4pIKtmpOyFLsMsWvrLQB9I0YZ2UpWEDrRFS3+ZusmJO1NK+RmlFKm7VxaI
O1NKyoK7VxZI\nnJ0pJSkL9uhzMkpDSUn5MXVTcKFLwCSVn+WVBRe6BEzSaDBHH1DoEjCNLkeT8VXhZ2WPPqCi1dpq\ndDiajK8KPyt79AE1V8lKeXM0GV8VflZOxkojyCqw+Ir+s7LqRpIqzgVTFVGFySBJ4TgZWwJVmAyS\nFI49+hKowmSQVGRVHzXboy8ByzClbFV91GyPvgQsw5SyVfVRs1U3OVi2fIIf3vkMr0/u5r0LZ3G1\nd5iSCqXoJZS9WF5ZEK170YM3GpGUHjc1K4jmsBBg4ZHjlRwaSioue/Q52LZtkguWPQC74fLLFpVu\naCipuEzdSFLFuTK2gqpe7yspfeboS6bq9b6S0megL5mq1/tKSp85+pIpc72vpPQ5GStJFZd1Hf2Z\nwC+AXcAxPY5bAmwEHgcuTHA+SdIQkgT6h4AzgPU9jtkH+AZRsD8SOBs4IsE5JUkDSlJeuTHGMYuB\nJ4AtjY9vBE4HHklwXknSALKuupkLPNny8VONz0mSctKvR38HMKfD578CfC/G6zu7KkmB9Qv0pyR8\n/aeB+S0fzyfq1Xe0cuXKPY9rtRq1Wi3h6SWpWur1OvV6faDnpFFeeSdwAfCzDl+bBjwKnAz8CriX\naEK2U47e8kpJGlDW5ZVnEOXfjwfWAusanz+08THATuBzwO3Aw8C3cSJWknLlgilJKjFvPCJJMtBL\nUtW5H32O3EteUgj26HPkXvKSQjDQ58i95CWFYNVNjtxLXlLa3I9ekirOm4NLUgGELsQwR5+DZcsn\nOOucezj3vA1s2zYZujmScha6EMNAn4PQv2RJYYUuxDDQ5yD0L1lSWKuvOIalSw7h+ms/EKQQw8nY\nHFhtIykrVt1IUsW5qZkkyUAvSVVnoJekijPQS1LFGeglqeIM9JJUcQZ6Sao4NzUrmNCbH0mqHnv0\nBeO+OJLSZo++YNwXRyqHMo2+7dEXTOjNjyTFU6bRtz36ghkfH+OqK48L3QxJfZRp9O2mZpI0hKLs\nSuvulZJUce5eKUlKFOjPBH4B7AKO6XHcFuDnwAPAvQnOJ0kaQpJA/xBwBrC+z3G7gRqwCFic4HyF\nUK/XQzehrzK0EWxn2mxnusrSzjiSBPqNwGMxjy3SXEAiZfjll6GNYDvTZjvTVZZ2xpFHjn438APg\nPuC8HM4nSWrRr47+DmBOh89/BfhezHN8CPg1cHDj9TYCd8dtoCQpmTRSKncCXwTuj3HsCmAHcEWH\nrz0BHJZCeyRplGwC3tnrgLRWxnb7hzET2AfYDrwN+ChwSZdjezZUkpS/M4Angf8FngHWNT5/KLC2\n8fi3gQcbb/8FLMu5jZIkSZLy9kXgDeDA0A3p4lJggmiU8kNgftjmdPW3wCNEbb0ZmBW2OV3FXXgX\nyhKiAoLHgQsDt6Wba4Fnida2FNl8ojm9XxCN8D8ftjkdTQc2EP19PwysCtucvvYhWowatzimEOYD\n/wFspriB/u0tj88H/ilUQ/o4hTfLZy9rvBXRe4B3EQWAogX6fYiKBBYAY0R//EeEbFAXJxAtSCx6\noJ8D/E7j8f7AoxTz5zmz8X4a8BPgwwHb0s8XgH8F1vQ6qGh73fwd8JehG9HH9pbH+wMvhGpIH3cQ\njYwg6qHMC9iWXgZZeJe3xUSBfgswCdwInB6yQV3cDWwN3YgYniH6ZwlR9d0jRHN6RfNq4/2+RP/s\nXwzYll7mAUuJOpul2dTsdOApon1xiu6vgP8BPkFxe8qt/hS4LXQjSmguUcFB01ONzym5BUSjkA2B\n29HJVKJ/SM8SjTQfDtucrr4GfIk3O3Rd5X3jkW4LsC4iqsj5aMvnQm6b0G+h2EWNty8T/bA/mV/T\n3iLOgraLgNeBb+XVqA7SWHgXgvtmZ2N/4LvAnxP17IvmDaIU0yzgdqK9uuoB29PJHwLPEeXna2Gb\nEt97if57bm68TRINl38jYJvi+E2iSaWiOhe4
h2iCqeiKmKM/nmjOqGkZxZ2QXUDxc/QQzXXcDvxF\n6IbEdDFwQehGdPDXRKPNzUQ7D7wC/EvQFg2hyJOxh7c8Ph+4PlRD+lhCVN1wUOiGxHQncGzoRrSZ\nRrTqcAFRvraok7FQjkA/hSgYfS10Q3o4CDig8XgG0e68J4drTiwnUeyRcVf/TXED/XeJ/qAeBG6i\nuKOOx4FfEg3tHgCuDtucrrotvCuKU4mqQ56guAv+bgB+Bfwf0c8yVCqxnw8TpUUe5M3rcknQFu3t\nKKLtXB4kmi/8UtjmxHISfapuJEmSJEmSJEmSJEmSJEmSJEmSJKky/h9/iNWzzLPTIwAAAABJRU5E\nrkJggg==\n",
"text": "<matplotlib.figure.Figure at 0x7f8681d48f98>"
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "",
"prompt_number": 6,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "import nltk\nfrom nltk.book import *",
"prompt_number": 8,
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "*** Introductory Examples for the NLTK Book ***\nLoading text1, ..., text9 and sent1, ..., sent9\nType the name of the text or sentence to view it.\nType: 'texts()' or 'sents()' to list the materials.\ntext1:"
},
{
"output_type": "stream",
"stream": "stdout",
"text": " Moby Dick by Herman Melville 1851\ntext2:"
},
{
"output_type": "stream",
"stream": "stdout",
"text": " Sense and Sensibility by Jane Austen 1811\ntext3:"
},
{
"output_type": "stream",
"stream": "stdout",
"text": " The Book of Genesis\ntext4:"
},
{
"output_type": "stream",
"stream": "stdout",
"text": " Inaugural Address Corpus\ntext5:"
},
{
"output_type": "stream",
"stream": "stdout",
"text": " Chat Corpus\ntext6:"
},
{
"output_type": "stream",
"stream": "stdout",
"text": " Monty Python and the Holy Grail\ntext7:"
},
{
"output_type": "stream",
"stream": "stdout",
"text": " Wall Street Journal\ntext8: Personals Corpus\ntext9:"
},
{
"output_type": "stream",
"stream": "stdout",
"text": " The Man Who Was Thursday by G . K . Chesterton 1908\n"
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "def pieces(text, length, num):\n    \"\"\"Return num consecutive slices of text, each at most length items long.\"\"\"\n    return [text[length*i:length*(i+1)] for i in range(num)]\n\n# first four 1000-token slices of Moby Dick (text1) and Sense and Sensibility (text2)\nmelpieces = pieces(text1, 1000, 4)\nauspieces = pieces(text2, 1000, 4)",
"prompt_number": 9,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "def freqdists(pieceslist, mfw):\n    \"\"\"Return, for each piece, its mfw most common tokens as (token, count) pairs.\"\"\"\n    return [FreqDist(piece).most_common(mfw) for piece in pieceslist]\n\nmeldists = freqdists(melpieces, 100)\nausdists = freqdists(auspieces, 100)",
"prompt_number": 10,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "meldists[0][1]",
"prompt_number": 66,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "('.', 44)",
"prompt_number": 66
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "fdist = FreqDist(text1)\ntop100 = fdist.most_common(100)",
"prompt_number": 11,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "top100[:5]",
"prompt_number": 17,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "[(',', 18713), ('the', 13721), ('.', 6862), ('of', 6536), ('and', 6024)]",
"prompt_number": 17
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "len(text1)",
"prompt_number": 18,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "260819",
"prompt_number": 18
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "type(top100[0])",
"prompt_number": 19,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "tuple",
"prompt_number": 19
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "def makePercents(freqdists, textlen):\n    \"\"\"\n    Convert lists of (token, count) pairs into lists of [token, percentage].\n    freqdists: list of frequency lists, one per piece of text\n    textlen: number of tokens the counts were taken from\n    \"\"\"\n    return [[[token, (count / textlen) * 100] for token, count in dist]\n            for dist in freqdists]\n\n# the counts come from the fixed-size pieces, so normalise by the piece\n# length rather than by the length of the whole book\nmelpercs = makePercents(meldists, len(melpieces[0]))\nauspercs = makePercents(ausdists, len(auspieces[0]))",
"prompt_number": 25,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "def makeComparisonArray(textA, textB): \n for word in textA: \n compword = []\n # aaaaargh",
"prompt_number": 29,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "mtest = melvillepercents[:8]\natest = austenpercents[:8]\nnp.array(mtest)\nnp.array(mtest, dtype=[('token', np.unicode_, 64),('perc', np.float)])",
"prompt_number": 30,
"outputs": [
{
"output_type": "pyerr",
"ename": "NameError",
"evalue": "name 'melvillepercents' is not defined",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-30-555bef33313f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmtest\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmelvillepercents\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;36m8\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0matest\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0maustenpercents\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;36m8\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmtest\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmtest\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'token'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0municode_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m64\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'perc'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfloat\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'melvillepercents' is not defined"
]
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "melNP=np.array(mtest, dtype=[('token', '|S100'),('mel_perc', '<f8')])\nausNP=np.array(atest, dtype=[('token', '|S100'),('aus_perc', '<f8')])",
"prompt_number": 55,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "import numpy as np\nimport numpy.lib.recfunctions as recfunctions\n\ndef nplist(distlist, label):\n    \"\"\"\n    Turns a list of frequency distributions into a list of numpy structured arrays.\n    Params: a list of frequency distributions (each a list of (token, count) pairs),\n    a label (string) for the column, representing the text.\n    Array number i gets the fields 'token' and '<label>_freq<i>'; counts are\n    stored as int16.\n    \"\"\"\n    arrays = []\n    for i, dist in enumerate(distlist):\n        # 'U64' is the dtype that (np.unicode_, 64) stood for; the np.unicode_\n        # alias was removed in NumPy 2.0\n        dtype = [('token', 'U64'), (label + '_freq' + str(i), np.int16)]\n        arrays.append(np.array(dist, dtype=dtype))\n    return arrays\n\n# make the lists of frequency distributions into lists of numpy arrays\nmelnps = nplist(meldists, 'mel')\nausnps = nplist(ausdists, 'aus')\n",
"prompt_number": 33,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "melnps[1].dtype",
"prompt_number": 35,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "dtype([('token', '<U64'), ('mel_freq1', '<i2')])",
"prompt_number": 35
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "markdown",
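"source": "I think the \"masked constant\" shown here, i.e. `--`, can be replaced with real values by setting the masked array's fill value (`numpy.ma.set_fill_value()` or the `fill_value` attribute) and then calling `.filled()`.\n\nSee http://docs.scipy.org/doc/numpy/reference/generated/numpy.ma.set_fill_value.html"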
},
{
"metadata": {},
"cell_type": "code",
"input": "def mergearrays(arraylist):\n    \"\"\"\n    Outer-join a list of structured arrays on their 'token' field.\n    Returns one masked array with a row per distinct token and the non-key\n    fields of every input array, in input order; entries for tokens that are\n    missing from an input are masked.\n    Returns None for an empty list and the single array itself for a\n    one-element list.\n    \"\"\"\n    merged = None\n    for array in arraylist:\n        if merged is None:  # first array: nothing to join with yet\n            merged = array\n        else:\n            # 'outer' keeps tokens that occur on only one side of the join\n            merged = recfunctions.join_by('token', merged, array, jointype='outer')\n    return merged\n\nmelnptotal = mergearrays(melnps + ausnps)\ntype(melnptotal)",
"prompt_number": 108,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "numpy.ma.core.MaskedArray",
"prompt_number": 108
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "melnptotal.fill_value\n\ndef fill_array(nparray):\n    \"\"\"\n    Replace the masked entries of a numpy masked array: 'N/A' in the first\n    column (assumed to hold the tokens) and 0 in every other column.\n    Note: this also overwrites nparray.fill_value on the array passed in.\n    \"\"\"\n    n_fields = len(nparray.fill_value)\n    nparray.fill_value = ('N/A',) + (0,) * (n_fields - 1)\n    return nparray.filled()\n\nnpfilled = fill_array(melnptotal)\nnpfilled[:5]",
"prompt_number": 105,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "array([('!', 0, 0, 3, 10, 0, 2, 4, 4), ('!\"', 0, 0, 0, 5, 0, 0, 0, 0),\n ('\"', 23, 13, 20, 32, 0, 0, 22, 9),\n (\"'\", 21, 8, 23, 12, 5, 10, 0, 7), ('(', 3, 2, 2, 3, 0, 0, 0, 0)], \n dtype=[('token', '<U64'), ('mel_freq1', '<i2'), ('mel_freq0', '<i2'), ('mel_freq2', '<i2'), ('mel_freq3', '<i2'), ('aus_freq0', '<i2'), ('aus_freq1', '<i2'), ('aus_freq2', '<i2'), ('aus_freq3', '<i2')])",
"prompt_number": 105
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "numbers = npfilled.dtype.names[1:] #slice off the token names\nnumbersArray = npfilled[[x for x in numbers]]\ndata = np.array(numbersArray.tolist()).T",
"prompt_number": 137,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "plot_pca(data)",
"prompt_number": 138,
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEACAYAAAC9Gb03AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAD9JJREFUeJzt3W2MVNd9x/Hv2Iu7WO5CkFXAgIuFY7kkQEwix45c+bp+\nKLFSE7+I7UqNIFH9JlJiqUnKLjZlJRRBsaw6rhKpUuPIJTUtaSICTeyAXQbUqlkjx6wd4w0PZVuT\nCJBFXILiKDRMX5y77Ows+zBzZ/beOfv9SKO5987MPX8t9m/PnHvOXZAkSZIkSZIkSZIkSZIkSVIL\nXAm8BuxO9+cAe4EjwB5gdk51SdK0d0WTzvMYcBiopPvdhKC/CXg53ZcktamFwEvAXQz36AeAuen2\nvHRfktSmvg3cAtzJcND/our1Us2+JGkKZR26+QRwhjA+XxrjPRWGh3QkSVOsI+PnPwY8ANwPdAJd\nwDbgNGHI5hQwn/DLYIQlS5ZUjh8/nrF5SZp2jgM31vOBrD369cAi4AbgEeDfgE8Du4A16XvWADtr\nP3j8+HEqlUrhHxs3bsy9Buu0Tuu0xqEHsKTeoG7WrJshQ0M0W4B7CdMr/yjdlyTlIOvQTbX96QPg\nLHBPE88tSWpQs3v00UmSJO8SJsU6m8s6m6sd6myHGhs11kyZqVBJx5skSZNUKpWgzuy2Ry9JkTPo\nJSlyBr0kRc6gl6TIGfSSFDmDXpIiZ9BLUuQMekmKXDNvgSBJDet5op8Tg+fpnNnBM0+tpKtrRt4l\nRcMevaRCODF4nr6DZ9l/4Aw9G/rzLicqBr2kQuicGQYYli+bxeZNK3KuJi7e60ZSIZw7d4GeDf1s\n3rTCYZtxNHKvG4NektqINzWTJI1i0EtS5Ax6SYqcQS9JkXPBlDRNuUBp+rBHL01TLlCaPgx6aZpy\ngdL04Tx6aZpygVJ7ymPBVCewH/gd4Crge0APMAf4Z+D3gUHgIeDdms8a9JJUp7xWxl4N/IpwYfff\ngS8BDwDvAFuBdcD7gO6azxn0klSnvFbG/ip9vgq4EvgFIeifS48/B3yyCe1IkhrQjKC/AjgEnAb2\nAW8Cc9N90ue5TWhHktSAZsyjvwh8CJgF/BC4q+b1SvoYpbe399J2kiQkSdKEciQpHuVymXK5nOkc\nzZ51swF4D/hzIAFOAfMJPf2ba97rGL0k1SmPMfprgdnp9kzgXuA1YBewJj2+BtiZsR1JUoOy9uiX\nES62XpE+tgFPEqZX7gCux+mVktQ0/uERSYqcf3hEkjSKQS9JkTPoJSlyBr0kRc6gl6TIGfSSFDmD\nXpIiZ9BLUuQMekmKnEEvSZEz6CUpcga9JEXOoJekyBn0khQ5g16SImfQS1LkDHpJipxBL0mRM+gl\nKXIGvSRFzqCXpMgZ9JIUOYNekiKXNegXAfuAN4GfAF9Ij88B9gJHgD3A7IztSJIaVMr4+Xnp4xBw\nDfAq8EngM8A7wFZgHfA+oLvms5VKpZKxeUmaXkqlEtSZ3Vl79KcIIQ9wHngLWAA8ADyXHn+OEP6S\npBw0c4x+MXAL0AfMBU6nx0+n+5KkHHQ06TzXAN8BHgN+WfNaJX2M0tvbe2k7SRKSJGlSOZIUh3K5\nTLlcznSOrGP0ADOAfwVeAJ5Ojw0ACWFoZz7hgu3NNZ9zjF6S6pTHGH0J+AZwmOGQB9gFrEm31wA7\nM7YjSWpQ1h79HcAB4HWGh2d6gFeAHcD1wCDwEPBuzWft0UtSnRrp0Tdj6KZRBr0k1SmPoRtJUsEZ\n9JIUOYNekiJn0EtS5Ax6SYqcQS9JkTPoJSlyBr0kRc6gl6TIGfSSFDmDXpIiZ9BLUuQMekmKnEEv\nSZEz6CUpcga9JEXOoJekyBn0khQ5g16SImfQS1LkDHpJipxBL0mRM+glKXLNCPpngdPAG1XH5gB7\ngSPAHmB2E9qRJDWgGUH/TWBVzbFuQtDfBLyc7kuScl
Bq0nkWA7uBZen+AHAnoac/DygDN9d8plKp\nVJrUvCRND6VSCerM7laN0c8lhDzp89wWtSNJmkDHFLRRSR+j9Pb2XtpOkoQkSaagHElqH+VymXK5\nnOkcrRy6SYBTwHxgHw7dSFJmRRq62QWsSbfXADtb1I4kaQLN6NFvJ1x4vZYwHv9XwPeAHcD1wCDw\nEPBuzefs0UtSnRrp0Tdr6KYRBr0k1alIQzeSpIIw6CUpcga9JEXOoJekyBn0khQ5g16SImfQS1Lk\nDHpJipxBL0mRM+glKXIGvSRFzqCXpMgZ9JIUOYNekiJn0EtS5Ax6SYqcQS9JkTPoJSlyBr0kRc6g\nl6TIGfSSFDmDXpIiZ9BLUuRaGfSrgAHgKLCuhe1IksZRatF5rwR+CtwD/Aw4CPwp8FbVeyqVSqVF\nzUtSnEqlEtSZ3a3q0d8KHAMGgQvAPwGrW9SWJGkcHS067wLg7ar9k8BHW9TWpPU80c+JwfN0zuzg\nmadW0tU1I++SJKnlWhX0kxqT6e3tvbSdJAlJkrSonODE4Hn6Dp4FoGdDP1/76kda2p4kZVUulymX\ny5nO0aox+tuAXsIFWYAe4CLw11XvmfIx+rWP9rH/wBmWL5vFtmdvt0cvqe00MkbfqqDvIFyMvRv4\nOfAKBbgYe+7cBXo29LN50wpDXlJbKlLQA3wceJowA+cbwOaa1511I0l1KlrQT8Sgl6Q6FWl6pSSp\nIAx6SYqcQS9JkWvVPPrCc/GUpOli2vbohxZP7T9whp4N/XmXI0ktM22DvnNm+DKzfNksNm9akXM1\nktQ603Z6pYunJLUj59FLUuScRy9JGsWgl6TIGfSSFDmDXpIiZ9BLUuQMekmK3LS9BUIzeTsFSUVm\nj74JvJ2CpCIz6JvA2ylIKjJXxjaBt1OQNFW8BYIkRc5bIEiSRjHoJSlyBr0kRc6gl6TIZQn6TwFv\nAr8FVta81gMcBQaA+zK0IUnKKMvK2DeAB4G/qzm+FHg4fV4AvATcBFzM0JYkqUFZgn5gjOOrge3A\nBWAQOAbcCvwoQ1uKkLeOkKZGK8borwNOVu2fJPTspRG8dYQ0NSbq0e8F5l3m+Hpgdx3tXHZlVG9v\n76XtJElIkqSOU6rdeesIaWLlcplyuZzpHM1YGbsP+CLw43S/O33ekj6/CGwE+mo+58rYac5bR0j1\ny+sWCPuALwGvpvtLgecJ4/JDF2NvZHSv3qCXpDpN9S0QHgTeBm4Dvg+8kB4/DOxIn18APscYQzeS\npNbzpmaS1Ea8qZkkaRSDXpIiZ9BLUuQMekmKnEEvSZEz6CUpcga9JEXOoJekyBn0khQ5g16SImfQ\nS1LkDHpJipxBL0mRM+glKXIGvSRFzqCXpMgZ9JIUOYNekiJn0EtS5Ax6SYqcQS9JkTPoJSlyBr0k\nRS5L0D8JvAX0A98FZlW91gMcBQaA+zK0IUlR63min0f+7D9Y+2gf585daEkbWYJ+D/ABYAVwhBDu\nAEuBh9PnVcDXM7YjSdE6MXievoNn2X/gDD0b+lvSRpYA3gtcTLf7gIXp9mpgO3ABGASOAbdmaEeS\notU5swOA5ctmsXnTipa00aye9meBH6Tb1wEnq147CSxoUjuSFJVnnlrJ/avms+3Z2+nqmtGSNjom\neH0vMO8yx9cDu9Ptx4HfAM+Pc57K5Q729vZe2k6ShCRJJihHkuLS1TWDr331I2O+Xi6XKZfLmdoo\nZfo0rAUeBe4Gfp0e606ft6TPLwIbCcM71SqVymXzX5I0hlKpBHVmd5ahm1XAlwlj8r+uOr4LeAS4\nCrgBeD/wSoZ2JEkZTDR0M56/JYT53nT/P4HPAYeBHenz/6XH7LpLUk6yDt1k4dCNJNVpqoduJElt\nwKCXpMgZ9JIUOYNekiJn0EtS5Ax6SYqcQS9JkTPoJSlyBr0kRc6gl6TIGfSSFDmDXpIiZ9BLUuQM\nekmKnEEvSZEz6C
Upcln+wpQkTSs9T/RzYvA8nTM7eOaplXR1zci7pEmxRy9Jk3Ri8Dx9B8+y/8AZ\nejb0513OpBn0kjRJnTPDIMjyZbPYvGlFztVMnn8zVpIm6dy5C/Rs6GfzphW5Dds08jdjDXpJaiP+\ncXBJ0ihZgn4T0A8cAl4GFlW91gMcBQaA+zK0IUnKKEvQbwVWAB8CdgIb0+NLgYfT51XA1zO2k6ty\nuZx3CZNinc1lnc3VDnW2Q42NyhLAv6zavgZ4J91eDWwHLgCDwDHg1gzt5Kpd/vGts7mss7naoc52\nqLFRWRdMfQX4NPAew2F+HfCjqvecBBZkbEeS1KCJevR7gTcu8/iT9PXHgeuBbwJPj3Mep9dIUk6a\nNb3yeuAHwAeB7vTYlvT5RcL4fV/NZ44BS5rUviRNF8eBG6eqsfdXbX8e2JZuLyXMxLkKuCEtKs/5\n+pKkBv0LYRjnEPAd4PeqXltP6LEPAH889aVJkiRJarkvAheBOVXHirTQql0WhD0JvEWo9bvArKrX\nilLnp4A3gd8CK2teK0qNQ1YRajkKrMu5lmrPAqcJ36KHzCFMmDgC7AFm51BXrUXAPsK/90+AL6TH\ni1ZrJ+G64SHgMLA5PV60OgGuBF4Ddqf7RazxshYRLtKeYDjoh8b2ZwCLCUM/eS60+t2q7c8Df59u\nF63Oe6va38LwRfAi1XkzcBMhAKqDvkg1Qvgf6lhaywxCbX+QYz3V/hC4hZFBvxX4y3R7HcP/9nma\nR1hECWF9zU8JP8Mi1np1+txBmBJ+B8Ws8y+AfwR2pftFrPGyvg0sZ2TQ9zCyB/UicNsU1zWWHoZ/\nmEWu80HgW+l2EeusDfqi1Xh7WsOQboZnkRXBYkYG/QAwN92el+4XzU7gHopd69XAQeADFK/OhcBL\nwF0M9+jrrjGP3tNqwiKq12uOX5ceH1KEhVZfAf4HWMvwV7si1jnks4RprlDsOocUrcYFwNtV+3nX\nM5G5hOEc0ue547w3D4sJ30L6KGatVxC+tZ1meLipaHX+DfBlwjD3kLprbNWfEtxL+E1T63FCL656\nLHa8qZetXmg1Vp3rCb89H08f3YQFYZ8Z4zx51wmhzt8Az49znlbWOZkaJyPPxXXtvLCvQrHqv4Yw\nG+8xRt4uBYpT60XCMNMs4IeEXnO1vOv8BHCGMD6fjPGeSdXYqqC/d4zjHyTMrR/6G1wLgVeBjwI/\nY+QFz4XpsVYaq85azzPcUy5inWuB+4G7q45NdZ2T/VlWy+NnOZ7aehYx8htH0Zwm/HI9BcwnhEIR\nzCCE/DbC0A0Ut1aA/wW+D3yYYtX5MeABwv/bnUAX4WdapBon5XIXY4uy0KpdFoStInzlvLbmeNHq\nhPD1+MNV+0WrsSOtYXFaU5EuxsLoMfqtDF/j6KYYF+VKwD8QhhyqFa3WaxmerTITOEDoKBWtziF3\nMvzNuKg1jum/GDm9skgLrdplQdhR4L8JX+9eI9wWekhR6nyQMPb9HqEX8kLVa0WpccjHCTNFjhGG\nGYtiO/BzwvDc24RhxDmEC3VFmmZ3B2FI5BDD/02uoni1LgN+TKjzdcI4OBSvziF3Mjzrpqg1SpIk\nSZIkSZIkSZIkSZIkSZIkSVLr/T94lpdJI4DTuwAAAABJRU5ErkJggg==\n",
"text": "<matplotlib.figure.Figure at 0x7f867735d8d0>"
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "percs = result[['mel_perc','aus_perc']]\npercs.fill_value = 0\npercs = percs.filled()\npercs",
"prompt_number": 94,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "array([(7.174707364110744, 3.6028816919012803),\n (2.630943297842565, 1.524045410802127), (1.5612359529022042, 0.0),\n (1.7517895552087845, 0.783301830004716),\n (2.309647686709941, 1.2844156292294655), (0.0, 0.9339810366576055),\n (2.505952403774265, 1.3668482740904613),\n (5.260736372733581, 1.4803369386432736),\n (1.7414375486448457, 1.557785284047558)], \n dtype=[('mel_perc', '<f8'), ('aus_perc', '<f8')])",
"prompt_number": 94
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "percs2 = np.transpose(np.array([percs['mel_perc'], percs['aus_perc']]))\npercs2",
"prompt_number": 105,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "array([[ 7.17470736, 3.60288169],\n [ 2.6309433 , 1.52404541],\n [ 1.56123595, 0. ],\n [ 1.75178956, 0.78330183],\n [ 2.30964769, 1.28441563],\n [ 0. , 0.93398104],\n [ 2.5059524 , 1.36684827],\n [ 5.26073637, 1.48033694],\n [ 1.74143755, 1.55778528]])",
"prompt_number": 105
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "plot_pca(percs2)",
"prompt_number": 106,
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEACAYAAAC9Gb03AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEQ1JREFUeJzt3X+MHOV9x/H3xWdqI3p2LatgbJArfigkxS5uao4WxLSQ\n9OK2OKnUUtqUH4kofxRC0yYxF7DY1kodt3ICbhJigY0AqbEqglKTEINLGVyhYEwLB8Q/8Dm+xoZi\nkjjgmFTtub7+8Qz2sr693WX2duaee7+klWd2n935yuDPzn6fZ2dBkiRJkiRJkiRJkiRJkiRJbbYe\nOAC8OMaYNcBuYAC4oBNFSZLa5xJCeNcL+iXAI9n2hcDTnShKktRe86kf9F8Drqza3wmcOt4FSZKC\n93TgGHOBfVX7+4F5HTiuJInOBD1AV83+SIeOK0mTXncHjvEKcEbV/rzsvnc466yzRvbs2dOBciQp\nKnuAs8ca0Ikz+o3A1dl2L/AGYZXOO+zZs4eRkZHS326//fbCa4ilzolQo3VaZ9lvwFmNQrgdZ/Rf\nBy4FZhN68bcDU7PH1hJW3CwBBoG3gOvacExJUpPaEfRXNTHmxjYcR5L0LnRqMjYaSZIUXUJTJkKd\nE6FGsM52s87Oq10NU6SRrN8kSWpSV1cXNMhyz+glKXIGvSRFzqCXpMgZ9JIUOYNekiJn0EtS5Ax6\nSYqcQS9JkTPoJSlyBr0kRc6gl6TIdeKHRySpo/pvG2Dv0GGmTe9mzepF9PRMbfykiHlGLyk6e4cO\ns3XbQZ7c8jr9yweKLqdwBr2k6EybHpoVC86fwcoVCwuupnheplhSdA4dGqZ/+QArVyyMvm3TzGWK\nDXpJmsC8Hr0kqS1B3wfsBHYDy0Z5fDawCXgeeAm4tg3HlCQ1KW/rZgqwC7gceAXYRvix8B1VYyrA\nzwH9hNDfBZwKHKl5LVs3ktSiZlo3edfRLwYGgaFsfwOwlHcG/X8BC7LtHuDHnBjymqBcryyVX97W\nzVxgX9X+/uy+ancD7wdeBQaAm3MeUyXiemWp/PKe0TfTa/kcoT+fAGcBm4GFwE9rB1YqlWPbSZKQ\nJEnO8jTeXK8sdVaapqRp2tJz8vboewk9+L5svx84CqyqGvMI8HngqWz/ccKk7bM1r2WPfgKaTOuV\npTLqxDr6bsLk6mWE1swznDgZ+0XgTeCvCZOw/07o2R+seS2DXpJa1InJ2CPAjcCjhBU46wghf0P2\n+Frgb4F7Cf359wCf5cSQlySNE78ZK0kTmN+MlSQZ9JIUO4NekiJn0EtS5Ax6SYqcQS9JkTPoJSly\nBr0kRc6gl6TIGfSSFDmDXpIiZ9BLUuQMekmKnEEvSZEz6CUpcga9JEXOoJekyBn0khQ5g16SIteO\noO8DdgK7gWV1xiTAc8BLQNqGY0qSmpT3x8GnALuAy4FXgG3AVcCOqjEzgaeA3wb2A7OBH43yWv44\nuCS1qBM/Dr4YGASGgGFgA7C0ZswfA98ghDyMHvKSpHGSN+jnAvuq9vdn91U7B5gFPAE8C/xpzmNK\nklrQnfP5zfRapgKLgMuAk4HvAk8TevrvUKlUjm0nSUKSJDnLk6S4pGlKmqYtPSdvj74XqBAmZAH6\ngaPAqqoxy4Dp2TiAe4BNwIM1r2WPXpJa1Ike/bOE1sx84CTgSmBjzZh/Bi4mTNyeDFwIbM95XElS\nk/K2bo4ANwKPEoJ8HWHFzQ3Z42sJSy83AS8QzvbvxqCXpI7J27ppJ1s3ktSiTrRuJEklZ9BLUuQM\nekmKnEEvSZEz6CUpcga9JEXOoJekyBn0khQ5g16SImfQS1LkDHpJipxBL0mRM+glKXIGvSRFzqCX\npMgZ9JIUOYNekiJn0EtS5Ax6SYpcO4K+j/AD4LuBZWOM+zXCj4n/fhuOKUlqUt6gnwJ8mRD27wOu\nAs6rM24VsIly/SC5JEUvb9AvBgaBIWAY2AAsHWXcTc
CDwA9zHk+S1KK8QT8X2Fe1vz+7r3bMUuCu\nbH8k5zElSS3ozvn8ZkL7DuCWbGwXY7RuKpXKse0kSUiSJF91khSZNE1J07Sl5+Ttl/cCFUKPHqAf\nOErox7/t+1XHmQ38DLge2FjzWiMjI57sS1Irurq6oEGW5w36bmAXcBnwKvAMYUJ2R53x9wIPAw+N\n8phBL0ktaibo87ZujgA3Ao8SVtasI4T8Ddnja3O+viQppzItdfSMXpJa1MwZvd+MlaTIGfSSFDmD\nXpIiZ9BLUuQMekmKnEEvSZEz6CUpcga9JEXOoJekyBn0khQ5g16SImfQS1LkDHpJipxBL0mRM+gl\nKXIGvSRFzqCXpMgZ9JIUOYNekiLXjqDvA3YCu4Flozz+J8AA8ALwFLCgDceUJDUp74+DTwF2AZcD\nrwDbgKuAHVVjLgK2A28S3hQqQO8or+WPg0tSizrx4+CLgUFgCBgGNgBLa8Z8lxDyAFuBeTmPKUlq\nQXfO588F9lXt7wcuHGP8J4BHch5TUoT6bxtg79Bhpk3vZs3qRfT0TC26pGjkDfpWei2/CXwc+I16\nAyqVyrHtJElIkuTd1iVpgtk7dJit2w4C0L98gK/c+YGCKyqnNE1J07Sl5+Tt0fcSeu592X4/cBRY\nVTNuAfBQNm6wzmvZo5cmsWuv38qTW15nwfkzeGD9RZ7RN6mZHn3eoO8mTMZeBrwKPMOJk7FnAv8K\nfAx4eozXMuilSezQoWH6lw+wcsVCQ74FnQh6gA8DdxBW4KwDVgI3ZI+tBe4BPgr8ILtvmDCJW8ug\nl6QWdSro28Wgl6QWdWJ5pSSp5Ax6SYqcQS9JkTPoJSlyBr0kRc6gl6TIGfSSFDmDXpIiZ9BLUuQM\nekmKnEEvSZEz6CUpcga9JEXOoJekyBn0khQ5g16SImfQS1LkDHpJipxBL0mRa0fQ9wE7gd3Asjpj\n1mSPDwAXtOGYkqQm5Q36KcCXCWH/PuAq4LyaMUuAs4FzgD8D7sp5TElSC/IG/WJgEBgChoENwNKa\nMVcA92XbW4GZwKk5jytJalLeoJ8L7Kva35/d12jMvJzHlSQ1qTvn80eaHNfVzPMqlcqx7SRJSJLk\nXRUlSbFK05Q0TVt6Tm0At6oXqBB69AD9wFFgVdWYrwEpoa0DYeL2UuBAzWuNjIw0+74hSQLo6uqC\nBlmet3XzLGGSdT5wEnAlsLFmzEbg6my7F3iDE0NekjRO8rZujgA3Ao8SVuCsA3YAN2SPrwUeIay8\nGQTeAq7LeUxJUgvytm7aydaNJLWoE60bSVLJGfSSFDmDXpIil3cyVlKE+m8bYO/QYaZN72bN6kX0\n9EwtuiTl4Bm9NMn03zbAH33sKa69fiuHDg2POmbv0GG2bjvIk1tep3/5QIcrVLsZ9NIk00yIT5se\nPuwvOH8GK1cs7GR5GgcGvTTJNBPia1YvYknfHB5Yf5Ftmwi4jl6aZA4dGqZ/+QArVyw0xCPQzDp6\ng16SJjC/MCVJMuglKXYGvSRFzqCXpMgZ9JIUOYNekiJn0EtS5Ax6SYqcQS9JkfMyxVKEvMywquU9\no58FbAZeBh4DZo4y5gzgCeB7wEvAJ3MeU1IDXmZY1fIG/S2EoD8XeDzbrzUMfAp4P9AL/DlwXs7j\nShqDlxlWtbwXNdsJXAocAE4DUuC9DZ7zTeAfCG8M1byomdQmXqFy8ujE1St/AvxC1WsdrNofzXzg\nScLZ/eGaxwx6SWpRM0HfzGTsZsLZeq1ba/ZHsls9pwAPAjdzYsgDUKlUjm0nSUKSJE2UJ0mTR5qm\npGna0nPa0bpJgNeAOYRJ19FaN1OBbwHfAe6o81qe0UtSizpxPfqNwDXZ9jWE/vsJdQDrgO3UD3lJ\n0jjJe0Y/C/gn4ExgCPhD4A3gdOBu4HeAi4EtwAscb+30A5tqXsszeklqkT8lKEmR86cEJUkGvSTF\nzqCXpMgZ9JIUOY
NekiJn0EtS5Ax6SYqcQS9JkTPoJSlyBr0kRc6gl6TIGfSSFDmDXpIiZ9BLUuQM\nekmKnEEvSZEz6CUpcga9JEUuT9DPAjYDLwOPATPHGDsFeA54OMfxJEnvQp6gv4UQ9OcCj2f79dwM\nbOf4j4NLkjokT9BfAdyXbd8HfKTOuHnAEuAeyvVj5JI0KeQJ+lOBA9n2gWx/NF8CPgMczXEsSdK7\n1N3g8c3AaaPcf2vN/gijt2V+F3id0J9PWi1OkpRfo6D/4BiPHSC8CbwGzCEEeq1fJ7R4lgDTgB7g\nfuDq0V6wUqkc206ShCRJGpQnSZNLmqakadrSc/L0zP8O+DGwijARO5OxJ2QvBT4N/F6dx0dGRpyr\nlaRWdHV1QYMsz9Oj/wLhjP9l4LeyfYDTgW/XeY5JLkkdVqZVMJ7RS1KLxvuMXpI0ARj0khQ5g16S\nImfQS1LkDHpJipxBL0mRa/TN2EL03zbA3qHDTJvezZrVi+jpmVp0SZI0YZXyjH7v0GG2bjvIk1te\np3/5QNHlSNKEVsqgnzY9fNBYcP4MVq5YWHA1kjSxlfKbsYcODdO/fICVKxbatpGkMTTzzdhSBr0k\nqTleAkGSVM5VN5Li4Aq6cvCMXtK4cQVdORj0ksaNK+jKwclYSePGFXTjz1U3khQ5V91Iklx1I0ll\nMh4rlfKc0c8CNhN+HPwxYGadcTOBB4EdwHagN8cxJSlq47FSKU/Q30II+nOBx7P90dwJPAKcBywg\nBP6ElaZp0SU0ZSLUORFqBOtsN+sc23isVMoT9FcA92Xb9wEfGWXMDOASYH22fwR4M8cxC+f/pO0z\nEWoE62w36xzbmtWLWNI3hwfWX9S2lUp5gv5U4EC2fSDbr/VLwA+Be4H/AO4GTs5xTEmKWk/PVL5y\n5wfauhy1UdBvBl4c5XZFzbiR7FarG1gEfDX78y3qt3gkSSWzEzgt256T7dc6DdhbtX8x8K06rzfI\n8TcMb968efPW3G2QBvIsr9wIXAOsyv785ihjXgP2ESZsXwYuB75X5/XOzlGLJGkczAL+hROXV54O\nfLtq3EJgGzAAPESYoJUkSZIUmwqwH3guu/UVWk1jfwUcJXyyKaMVhE9RzxO+53BGseXU9feE71aU\n/RPfHxDajv9HWFhQNn2EebLdwLKCa6lnPWGF3otFFzKGM4AnCP+tXwI+WWw5dU0DthL+fW8HVhZb\nTvNuB/6y6CKadAawiTDRXNag//mq7ZuAe4oqpIEPcnz11xeyWxm9lzDX9ATlC/ophAm5+cBUwj/+\n84osqI5LgAsod9CfBvxKtn0KsIty/l3C8aXq3cDThMUuoyrbRc3KdDXNsXwR+GzRRTTw06rtU4Af\nFVVIA5sJn4wgnKHMK7CWsewkzEeV0WJC0A8Bw8AGYGmRBdXxb8BPii6igdcIb5QAhwmfNk8vrpwx\n/Sz78yTCm/3BegPLFvQ3ET7Cr6P+tXOKtpTQYnqh6EKa8HngB4RVUWU9U672ccLlMtSauYTVbW/b\nn92nfOYTPoFsLbiOet5DeFM6QPikub3ewE5fvXIzx9feV7sVuAv4m2x/BbAa+ESH6qo1Vp39wIeq\n7ivyU0i9Oj8HPEyo91bCl9S+BFzXudLeoVGdEOr8X+AfO1XUKJqps4xGii4gQqcQLsZ4M+HMvoyO\nEtpMM4BHgQRIC6ynZfMpZx/vlwnvnnuz2zDh4/IvFlhTM84kTCyV1bXAU4QJprIrY4++lzBn9LZ+\nyjshO59y/tuuNpUQnH9RdCEtWA58uugimjGnavtTFHtm16wyT8aeU7V9E/BAUYU00EdY4TC76EKa\n9ATwq0UXUaMb2EMI0ZMo72QslD/ou4D7CZ+Ay2w2x9vb04EtwGXFldO8+wl97wHCt2xHu0ha2Xyf\n8gb9g4R/UM8D36C8nzp2A//J8WW1Xy22nLo+SuiD/zdhwu47xZZzgg8TVogMEs7o
y+jrwKvA/xD+\nLotqJY7lYkJL5HnKvdT7fMKFIp8n5OZnii1HkiRJkiRJkiRJkiRJkiRJkiRJkjrk/wHusN0cdPjK\nmQAAAABJRU5ErkJggg==\n",
"text": "<matplotlib.figure.Figure at 0x7ffa7bcc8400>"
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "result.shape",
"prompt_number": 143,
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"text": "(9,)",
"prompt_number": 143
}
],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "",
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
}
],
"metadata": {}
}
],
"metadata": {
"gist_id": "5cd08522c05cc557398a",
"signature": "sha256:42dc8e6b7021b99d06c3a41f3042446d138d3012fe620a1b93a990d93a1d63f7",
"name": ""
},
"nbformat": 3
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment