Skip to content

Instantly share code, notes, and snippets.

@jiaweih
Created December 16, 2015 06:45
Show Gist options
  • Save jiaweih/70e402277a8cc06c4bda to your computer and use it in GitHub Desktop.
Save jiaweih/70e402277a8cc06c4bda to your computer and use it in GitHub Desktop.
simple yelp data exploration
{
"metadata": {
"name": "",
"signature": "sha256:6e4c856e61ed366e5407f7c52e4864977f273be5e7ec092e0c83c95d96e8a925"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"%matplotlib inline"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"from pyspark.sql import SQLContext\n",
"sqlContext = SQLContext(sc)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 24
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Read reviews data"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"reviews_file = '/Users/jiaweihe/Apps/spark-1.5.2/data/yelp_dataset_challenge_academic_dataset/yelp_academic_dataset_review.json'\n",
"reviews = sqlContext.read.json(reviews_file)\n",
"reviews.first()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 192,
"text": [
"Row(business_id=u'vcNAWiLM4dR7D2nwwJ7nCA', date=u'2007-05-17', review_id=u'15SdjuK7DmYqUAj6rjGowg', stars=5, text=u\"dr. goldberg offers everything i look for in a general practitioner. he's nice and easy to talk to without being patronizing; he's always on time in seeing his patients; he's affiliated with a top-notch hospital (nyu) which my parents have explained to me is very important in case something happens and you need surgery; and you can get referrals to see specialists without having to see him first. really, what more do you need? i'm sitting here trying to think of any complaints i have about him, but i'm really drawing a blank.\", type=u'review', user_id=u'Xqd0DzHaiyRqVH3WRG7hzg', votes=Row(cool=1, funny=0, useful=2))"
]
}
],
"prompt_number": 192
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Rating counts"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"reviews_stars = reviews.map(lambda x: x[3])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"min_rating = reviews_stars.reduce(lambda x,y : min(x,y))\n",
"max_rating = reviews_stars.reduce(lambda x,y : max(x,y))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print 'Min rating: {}'.format(min_rating)\n",
"print 'Max rating: {}'.format(max_rating)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Min rating: 1\n",
"Max rating: 5\n"
]
}
],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"count_by_rating = reviews_stars.countByValue()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x_axis = np.array(count_by_rating.keys())\n",
"y_axis = np.array([float(c) for c in count_by_rating.values()])\n",
"# we normalize the y-axis here to percentages\n",
"y_axis_normed = y_axis / y_axis.sum()\n",
"pos = np.arange(len(x_axis))\n",
"width = 1.0\n",
"ax = plt.axes()\n",
"ax.set_xticks(pos + (width / 2))\n",
"ax.set_xticklabels(x_axis)\n",
"plt.bar(pos, y_axis_normed, width, color='lightblue')\n",
"plt.xticks()\n",
"plt.xlabel('Ratings')\n",
"plt.ylabel('Percentage')\n",
"fig = plt.gcf()\n",
"fig.set_size_inches(8, 6)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAfcAAAGCCAYAAAD9mopVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHAVJREFUeJzt3X+0XWdd5/H3pzegQgERCyVtsAiFFIqCYoqt6OWHTGAh\nxcmMpSODUxArEmBEseo4kjqumYU/UcvUoEUQkQzQFqNDaKnoskxq0jiF2pB0GmuW+dHW0iK0VDAx\n3/nj7DSnt0nuyb337JP75P1a667s/eznued7Nl187rN/pqqQJEntOGnSBUiSpIVluEuS1BjDXZKk\nxhjukiQ1xnCXJKkxhrskSY3pLdyTrEyyPcltSS45Sr/vSrI/yapjHStJknoK9yRTwGXASuBZwIVJ\nzjpCv3cBnzzWsZIkaaCvmfsKYEdV7ayqfcA64PzD9HsL8DHg7jmMlSRJ9BfupwG7htZ3d20PSnIa\ng9C+vGs6+Oi8WcdKkqRD+gr3UZ5x+27gZ2vwPNx0P6OOlSRJnSU9fc4eYNnQ+jIGM/Bh3wmsSwLw\nzcDLk+wbcSxJ/CNAknTCqarMbOtr5r4FODPJGUkeCVwArJ9R3LdW1VOr6qkMzru/qarWjzJ26Hcs\nqp93vvOdE6+h9R/3sfu4lR/3s/v4cD9H0svMvar2J1kNXANMAVdU1bYkF3fb1x7r2D7qliRpMerr\nsDxVtQHYMKPtsKFeVRfNNlaSJB2eT6iboOnp6UmX0Dz38fi5j/vhfh6/lvZxjnbMfjFJUq18F0mS\nRpGEmuAFdZIkqSeGuyRJjTHcJUlqjOEuSVJjDHdJkhpjuEuS1BjDXZKkxhjukiQ1xnCXJKkxhrsk\nSY0x3CVJaozhLklSYwx3SZIaY7hLktQYw12SpMYY7pIkNcZwlySpMYa7JEmNMdwlSWqM4S5JUmOW\nTLoASVJ7kky6hBOa4S5JGosrt++ddAnNW7V86WHbPSwvSVJjDHdJkhpjuEuS1BjDXZKkxhjukiQ1\nxnCXJKkxhrskSY0x3CVJaozhLklSY3oL9yQrk2xPcluSSw6z/fwkn0tyU5K/SfLioW07k9zcbdvc\nV82SJC1GvTx+NskUcBnwUmAPcGOS9VW1bajbdVX1J13/5wBXA0/vthUwXVX39lGvJEmLWV8z9xXA\njqraWVX7gHXA+cMdquorQ6snA1+Y8Tt8C4EkSSPoK9xPA3YNre/u2h4iyauTbAM2AG8d2lTAdUm2\nJHnjWCuVJGmR6+utcDVSp6qPAx9P8kLgg8Azu03nVdUdSU4BPpVke1VdP6ZaJUla1PoK9z3AsqH1\nZQxm74dVVdcnWZLkCVV1T1Xd0bXfneRqBof5Hxbua9aseXB5enqa6enphalekqTjwC2bNrJ188ZZ\n+6VqpEn1vCRZAtwKvATYC2wGLhy+oC7J04Dbq6qSfAfw0ap6WpJHAVNVdV+SRwPXApdW1bUzPqP6\n+C6SpNkl8X3uPVi1fClV9bBr0nqZuVfV/iSrgWuAKeCKqtqW5OJu+1pgFfC6JPuA+4HXdMNPBa5K\ncrDeD80MdkmSdEgvM/c+OHOXpOOHM/d+HGnm7hPqJElqjOEuSVJjDHdJkhpjuEuS1BjDXZKkxhju\nkiQ1xnCXJKkxhrskSY0x3CVJaozhLklSYwx3SZIaY7hLktQYw12SpMYY7pIkNcZwlySpMYa7JEmN\nMdwlSWqM4S5JUmMMd0mSGmO4S5LUGMNdkqTGGO6SJDXGcJckqTGGuyRJjTHcJUlqjOEuSVJjDHdJ\nkhpjuEuS1BjDXZKkxhjukiQ1xnCXJKkxhrskSY0x3CVJakxv4Z5kZZLtSW5Lcslhtp+f5HNJbkry\nN0lePOpYSZJ0yJI+PiTJFHAZ8FJgD3BjkvVVtW2o23VV9Sdd/+cAVwNPH3GsJEnq9DVzXwHsqKqd\nVbUPWAecP9yhqr4ytHoy8IVRx0qSpEP6CvfTgF1D67
u7todI8uok24ANwFuPZawkSRroK9xrpE5V\nH6+qs4AfAD6YJOMtS5Kk9vRyzp3BufJlQ+vLGMzAD6uqrk+yBPimrt9IY9esWfPg8vT0NNPT03Mu\nWJKk480tmzaydfPGWfulaqRJ9bx0QX0r8BJgL7AZuHD4orgkTwNur6pK8h3AR6vqaaOM7cZXH99F\nkjS7JFy5fe+ky2jequVLqaqHHeXuZeZeVfuTrAauAaaAK6pqW5KLu+1rgVXA65LsA+4HXnO0sX3U\nLalNnvFT63qZuffBmbukUTmrHL9Vy5e6j3twpJm7T6iTJKkxhrskSY0x3CVJaozhLklSYwx3SZIa\nY7hLktQYw12SpMYY7pIkNcZwlySpMYa7JEmNMdwlSWqM4S5JUmMMd0mSGmO4S5LUGMNdkqTGGO6S\nJDXGcJckqTGGuyRJjTHcJUlqjOEuSVJjDHdJkhpjuEuS1BjDXZKkxhjukiQ1xnCXJKkxhrskSY0x\n3CVJaozhLklSYwx3SZIaY7hLktQYw12SpMYY7pIkNcZwlySpMb2Fe5KVSbYnuS3JJYfZ/sNJPpfk\n5iT/J8m3DW3b2bXflGRzXzVLkrQYLenjQ5JMAZcBLwX2ADcmWV9V24a63Q58b1V9KclK4L3AC7pt\nBUxX1b191CtJ0mLW18x9BbCjqnZW1T5gHXD+cIequqGqvtStbgJOn/E7Mv4yJUla/PoK99OAXUPr\nu7u2I3kD8Imh9QKuS7IlyRvHUJ8kSc3o5bA8g3AeSZIXAa8HzhtqPq+q7khyCvCpJNur6vqFLlKS\npBb0Fe57gGVD68sYzN4foruI7veAlVX1xYPtVXVH9+/dSa5mcJj/YeG+Zs2aB5enp6eZnp5emOol\nSToO3LJpI1s3b5y1X6pGnlTPWZIlwK3AS4C9wGbgwuEL6pI8Bfg08Nqq+uuh9kcBU1V1X5JHA9cC\nl1bVtTM+o/r4LpIWvyRcuX3vpMto2qrlS93HPVi1fClV9bBr0nqZuVfV/iSrgWuAKeCKqtqW5OJu\n+1rgF4HHA5cnAdhXVSuAU4GrurYlwIdmBrskSTqkr8PyVNUGYMOMtrVDyz8K/Ohhxt0OPHfsBUqS\n1AifUCdJUmMMd0mSGmO4S5LUGMNdkqTGGO6SJDXGcJckqTGGuyRJjRkp3JN8fZL/nuT2JF/u2l7W\nPZhGkiQdR0aduf8mcDbww8CBrm0r8BPjKEqSJM3dqE+o+0Hg6VV1f5ICqKo9SY722lZJkjQBo87c\nv8aMPwS6169+YcErkiRJ8zJquH8UeH+SbwVI8mTgMmDduAqTJElzM2q4/xfg74GbgccBO4A7gF8a\nU12SJGmORjrnXlVfA34yyduBU4AvVNWBWYZJkqQJGCncDx6OH3Jy9371rwF3GPSSJB0/Rr1afsdR\nth1Ish54U1XdtQA1SZKkeRj1nPuPAX8MnAl8A/AM4IMM7nN/DoM/Ev7nOAqUJEnHZtSZ+xrgzKr6\n5259R5I3Af+vqn43yY9w9Nm9JEnqyagz95OAM2a0PQWY6pYfGFqWJEkTNOrM/d3Ap5O8D9gFLAMu\nAn6r2/4K4IaFL0+SJB2rUW+F+5UkNwM/BHwHg3vcX19Vn+y2Xw1cPbYqJUnSyEadudMF+SfHWIsk\nSVoAI4d7kucBLwSeAORge1X94hjqkiRJczTq+9x/DPgM8CLgZxnc/vZTwNPHV5okSZqLUa+WvwR4\neVX9IPBA9++/A/aPrTJJkjQno4b7KVX1V93ygSRTDM6//8B4ypIkSXM16jn33UmeWlV/D9wGnM/g\nXe5fG1tlkiRpTkYN918FzmLw2tdLgSuBRwJvHVNdkiRpjka9z/0PhpY3JHk88Miqum9slUmSpDkZ\n9Wr5m4bXq+prVXVfki3jKUuSJM3VqBfUPeyWtwxe6D7zPe+SJGnCjnpYPskHu8WvS/KHDD28hsGL\nZLaOqS5JkjRHs5
1z/7vu3+qWM7T+GeCjY6pLkiTN0VHDvarWACT564MviZmrJCsZvF1uCvj9qnrX\njO0/DPwMgz8g7gPeVFU3jzJWkiQdMurV8p9M8kzg24GTZ2x732zju4feXAa8FNgD3JhkfVVtG+p2\nO/C9VfWlLszfC7xgxLGSJKkzUrgn+XngF4HPAQ/M2DxruAMrgB1VtbP7fesYPAjnwYCuquH3wW8C\nTh91rCRJOmTUh9j8JLDi4GHyOTgN2DW0vhs45yj93wB8Yo5jJUk6oY0a7g8At87jc2rUjkleBLwe\nOO9Yx65Zs+bB5enpaaanp0cdKknSce+WTRvZunnjrP1GDff/Cvx2kkuBO4c3VNWBEcbvAZYNrS9j\nMAN/iCTfBvwesLKqvngsY+Gh4S5JUmvOPudczj7n3AfXP/Ke3zhsv1EfYvN+4I0MQnX/0M++Ecdv\nAc5MckaSRwIXAOuHOyR5CnAV8Nqq2nEsYyVJ0iGjztzn9SS6qtqfZDVwDYPb2a6oqm1JLu62r2Vw\nwd7jgcsHD79jX1WtONLY+dQjSVLLUjXyKW2SnAQ8qaruGF9Jc5OkjuW7SDpxJeHK7XsnXUbTVi1f\n6j7uwarlS6mqzGwf9cUxj0/yx8BX6Z5al+RVSX55YcuUJEnzNeo5998Fvgx8C/C1ru0G4DXjKEqS\nJM3dqOfcXwI8uar2defDqaq7kzxxbJVJkqQ5GXXm/k/AKcMN3dXtnlCRJOk4M2q4/z7wsSQvBk5K\n8t3AB4C1Y6tMkiTNyaiH5X8F+GcGL3B5BPAHDM7D/9aY6pIkSXM06lvhDjAIcsNckqTj3Ki3wv1c\nkhUz2lYk+ZnxlCVJkuZq1HPubwM+P6NtG4O3xUmSpOPIqOH+COBfZrT9C/B1C1uOJEmar1HD/f8C\nb57R9uNduyRJOo6MerX8fwauS/Ja4HYGL5J5MvD94ypMkiTNzazhnsEj6f4ZeAbwSgbvU78S+LOq\nun+85UmSpGM16sz9FuDkqvrwOIuRJEnzN+s59+49qjcBzxx/OZIkab5Gnbn/BbAhyfuBXUABYZD9\n7xtTbZIkaQ5GDffvAXYC33eYbYa7JEnHkVEfPzs95jokSdICGfU+d5I8IcnrDj5yNslpSU4fX2mS\nJGkuRn22/PcBtwL/AfivXfOZwOVjqkuSJM3RqDP33wJeU1Urgf1d218D54ylKkmSNGejhvu3VNV1\nM9r2AVMLXI8kSZqnUcN9W5KVM9peAvztAtcjSZLmadRb4d4O/FmSTwBfn+S9wA8A54+tMkmSNCdH\nDfckjwZ+ATgbuArYDfwB8A/Ad1XV7rFXKEmSjslsM/fLgOcDnwReDvxlVf3E2KuSJElzNts595cD\n/6aq3tEtv3L8JUmSpPmYLdwfXVV7AapqF/C48ZckSZLmY7bD8lNJXtwtB1gytA5AVX16LJVJkqQ5\nmS3c/xG4Ymj9nhnrAE9d0IokSdK8HDXcq+qMnuqQJEkLZOQXx0iSpMWht3BPsjLJ9iS3JbnkMNuX\nJ7khyVeT/NSMbTuT3JzkpiSb+6pZkqTFaNQn1M1LkikG98y/FNgD3JhkfVVtG+p2D/AW4NWH+RUF\nTFfVvWMvVpKkRa6vmfsKYEdV7ayqfcA6Zjy6tqrurqotDF5IczgZc42SJDWhr3A/Ddg1tL67axtV\nAdcl2ZLkjQtamSRJjenlsDyDcJ6P86rqjiSnAJ9Ksr2qrl+IwiRJak1f4b4HWDa0vozB7H0kVXVH\n9+/dSa5mcJj/YeG+Zs2aB5enp6eZnp6eW7WSJB2Hbtm0ka2bN87aL1XznVTPLskS4FYG74DfC2wG\nLpxxQd3BvmuA+6rq17v1RwFTVXVf95a6a4FLq+raGeOqj+8iafFLwpXb9066jKatWr7UfdyDVcuX\nUlUPuyatl5l7Ve1Pshq4BpgCrqiqbUku7ravTXIqcCPwWOBAkrcBzwKeCFyV5GC9
H5oZ7JIk6ZC+\nDstTVRuADTPa1g4t38lDD90fdD/w3PFWJ0lSO3xCnSRJjTHcJUlqjOEuSVJjejvnLmk03cWjkjRn\nhrt0HPIWovFatXzppEuQxsrD8pIkNcZwlySpMYa7JEmNMdwlSWqM4S5JUmMMd0mSGmO4S5LUGMNd\nkqTGGO6SJDXGcJckqTGGuyRJjTHcJUlqjOEuSVJjDHdJkhpjuEuS1BjDXZKkxhjukiQ1xnCXJKkx\nhrskSY0x3CVJaozhLklSYwx3SZIaY7hLktQYw12SpMYY7pIkNcZwlySpMYa7JEmNMdwlSWpMb+Ge\nZGWS7UluS3LJYbYvT3JDkq8m+aljGStJkg7pJdyTTAGXASuBZwEXJjlrRrd7gLcAvzaHsZIkqdPX\nzH0FsKOqdlbVPmAdcP5wh6q6u6q2APuOdawkSTqkr3A/Ddg1tL67axv3WEmSTjhLevqc6mNsknl8\njEZRNZ//KSVJfegr3PcAy4bWlzGYgS/o2B9689sfXH72inM5+5xzj61KHdWq5UsnXYIkndBu2bSR\nrZs3ztqvr3DfApyZ5AxgL3ABcOER+s6cfo889oK3/PT8K5Uk6Th19jkPnbh+5D2/cdh+vYR7Ve1P\nshq4BpgCrqiqbUku7ravTXIqcCPwWOBAkrcBz6qq+w83to+6JUlajPqauVNVG4ANM9rWDi3fyUMP\nvx91rCRJOjyfUCdJUmMMd0mSGmO4S5LUGMNdkqTGGO6SJDWmt6vl1QafAihJxz/DXcfkyu17J11C\n83wSoKT58rC8JEmNMdwlSWqM4S5JUmMMd0mSGmO4S5LUGMNdkqTGGO6SJDXGcJckqTGGuyRJjTHc\nJUlqjOEuSVJjDHdJkhpjuEuS1BjDXZKkxhjukiQ1xnCXJKkxhrskSY0x3CVJaozhLklSYwx3SZIa\nY7hLktQYw12SpMYY7pIkNcZwlySpMYa7JEmNMdwlSWpMb+GeZGWS7UluS3LJEfr8drf9c0meN9S+\nM8nNSW5KsrmvmiVJWoyW9PEhSaaAy4CXAnuAG5Osr6ptQ31eATy9qs5Mcg5wOfCCbnMB01V1bx/1\nSpK0mPU1c18B7KiqnVW1D1gHnD+jz6uADwBU1SbgG5M8aWh7eqlUkqRFrq9wPw3YNbS+u2sbtU8B\n1yXZkuSNY6tSkqQG9HJYnkE4j+JIs/Pvqaq9SU4BPpVke1Vdv0C1SZLUlL7CfQ+wbGh9GYOZ+dH6\nnN61UVV7u3/vTnI1g8P8Dwv3//U7v/bg8rNXnMvZ55y7ELVLknRcuGXTRrZu3jhrv77CfQtwZpIz\ngL3ABcCFM/qsB1YD65K8APinqroryaOAqaq6L8mjgZcBlx7uQy54y0+PqXxJkibv7HMeOnH9yHt+\n47D9egn3qtqfZDVwDTAFXFFV25Jc3G1fW1WfSPKKJDuArwAXdcNPBa5KcrDeD1XVtX3ULUnSYtTX\nzJ2q2gBsmNG2dsb66sOMux147nirkySpHT6hTpKkxhjukiQ1xnCXJKkxhrskSY0x3CVJaozhLklS\nYwx3SZIaY7hLktQYw12SpMYY7pIkNcZwlySpMYa7JEmNMdwlSWqM4S5JUmMMd0mSGmO4S5LUGMNd\nkqTGGO6SJDXGcJckqTGGuyRJjTHcJUlqjOEuSVJjDHdJkhpjuEuS1BjDXZKkxhjukiQ1xnCXJKkx\nhrskSY0x3CVJaozhLklSYwx3SZIaY7hLktQYw12SpMb0Fu5JVibZnuS2JJccoc9vd9s/l+R5xzJW\nkiQN9BLuSaaAy4CVwLOAC5OcNaPPK4CnV9WZwI8Bl486drG6ZdPGSZfQPPfx+LmP++F+Hr+W9nFf\nM/cVwI6q2llV+4B1wPkz+rwK+ABAVW0CvjHJqSOOXZS2bm7nP6Tjlft4/NzH/XA/j19L+7ivcD8N\n2DW0vrtrG6XP0hHGSpKkTl/hXiP2y1irkCTp
BJCqUXN3Hh+SvABYU1Uru/WfAw5U1buG+vwu8JdV\nta5b3w58H/DU2cZ27eP/IpIkHWeq6mET4yU9ffYW4MwkZwB7gQuAC2f0WQ+sBtZ1fwz8U1XdleSe\nEcYe9stJknQi6iXcq2p/ktXANcAUcEVVbUtycbd9bVV9IskrkuwAvgJcdLSxfdQtSdJi1MtheUmS\n1B+fUDcBSd6X5K4kfzvpWlqVZFmSv0iyNcktSd466Zpak+Trk2xK8tkkn0/yPyZdU6uSTCW5Kcmf\nTrqWViXZmeTmbj9vnnQ98+XMfQKSvBC4H/jDqnrOpOtpUfeMhFOr6rNJTgb+Bni1p3QWVpJHVdUD\nSZYAnwF+uqo+M+m6WpPk7cB3Ao+pqldNup4WJfl74Dur6t5J17IQnLlPQFVdD3xx0nW0rKrurKrP\ndsv3A9sYPDNBC6iqHugWH8ngmpgm/o/xeJLkdOAVwO/j7cLj1sz+NdzVvO5Oi+cBmyZbSXuSnJTk\ns8BdwF9U1ecnXVODfhN4B3Bg0oU0roDrkmxJ8sZJFzNfhrua1h2S/xjwtm4GrwVUVQeq6rnA6cD3\nJpmecElNSfJK4B+r6iYamlUep86rqucBLwfe3J0+XbQMdzUrySOAK4E/qqqPT7qellXVl4D/DTx/\n0rU05lzgVd354A8DL07yhxOuqUlVdUf3793A1Qzea7JoGe5qUpIAVwCfr6p3T7qeFiX55iTf2C1/\nA/D9wE2TraotVfXzVbWsqp4KvAb4dFW9btJ1tSbJo5I8plt+NPAyYFHfzWS4T0CSDwMbgWck2ZXk\noknX1KDzgNcCL+pubbkpycpJF9WYJwOf7s65bwL+tKr+fMI1tc7bm8bjScD1Q/8t/1lVXTvhmubF\nW+EkSWqMM3dJkhpjuEuS1BjDXZKkxhjukiQ1xnCXJKkxhrskSY0x3CXNSZLLk/zCpOuQ9HDe5y6d\nQJLsBJ4I/CvwFeBTwJur6suzjPtPwBuqalE/b1s6UThzl04sBbyyqh4DfDvwHMDZt9QYw106QVXV\nXcC1wLMBkvxskh1Jvpxka5JXd+1nAZcD353kviT3du3vT/LfuuXpJLuTvD3JXUn2drN9uu1PSPKn\nSb6UZHOSX05yfbctSX6zG/elJDcneXavO0NqjOEunXgCkOR0YCWH3nO/A/ieqnoscCnwR0meVFXb\ngB8Hbqiqx1TVN3X9i4c+6/xJwGOBpcAbgPckeVy37T3AfV2fHwFeNzT2ZcALgTOr6nHAvwfuWdiv\nLJ1YDHfpxBLg40m+DPwD8HfALwNU1ceq6s5u+SPAbcA5Q+OO9PsO2gf8UlX9a1VtAO4HnplkCvi3\nwDur6qvdHwsfGBq7D3gMcFaSk6rq1oN1SJobw106sRRwfjc7nwZeTPcO9iSv696e98UkXwTOBp5w\nDL/7nqo6MLT+AHAycAqwBNg1tG33gwVVfRq4jMHs/q4kaw++flPS3Bju0gmqqv4K+B3gXUmeAvwe\n8Gbgm6rq8cAtHJpdH+m2mlFut7kb2A8sG2obXqaqfqeqng88C3gG8I5Rv4ekhzPcpRPbu4EVwOnA\nAeALwElJLmIwcz/oLuD0JI8YagtHPlz/oKr6V+AqYE2Sb0iyHPiPdH8YJHl+knO63/0A8FUGt+pJ\nmiPDXTqBVdUXGJz/fgfw68ANwJ0Mgv0zQ13/HNgK3JnkHw8O56Ez96PN4lcDj+t+9weADwP/0m17\nLPBe4F5gJ4M/MH51rt9Jkg+xkTQBSd4FPLGqLpp0LVKLnLlLGrskz0zybd097SuA1wNXT7ouqVVL\nJl2ApBPCYxgcil/K4Pz9r1XV+smWJLXLw/KSJDXGw/KSJDXGcJckqTGGuyRJjTHcJUlqjOEuSVJj\nDHdJkhrz/wFXtRiKcsMQgAAAAABJRU5ErkJggg==\n",
"text": [
"<matplotlib.figure.Figure at 0x119a4cf50>"
]
}
],
"prompt_number": 181
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Word Counts"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import nltk.stem\n",
"stem = nltk.stem.SnowballStemmer('english')\n",
"from nltk.corpus import stopwords\n",
"stopWords = stopwords.words(\"english\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 131,
"text": [
"u'graphic'"
]
}
],
"prompt_number": 131
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"stem.stem(\"Dancing\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 180,
"text": [
"u'danc'"
]
}
],
"prompt_number": 180
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"stopWords[:20]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 196,
"text": [
"[u'i',\n",
" u'me',\n",
" u'my',\n",
" u'myself',\n",
" u'we',\n",
" u'our',\n",
" u'ours',\n",
" u'ourselves',\n",
" u'you',\n",
" u'your',\n",
" u'yours',\n",
" u'yourself',\n",
" u'yourselves',\n",
" u'he',\n",
" u'him',\n",
" u'his',\n",
" u'himself',\n",
" u'she',\n",
" u'her',\n",
" u'hers']"
]
}
],
"prompt_number": 196
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def get_words(x):\n",
" ### get text words from reviews table\n",
" try:\n",
" s = str(x[4].strip())\n",
" ### get rid of punctuation \n",
" stripped_s = s.translate(string.maketrans(\"\",\"\"), string.punctuation)\n",
" ### remove stop words, \"\\n\", leading and trailing whitespace\n",
" return [word for word in stripped_s.replace(\"\\n\",\" \").strip().split(\" \") if word not in stopWords]\n",
" except:\n",
" ### UnicodeEncodeError: 'ascii' codec can't encode character u'\\xed' in position 1003: ordinal not in range(128)\n",
" return \"\""
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 143
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rdd_texts = reviews.flatMap(lambda x: get_words(x))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 144
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rdd_texts.first()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 145,
"text": [
"'dr'"
]
}
],
"prompt_number": 145
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"### not working: ImportError: No module named nltk.stem.snowball\n",
"rdd_stemmed_texts = rdd_texts.map(lambda x : stem.stem(x))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 148
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"wordCounts = rdd_texts.countByValue()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 151
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"wordCounts_sorted = sorted(wordCounts.items(), key=lambda x: x[1], reverse=True)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 169
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"wordCounts_sorted[:30]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 194,
"text": [
"[('I', 4843867),\n",
" ('The', 1684681),\n",
" ('place', 860426),\n",
" ('good', 839233),\n",
" ('food', 779140),\n",
" ('like', 644014),\n",
" ('great', 583999),\n",
" ('get', 568243),\n",
" ('time', 533053),\n",
" ('We', 532472),\n",
" ('one', 517918),\n",
" ('would', 456766),\n",
" ('back', 449195),\n",
" ('service', 448360),\n",
" ('go', 443821),\n",
" ('really', 437825),\n",
" ('It', 399264),\n",
" ('They', 381095),\n",
" ('This', 350447),\n",
" ('us', 318242),\n",
" ('got', 306747),\n",
" ('My', 306103),\n",
" ('nice', 301650),\n",
" ('dont', 300502),\n",
" ('also', 299930),\n",
" ('Im', 293104),\n",
" ('even', 280272),\n",
" ('little', 270659),\n",
" ('Ive', 268060),\n",
" ('well', 259919)]"
]
}
],
"prompt_number": 194
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"### eliminate upper_class stopWords\n",
"for k,v in wordCounts.items():\n",
" try:\n",
" if stem.stem(k) in stopWords:\n",
" del wordCounts[k]\n",
" ##AttributeError: 'int' object has no attribute 'lower'\n",
" except:\n",
" pass"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 176
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"wordCounts_sorted_stemmed = sorted(wordCounts.items(), key=lambda x: x[1], reverse=True)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 177
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"wordCounts_sorted_stemmed[:30]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 195,
"text": [
"[('place', 860426),\n",
" ('good', 839233),\n",
" ('food', 779140),\n",
" ('like', 644014),\n",
" ('great', 583999),\n",
" ('get', 568243),\n",
" ('time', 533053),\n",
" ('one', 517918),\n",
" ('would', 456766),\n",
" ('back', 449195),\n",
" ('service', 448360),\n",
" ('go', 443821),\n",
" ('really', 437825),\n",
" ('us', 318242),\n",
" ('got', 306747),\n",
" ('nice', 301650),\n",
" ('dont', 300502),\n",
" ('also', 299930),\n",
" ('Im', 293104),\n",
" ('even', 280272),\n",
" ('little', 270659),\n",
" ('Ive', 268060),\n",
" ('well', 259919),\n",
" ('didnt', 254223),\n",
" ('best', 251523),\n",
" ('much', 247143),\n",
" ('always', 245083),\n",
" ('ordered', 239653),\n",
" ('people', 237330),\n",
" ('restaurant', 231180)]"
]
}
],
"prompt_number": 195
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Review ages (years between each review and 2016)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def get_year(x):\n",
" return int(x[:4])\n",
" \n",
"rdd_years = reviews.map(lambda x: int(x[1][:4]))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 31
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"max_year = rdd_years.reduce(lambda x,y: max(x,y))\n",
"min_year = rdd_years.reduce(lambda x,y: min(x,y))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 33
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"max_year"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 34,
"text": [
"2015"
]
}
],
"prompt_number": 34
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"min_year"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 35,
"text": [
"2004"
]
}
],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rating_ages = rdd_years.map(lambda yr: 2016 - yr).countByValue()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 36
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rating_ages"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 39,
"text": [
"defaultdict(<type 'int'>, {1: 14665, 2: 486306, 3: 336273, 4: 244106, 5: 209429, 6: 137764, 7: 72948, 8: 45117, 9: 17724, 10: 4239, 11: 680, 12: 13})"
]
}
],
"prompt_number": 39
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"plt.bar(rating_ages.keys(), rating_ages.values(), width, color='g')\n",
"plt.xlabel('Rating Ages')\n",
"plt.ylabel('Counts')\n",
"x_axis = np.array(rating_ages.keys())\n",
"pos = np.array(rating_ages.keys()) \n",
"width = 1.0\n",
"ax = plt.axes()\n",
"ax.set_xticks(pos + (width / 2))\n",
"ax.set_xticklabels(x_axis)\n",
"fig = plt.gcf()\n",
"fig.set_size_inches(8, 6)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAgcAAAGCCAYAAAB0NGDNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH2pJREFUeJzt3X24ZnVd7/H3B0YQFEEwh6cBS4HESEEDy9KNDziaAacH\nxVMx1Rw7NXa0OqdLOHbBRo2czinSTC8thIGCJE2zNJwR3WkPMkqo6EiAOckMMuggWHk0wO/5Y60t\n92+7Z2Yrs9e9mfv9uq597XX/1sP3d+952J/7t35rrVQVkiRJs/YadwckSdLSYjiQJEkNw4EkSWoY\nDiRJUsNwIEmSGoYDSZLUGDQcJNmc5JNJrk+ysW87OMmGJDclWZ/koJHtz01yc5Ibk5w20v7kJDf0\n61430r5vkrf17R9JcvTIulV9jZuSnD3Ue5Yk6cFm6JGDAqaq6sSqOrlvOwfYUFXHAtf0r0lyPPAi\n4HhgJfDGJOn3eROwuqqOAY5JsrJvXw1s79svAtb2xzoYOA84uf86fzSESJKk+43jtELmvD4dWNcv\nrwPO7JfPAK6sqnuqajNwC3BKksOAA6pqY7/dZSP7jB7rHcCz+uXnAuur6q6qugvYQBc4JEnSHOMY\nOXh/ko8leUnftryqtvXL24Dl/fLhwJaRfbcAR8zTvrVvp/9+K0BV3QvcneSQnRxLkiTNsWzgek+r\nqi8k+S5gQ5IbR1dWVSXxfs6SJI3RoOGgqr7Qf/9iknfSnf/fluTQqrq9P2VwR7/5VmDFyO5H0n3i\n39ovz22f3eco4LYky4ADq2p7kq3A1Mg+K4APjPbNUCJJmkRVNfd0/3CnFZLsn+SAfvlhwGnADcC7\ngVX9ZquAd/XL7wbOSrJPku8GjgE2VtXtwFeSnNJPUPxZ4C9H9pk91k/STXAEWA+cluSgJI8EngO8\nb24fq+pB+3X++edb3/oTWX+S37v1rf9Av3ZkyJGD5cA7+wsOlgF/WlXrk3wMuCrJamAz8EKAqtqU\n5CpgE3AvsKbufydrgEuB/YD3VtXVffvFwOVJbga2A2f1x7ozyauBj/bbXVDdxERJkjTHYOGgqj4H\nPGme9juBZ+9gnwuBC+dpvw44YZ72r9OHi3nWXQJc8u31WpKkyeMdEvcQU1NT1rf+RNaf5Pdufesv\nluzsnMMkSVL+LCRJkyQJNc4JiZIk6cHBcCBJkhqGA0mS1Bj6Dolaou5/ptX4OOdDkpYGw4HuNz2h\ntSVJDU8rSJKkhuFAkiQ1DAeSJKlhOJAkSQ3DgSRJahgOJElSw3AgSZIahgNJktQwHEiSpIbhQJIk\nNQwHkiSpYTiQJEkNw4EkSWoYDiRJUsNwIEmSGoYDSZLUMBxIkqSG4UCSJDUMB5IkqWE4kCRJDcOB\nJElqGA4kSVLDcCBJkhqGA0mS1DAcSJKkhuFAkiQ1DAeSJKlhOJAkSQ3DgSRJahgOJElSw3AgSZIa\nhgNJktQwHEiSpIbhQJIkNQwHkiSpYTiQJEkNw4EkSWoYDiRJUsNwIEmSGoYDSZLUMBxIkqSG4UCS\nJDUMB5IkqWE4kCRJDcOBJElqGA4kSVLDcCBJkhqGA0mS1DAcSJKkhuFAkiQ1DAeSJKkxaDhIsneS\n65P8Vf/64CQbktyUZH2Sg0a2PTfJzUluTHLaSPuTk9zQr3vdSPu+Sd7Wt38kydEj61b1NW5KcvZQ\n71eSpAejoUcOXg5sAqp/fQ6woaqOBa7pX5PkeOBFwPHASuCNSdLv8yZgdVUdAxyTZGXfvhrY3rdf\nBKztj3UwcB5wcv91/mgIkSRJrcHCQZIjgecDfwzM/qI/HVjXL68DzuyXzwCurKp7qmozcAtwSpLD\ngAOqamO/3WUj+4we6x3As/rl5wLrq+quqroL2EAXOCRJ0jyGHDm4CPgN4Bsjbcuralu/vA1Y3i8f\nDmwZ2W4LcMQ87Vv7dvrvtwJU1b3A3UkO2cmxJEnSPA
YJB0leANxRVddz/6hBo6qK+083SJKkMVk2\nUJ0fAk5P8nzgocAjklwObEtyaFXd3p8yuKPffiuwYmT/I+k+8W/tl+e2z+5zFHBbkmXAgVW1PclW\nYGpknxXAB+br5PT09DeXp6ammJqamm8zSZIelGZmZpiZmdnlduk+sA8nyTOA/1VVP5bkd+gmEa5N\ncg5wUFWd009IvIJuAuERwPuBx1VVJbkWeBmwEXgP8PqqujrJGuCEqvrlJGcBZ1bVWf2ExI8BJ9GN\nWlwHnNTPPxjtVw39s1hKksD0GDswDZP885ekcUhCVX3LiP5QIwdzzf4WeC1wVZLVwGbghQBVtSnJ\nVXRXNtwLrBn5zb0GuBTYD3hvVV3dt18MXJ7kZmA7cFZ/rDuTvBr4aL/dBXODgSRJut/gIwdLlSMH\njhxI0qTZ0ciBd0iUJEkNw4EkSWoYDiRJUsNwIEmSGoYDSZLUMBxIkqSG4UCSJDUMB5IkqWE4kCRJ\nDcOBJElqGA4kSVLDcCBJkhqGA0mS1DAcSJKkhuFAkiQ1DAeSJKlhOJAkSQ3DgSRJahgOJElSw3Ag\nSZIahgNJktQwHEiSpIbhQJIkNQwHkiSpYTiQJEkNw4EkSWoYDiRJUsNwIEmSGoYDSZLUMBxIkqSG\n4UCSJDUMB5IkqWE4kCRJDcOBJElqGA4kSVLDcCBJkhqGA0mS1DAcSJKkhuFAkiQ1lo27A9KsJGOt\nX1VjrS9JS4XhQEvH9ITWlqQlxtMKkiSpYTiQJEkNw4EkSWoYDiRJUsNwIEmSGoYDSZLUMBxIkqSG\n4UCSJDUMB5IkqWE4kCRJDcOBJElqGA4kSVLDcCBJkhqGA0mS1DAcSJKkhuFAkiQ1DAeSJKkxSDhI\n8tAk1yb5eJJNSX67bz84yYYkNyVZn+SgkX3OTXJzkhuTnDbS/uQkN/TrXjfSvm+St/XtH0ly9Mi6\nVX2Nm5KcPcR7liTpwWqQcFBVXwNOraonAd8PnJrkh4FzgA1VdSxwTf+aJMcDLwKOB1YCb0yS/nBv\nAlZX1THAMUlW9u2rge19+0XA2v5YBwPnASf3X+ePhhBJktQa7LRCVX21X9wH2Bv4MnA6sK5vXwec\n2S+fAVxZVfdU1WbgFuCUJIcBB1TVxn67y0b2GT3WO4Bn9cvPBdZX1V1VdRewgS5wSJKkeQwWDpLs\nleTjwDbgg1X1aWB5VW3rN9kGLO+XDwe2jOy+BThinvatfTv991sBqupe4O4kh+zkWJIkaR7LhipU\nVd8AnpTkQOB9SU6ds76S1FD9kSRJ8xssHMyqqruTvAd4MrAtyaFVdXt/yuCOfrOtwIqR3Y6k+8S/\ntV+e2z67z1HAbUmWAQdW1fYkW4GpkX1WAB+Yr2/T09PfXJ6ammJqamq+zSRJelCamZlhZmZml9ul\navE/rCd5FHBvVd2VZD/gfcAFdPMBtlfV2iTnAAdV1Tn9hMQr6CYQHgG8H3hcP7pwLfAyYCPwHuD1\nVXV1kjXACVX1y0nOAs6sqrP6CYkfA04CAlwHnNTPPxjtYw3xs1iqksD0GDswzdjrT/Kfv6TJlISq\nytz2oUYODgPWJdmLbp7D5VV1TZLrgauSrAY2Ay8EqKpNSa4CNgH3AmtGfnOvAS4F9gPeW1VX9+0X\nA5cnuRnYDpzVH+vOJK8GPtpvd8HcYCBJku43yMjBg4EjB44cTPKfv6TJtKORA++QKEmSGoYDSZLU\nMBxIkqSG4UCSJDUMB5IkqWE4kCRJDcOBJElqGA4kSVLDcCBJkhqGA0mS1FhQOEiSOa+fmeQZi9Ml\nSZI0TgsdOfjbJE8DSPIK4ErgyiSvXLSeSZKksVhoOHgC8JF++ReBZwKnAL+0GJ2SJEnjs9BHNu8F\nkOSxAFX16f5UwyMXq2OSJGk8FhoO/h54A3AY8M6+7bHAFxejU5IkaXwWelrh54C7gE8A033bccDr\ndn+XJEnSOC105O
DUqjp3tKGq3pPkpxahT5IkaYwWOnLw1h20v2V3dUSSJC0NOx05SPI9QLrFfM+c\n1Y8F/t9idUySJI3Hrk4r3LKDZYBt3D//QJIk7SF2Gg6qavYSxg9V1dOH6ZIkSRqnBc05MBhIkjQ5\nFnS1Qj/f4LeAJwEPH1lVVXXUYnRMkiSNx0IvZbyCbs7Br+MkREmS9mgLDQfHA0+rqvsWszOSJGn8\nFnqfgw8BJy5mRyRJ0tKw0JGDfwWuTvIXdJcwzqqqOm/3d0uSJI3LQsPBw4C/Bh4CHNm3BajF6JQk\nSRqfBYWDqvq5Re6HJElaIr6dSxnnVVX/svu6I0mSxm2hpxXm3jp5VgF776a+SJKkJWChpxWaqxqS\nHEr3XIUPL0KfJEnSGC30UsZGVd0O/Cpw4e7tjiRJGreFnlaYz3HA/rurI9K4JRlr/Sov/pG0NCx0\nQuLc0wf7A08AXrXbeySNy/SE1pakORY6cnDxnNf/AXyiqm7azf2RJEljttAJiZcucj8kSdISsaAJ\niUn2SfKqJJ9L8vX++6uS7LPYHZQkScNa6GmFtcDJwH8HPg8cBZwHPILuqgVJkrSHWGg4eCHwxKr6\nUv/6xiT/BHwSw4EkSXuU7+g+B5Ikac+10HDw58C7k6xM8vgkzwP+sm+XJEl7kIWeVngF8ErgDcDh\nwG3AlcBrFqlfkiRpTHY6cpDkaUnWVtXXq+q8qnpcVe1fVY8D9gFOHKabkiRpKLs6rfBK4EM7WDfT\nr5ckSXuQXYWDJwFX72Dd+4Gn7N7uSJKkcdtVODiA7vTBfB7Sr5ckSXuQXYWDfwaeu4N1zwE+s3u7\nI0mSxm1X4eD3gDcn+YkkewEk2SvJTwBvBi5a7A5KkqRh7fRSxqq6IsmhwKXAnyX5EvAo4OvAeVV1\nxeJ3UZIkDWmX9zmoqt9LcjHwg8AhwHbgH6vq7sXunCRJGt5CH9l8Nzu+akGSJO1BfLaCJElqGA4k\nSVLDcCBJkhqGA0mS1DAcSJKkxkIf2SxpkSUZa/2qGmt9SUvHYOEgyQrgMuDRQAFvqarXJzkYeBtw\nNLAZeGFV3dXvcy7wC8B9wMuqan3f/mS6GzM9FHhvVb28b9+3r3ES3f0YXlRV/9qvW8X9T5F8TVVd\nttjvWfq2TE9obUlLzpCnFe4Bfq2qngA8FXhpkscD5wAbqupY4Jr+NUmOB14EHA+sBN6Y+z9avQlY\nXVXHAMckWdm3rwa29+0XAWv7Yx0MnAec3H+dn+SgxX7DkiQ9GA0WDqrq9qr6eL/873QPbToCOB1Y\n12+2DjizXz4DuLKq7qmqzcAtwClJDgMOqKqN/XaXjewzeqx3AM/ql58LrK+qu/pRiQ10gUOSJM0x\nlgmJSR4DnAhcCyyvqm39qm3A8n75cGDLyG5b6MLE3PatfTv991sBqupe4O4kh+zkWJIkaY7Bw0GS\nh9N9qn95Vf3b6LrqZkQ5K0qSpDEa9GqFJA+hCwaXV9W7+uZtSQ6tqtv7UwZ39O1bgRUjux9J94l/\na788t312n6OA25IsAw6squ1JtgJTI/usAD4wt3/T09PfXJ6ammJqamruJpIkPWjNzMwwMzOzy+0y\n1OVL/WTCdXQTBn9tpP13+ra1Sc4BDqqqc/oJiVfQTSA8Ang/8LiqqiTXAi8DNgLvAV5fVVcnWQOc\nUFW/nOQs4MyqOqufkPgxuqsYAlwHnDR7VUTfj5rkS7mSjH+2vPXHWn+S//5LkyoJVfUt11EPOXLw\nNOBngE8mub5vOxd4LXBVktX0lzICVNWmJFcBm4B7gTUjv73X0F3KuB/dpYyzT4y8GLg8yc10lzKe\n1R/rziSvBj7ab3fBaDCQJEn3GywcVNXfseM5Ds/ewT4XAhfO034dcMI87V+nDxfzrLsEuGSh/ZUk\naVJ5+2RJktQwHEiSpIbhQJIkNQwHkiSpYTiQJEkNw4EkSWoYDiRJUsNwIEmSGoYD
SZLUMBxIkqSG\n4UCSJDUMB5IkqWE4kCRJDcOBJElqGA4kSVLDcCBJkhqGA0mS1DAcSJKkhuFAkiQ1DAeSJKlhOJAk\nSQ3DgSRJahgOJElSw3AgSZIahgNJktQwHEiSpIbhQJIkNQwHkiSpYTiQJEkNw4EkSWoYDiRJUsNw\nIEmSGoYDSZLUMBxIkqSG4UCSJDUMB5IkqWE4kCRJDcOBJElqLBt3ByQtDUnGVruqxlZb0rcyHEjq\nTE9YXUk75GkFSZLUMBxIkqSG4UCSJDUMB5IkqWE4kCRJDcOBJElqGA4kSVLDcCBJkhqGA0mS1DAc\nSJKkhuFAkiQ1DAeSJKlhOJAkSQ3DgSRJahgOJElSw3AgSZIahgNJktQwHEiSpMZg4SDJW5NsS3LD\nSNvBSTYkuSnJ+iQHjaw7N8nNSW5MctpI+5OT3NCve91I+75J3ta3fyTJ0SPrVvU1bkpy9hDvV5Kk\nB6shRw4uAVbOaTsH2FBVxwLX9K9JcjzwIuD4fp83Jkm/z5uA1VV1DHBMktljrga29+0XAWv7Yx0M\nnAec3H+dPxpCJElSa7BwUFUfBr48p/l0YF2/vA44s18+A7iyqu6pqs3ALcApSQ4DDqiqjf12l43s\nM3qsdwDP6pefC6yvqruq6i5gA98aUiRJUm/ccw6WV9W2fnkbsLxfPhzYMrLdFuCIedq39u30328F\nqKp7gbuTHLKTY0mSpHmMOxx8U1UVUOPuhyRJk27ZmOtvS3JoVd3enzK4o2/fCqwY2e5Iuk/8W/vl\nue2z+xwF3JZkGXBgVW1PshWYGtlnBfCB+TozPT39zeWpqSmmpqbm20ySpAelmZkZZmZmdrnduMPB\nu4FVdJMHVwHvGmm/Isnv0Z0COAbYWFWV5CtJTgE2Aj8LvH7OsT4C/CTdBEeA9cCF/STEAM8BXjFf\nZ0bDgSRJe5q5H3wvuOCCebcbLBwkuRJ4BvCoJLfSXUHwWuCqJKuBzcALAapqU5KrgE3AvcCa/rQD\nwBrgUmA/4L1VdXXffjFweZKbge3AWf2x7kzyauCj/XYX9BMTJUnSPAYLB1X14h2sevYOtr8QuHCe\n9uuAE+Zp/zp9uJhn3SV0l1JKkqRdWDITEiVJ0tJgOJAkSQ3DgSRJahgOJElSw3AgSZIahgNJktQw\nHEiSpIbhQJIkNQwHkiSpYTiQJEkNw4EkSWoYDiRJUsNwIEmSGoYDSZLUGOyRzZK0I0nGWr+qxlpf\nWmoMB5LGb3pCa0tLlKcVJElSw3AgSZIahgNJktQwHEiSpIbhQJIkNQwHkiSpYTiQJEkNw4EkSWoY\nDiRJUsNwIEmSGoYDSZLUMBxIkqSG4UCSJDUMB5IkqWE4kCRJDcOBJElqGA4kSVJj2bg7IEnjlmSs\n9atqrPWluQwHkjQ9obWlHfC0giRJahgOJElSw3AgSZIahgNJktQwHEiSpIbhQJIkNQwHkiSpYTiQ\nJEkNw4EkSWoYDiRJUsNwIEmSGoYDSZLUMBxIkqSG4UCSJDUMB5IkqWE4kCRJjWXj7oAkTbokY61f\nVWOtr6XHcCBJ4zY9obW1ZBkOlohxf3KQJGmW4WApmZ7Q2pKkJcUJiZIkqWE4kCRJDcOBJElqTEw4\nSLIyyY1Jbk7yinH3R5KkpWoiwkGSvYE3ACuB44EXJ3n8eHu1m33O+taf0PqT/N6XQP2ZmRnr74Em\nIhwAJwO3VNXmqroH+DPgjDH3affabH3rT2j9cdbeQ+on+Y6/Tj311Ae0/wO9jHvcv5zHXX+xTMql\njEcAt4683gKcMqa+SNLSMv0A9v0gcOqYamvRTEo48N6gkrREPdDRgwsuuOAB7e/to79VJuGHkuSp\nwHRVrexfnwt8o6rWjmyz5/8gJEmao6q+JZ1NSjhYBvwz8CzgNmAj8OKq+sxYOyZJ0hI0EacVqure\nJL8CvA/YG7jYYCBJ0vwmYuRAkiQt3KRcyrjH
SvLWJNuS3DCm+iuSfDDJp5N8KsnLBqz90CTXJvl4\nkk1Jfnuo2nP6sXeS65P81Rhqb07yyb7+xjHUPyjJ25N8pv8zeOqAtY/r3/fs191D/v3r+3Bu/3f/\nhiRXJNl34Pov72t/KsnLB6j3Lf/fJDk4yYYkNyVZn+Sggev/VP9ncF+Skxar9k7q/5/+7/8nkvxF\nkgMXsw9DMRw8+F1Cd3OncbkH+LWqegLwVOClQ91gqqq+BpxaVU8Cvh84NckPD1F7jpcDmxjPVTEF\nTFXViVV18hjqvw54b1U9nu7PYLDTdVX1z/37PhF4MvBV4J1D1U/yGOAlwElVdQLdKcuzBqz/fcB/\nA34AeCLwgiSPXeSy8/1/cw6woaqOBa7pXw9Z/wbgvwAfWsS6O6u/HnhCVT0RuAk4d4B+LDrDwYNc\nVX0Y+PIY699eVR/vl/+d7pfD4QPW/2q/uA/df853DlUbIMmRwPOBPwYe2PVYD6AbYynafUL6kap6\nK3Rze6rq7nH0BXg28NmqunWXW+4+X6ELx/v3k573B7YOWP97gWur6mtVdR/wt8CPL2bBHfx/czqw\nrl9eB5w5ZP2qurGqblqsmguov6GqvtG/vBY4coi+LDbDgXab/pPUiXT/QIaquVeSjwPbgA9W1aah\navcuAn4D+MauNlwkBbw/yceSvGTg2t8NfDHJJUn+KckfJdl/4D7MOgu4YsiCVXUn8LvA5+mugrqr\nqt4/YBc+BfxIP6y/P/CjjOcX0/Kq2tYvbwOWj6EPS8UvAO8ddyd2B8OBdoskDwfeDry8H0EYRFV9\noz+tcCTw9CRTQ9VO8gLgjqq6nvGNGjytH1Z/Ht0pnR8ZsPYy4CTgjVV1EvAfLO6Q8ryS7AP8GPDn\nA9d9LPCrwGPoRssenuSnh6pfVTcCa+mGtf8GuJ7xhdTZPhUTetO5JK8E/rOqBg2pi8VwoAcsyUOA\ndwB/UlXvGkcf+uHs9wBPGbDsDwGnJ/kccCXwzCSXDVifqvpC//2LdOfbh5x3sAXYUlUf7V+/nS4s\nDO15wHX9z2BITwH+oaq2V9W9wF/Q/Z0YTFW9taqeUlXPAO6iu5/L0LYlORQgyWHAHWPow1gl+Tm6\n04uDhcPFZjjQA5LuvqcXA5uq6vcHrv2o2ZnRSfYDnkP36WkQVfW/q2pFVX033bD2B6rq7KHqJ9k/\nyQH98sOA0+gmZw2iqm4Hbk1ybN/0bODTQ9Uf8WK6cDa0G4GnJtmv/3fwbLqJqYNJ8uj++1F0k/LG\n8an13cCqfnkVMJYPCL3BR/CSrKQ7tXhGP0l6jzARN0HakyW5EngGcEiSW4HzquqSAbvwNOBngE8m\nmf3FfG5VXT1A7cOAdUn2ogu6l1fVNQPU3ZGhh1OXA+/s70u/DPjTqlo/cB/+B/Cn/dD+Z4GfH7J4\nH4qeTXfVwKCq6hP9SNHH6Ibz/wl4y8DdeHuSQ+gmRq6pqq8sZrGR/28eNfv/DfBa4Kokq+meEfnC\nAeufTzcJ+Q+ARwHvSXJ9VT1vwPrn0k2I3tD/W/zHqlqzGPWH5E2QJElSw9MKkiSpYTiQJEkNw4Ek\nSWoYDiRJUsNwIEmSGoYDSZLUMBxI2i2SvCnJb467H5IeOO9zIE2oJJuBRwP30T0XYQPw0oXcSKe/\nXezqqhrsWQ798ztuBz5UVc8fqq40iRw5kCZXAS+oqgOAJwInAEv5k/9P0D0BcSrJJD/5T1p0hgNJ\n9I/cXQ88YbYtyTlJbknylSSfTnJm3/544E3ADyb5tyR39u2XJnl1vzyVZEuSX0+yLclt/WjD7LEP\nSfJXSe5OsjHJa5J8eBfdXAX8MfD3dLfs/qYkJyW5vu/rVUneNtuXfv0Lknw8yZeT/H2SE0bWvaLv\n61eS3Jjkmd/RD1HagxgOpMkWgCRHAiuBa0fW3QL8cFU9ArgA+JMky6vqM8Av0d1D/oCqOrjffu7j\nepcDj6B7
nPFq4A+THNiv+0Pg3/ptVgFns5NnUyQ5Gng6cFX/dfbIun3onkj5VuCRdA9hOnP2eElO\npHs42EuAg4E3A+9O8pAkxwEvBZ7Sv8/T6J4PIE00w4E0uQK8K8lX6IbrPwu8ZnZlVb29f/IiVXUV\ncDNwysi+OzrmrHuAV1XVfVX1N8C/A8cl2Rv4ceD8qvpaHzbW7eSYAD8LbKyqLXSPRj4+yZP6dU8F\n9q6qP+hrvRPYOLLvLwJvrqqPVucy4OvADwL3AvsCT0jykKr6fFX9y076IU0Ew4E0uYruMbOPAKaA\nZwJPmV2Z5Ox+qP7LSb4MfB9wyLdx/O1V9Y2R118FHg58F91TJG8dWbdlF8c6G/hzgKraDsxw/2OC\nDwe2ztl+9NhHA/9z9n307+VI4LCq+izwq8A0sC3JlUkOW9jbk/ZchgNJVNWH6B57uxa+OYz/Froh\n94Or6pHAp7j/0/2OTgEs5PKnL9J9Yl8x0rZiB9uS5IeAxwG/meQLSb5A96n/v/ajEF8Ajpiz21Ej\ny58HfquqHjny9fCqehtAVV3ZX3VxdN//tQt4D9IezXAgadbvAycnOQV4GN0vyi8BeyX5ebqRg1nb\ngCOTPGSkLez81AAAVXUf3amB6ST7JfleutMGOwoWq+gmSz6e7qqKJ/Z92Q94HvAPwH1JfiXJsiRn\nAD8wsv8fAb+U5OR0HpbkR5M8PMmxSZ6ZZF+6Uw1fo7u0U5pohgNJAFTVl+jO/b+iqjYBvwv8I929\nBb4P+LuRza8BPg3cnuSO2UPQ/oLf2SjCrwAH9sdeRzeJ8D/nbpTkocBPAX9QVXeMfG0GLgfOrqp7\n6OYwrAa+DPw08Nezx6uq6+gmI74BuJNu7sTshMZ9gd+mG834AvAo4Nyd9FuaCN4ESdLYJVkLPLqq\nfn43He9a4I1VtW53HE+aNI4cSBpckuOSfH8/zH8y8At0lyN+p8d7epJD+9MKq+hGOq7eXf2VJs2y\ncXdA0kQ6gO5UwuF08xf+b1W9+wEc7zi6+x88jO6SzJ/sb+wk6TvgaQVJktTwtIIkSWoYDiRJUsNw\nIEmSGoYDSZLUMBxIkqSG4UCSJDX+P5qt8TIyV2YRAAAAAElFTkSuQmCC\n",
"text": [
"<matplotlib.figure.Figure at 0x11ba49150>"
]
}
],
"prompt_number": 66
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment