pilipolio/201610_MovieLens_1m.ipynb

## 201610_MovieLens_1m.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2016-10-17 14:09:06--  http://files.grouplens.org/datasets/movielens/ml-1m.zip\n",
      "Resolving files.grouplens.org... 128.101.34.146\n",
      "Connecting to files.grouplens.org|128.101.34.146|:80... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 5917549 (5.6M) [application/zip]\n",
      "Saving to: 'ml-1m.zip.1'\n",
      "\n",
      "ml-1m.zip.1         100%[=====================>]   5.64M   550KB/s   in 21s    \n",
      "\n",
      "2016-10-17 14:09:27 (279 KB/s) - 'ml-1m.zip.1' saved [5917549/5917549]\n",
      "\n",
      "Archive:  ml-1m.zip\n",
      "  inflating: ./ml-1m/movies.dat      \n",
      "  inflating: ./ml-1m/ratings.dat     \n",
      "  inflating: ./ml-1m/README          \n",
      "  inflating: ./ml-1m/users.dat       \n"
     ]
    }
   ],
   "source": [
    "# Fetch the MovieLens 1M archive. -nc (no-clobber) skips the download when ml-1m.zip\n",
    "# already exists; without it wget stores a re-download as ml-1m.zip.1 while the unzip\n",
    "# step below keeps reading the older ml-1m.zip.\n",
    "! wget -nc http://files.grouplens.org/datasets/movielens/ml-1m.zip\n",
    "# -o overwrites previously extracted files without prompting, so the cell can be re-run.\n",
    "! unzip -o ml-1m.zip -d ."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/gui/.virtualenvs/gui3/lib/python3.5/site-packages/ipykernel/__main__.py:3: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
      "  app.launch_new_instance()\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>item</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1193</td>\n",
       "      <td>5</td>\n",
       "      <td>978300760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>661</td>\n",
       "      <td>3</td>\n",
       "      <td>978302109</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>914</td>\n",
       "      <td>3</td>\n",
       "      <td>978301968</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>3408</td>\n",
       "      <td>4</td>\n",
       "      <td>978300275</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>2355</td>\n",
       "      <td>5</td>\n",
       "      <td>978824291</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user  item  rating  timestamp\n",
       "0     1  1193       5  978300760\n",
       "1     1   661       3  978302109\n",
       "2     1   914       3  978301968\n",
       "3     1  3408       4  978300275\n",
       "4     1  2355       5  978824291"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# ratings.dat has no header row and uses '::' as the field delimiter. pandas treats a\n",
    "# multi-character separator as a regex, which only the python parser engine supports;\n",
    "# requesting that engine explicitly avoids the ParserWarning raised on fallback.\n",
    "ratings = pd.read_csv(\n",
    "    './ml-1m/ratings.dat',\n",
    "    sep='::',\n",
    "    engine='python',\n",
    "    names=['user', 'item', 'rating', 'timestamp'],\n",
    ")\n",
    "ratings.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_support</th>\n",
       "      <th>item_support</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>6040.000000</td>\n",
       "      <td>3706.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>165.597517</td>\n",
       "      <td>269.889099</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>192.747029</td>\n",
       "      <td>384.047838</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>20.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>44.000000</td>\n",
       "      <td>33.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>96.000000</td>\n",
       "      <td>123.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>208.000000</td>\n",
       "      <td>350.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>2314.000000</td>\n",
       "      <td>3428.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       user_support  item_support\n",
       "count   6040.000000   3706.000000\n",
       "mean     165.597517    269.889099\n",
       "std      192.747029    384.047838\n",
       "min       20.000000      1.000000\n",
       "25%       44.000000     33.000000\n",
       "50%       96.000000    123.500000\n",
       "75%      208.000000    350.000000\n",
       "max     2314.000000   3428.000000"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Support = number of distinct counterparts: distinct users per item, distinct items per user.\n",
    "item_supports = ratings.groupby('item')['user'].nunique().to_frame('item_support')\n",
    "user_supports = ratings.groupby('user')['item'].nunique().to_frame('user_support')\n",
    "\n",
    "# Only the last expression of a cell is displayed, so the two summaries are computed\n",
    "# once, side by side, rather than also being evaluated and discarded on their own lines.\n",
    "pd.concat([user_supports.describe(), item_supports.describe()], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/gui/.virtualenvs/gui3/lib/python3.5/site-packages/ipykernel/__main__.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
      "  if __name__ == '__main__':\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>genres</th>\n",
       "      <th>first_genre</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>item</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Toy Story (1995)</td>\n",
       "      <td>Animation|Children's|Comedy</td>\n",
       "      <td>Animation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Jumanji (1995)</td>\n",
       "      <td>Adventure|Children's|Fantasy</td>\n",
       "      <td>Adventure</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Grumpier Old Men (1995)</td>\n",
       "      <td>Comedy|Romance</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Waiting to Exhale (1995)</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Father of the Bride Part II (1995)</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   title                        genres  \\\n",
       "item                                                                     \n",
       "1                       Toy Story (1995)   Animation|Children's|Comedy   \n",
       "2                         Jumanji (1995)  Adventure|Children's|Fantasy   \n",
       "3                Grumpier Old Men (1995)                Comedy|Romance   \n",
       "4               Waiting to Exhale (1995)                  Comedy|Drama   \n",
       "5     Father of the Bride Part II (1995)                        Comedy   \n",
       "\n",
       "     first_genre  \n",
       "item              \n",
       "1      Animation  \n",
       "2      Adventure  \n",
       "3         Comedy  \n",
       "4         Comedy  \n",
       "5         Comedy  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# movies.dat: '::'-delimited, no header row; the 'item' column becomes the index.\n",
    "# engine='python' is stated explicitly because a multi-character separator is parsed as\n",
    "# a regex, which the default C engine does not support (pandas warns when it falls back).\n",
    "# NOTE(review): titles may contain non-UTF-8 bytes; confirm whether an explicit encoding is needed.\n",
    "movies = pd.read_csv(\n",
    "    './ml-1m/movies.dat',\n",
    "    sep='::',\n",
    "    engine='python',\n",
    "    names=['item', 'title', 'genres'],\n",
    "    index_col='item',\n",
    ")\n",
    "# 'genres' is a '|'-separated string; keep its first entry in a separate column.\n",
    "movies['first_genre'] = movies.genres.str.split('|').str.get(0)\n",
    "\n",
    "movies.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/gui/.virtualenvs/gui3/lib/python3.5/site-packages/ipykernel/__main__.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
      "  if __name__ == '__main__':\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>gender</th>\n",
       "      <th>age</th>\n",
       "      <th>occupation</th>\n",
       "      <th>zipcode</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>F</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>48067</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M</td>\n",
       "      <td>56</td>\n",
       "      <td>16</td>\n",
       "      <td>70072</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "      <td>15</td>\n",
       "      <td>55117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M</td>\n",
       "      <td>45</td>\n",
       "      <td>7</td>\n",
       "      <td>02460</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>M</td>\n",
       "      <td>25</td>\n",
       "      <td>20</td>\n",
       "      <td>55455</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     gender  age  occupation zipcode\n",
       "user                                \n",
       "1         F    1          10   48067\n",
       "2         M   56          16   70072\n",
       "3         M   25          15   55117\n",
       "4         M   45           7   02460\n",
       "5         M   25          20   55455"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# See http://files.grouplens.org/datasets/movielens/ml-1m-README.txt for more details\n",
    "# users.dat is '::'-delimited with no header row; the 'user' column becomes the index.\n",
    "# engine='python' is stated explicitly because a multi-character separator is parsed as\n",
    "# a regex, which the default C engine does not support (pandas warns when it falls back).\n",
    "users = pd.read_csv(\n",
    "    './ml-1m/users.dat',\n",
    "    sep='::',\n",
    "    engine='python',\n",
    "    names=['user', 'gender', 'age', 'occupation', 'zipcode'],\n",
    "    index_col='user',\n",
    ")\n",
    "users.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"--2016-10-17 14:09:06-- http://files.grouplens.org/datasets/movielens/ml-1m.zip\n",
	"Resolving files.grouplens.org... 128.101.34.146\n",
	"Connecting to files.grouplens.org|128.101.34.146|:80... connected.\n",
	"HTTP request sent, awaiting response... 200 OK\n",
	"Length: 5917549 (5.6M) [application/zip]\n",
	"Saving to: 'ml-1m.zip.1'\n",
	"\n",
	"ml-1m.zip.1 100%[=====================>] 5.64M 550KB/s in 21s \n",
	"\n",
	"2016-10-17 14:09:27 (279 KB/s) - 'ml-1m.zip.1' saved [5917549/5917549]\n",
	"\n",
	"Archive: ml-1m.zip\n",
	" inflating: ./ml-1m/movies.dat \n",
	" inflating: ./ml-1m/ratings.dat \n",
	" inflating: ./ml-1m/README \n",
	" inflating: ./ml-1m/users.dat \n"
	]
	}
	],
	"source": [
	"# Fetch the MovieLens 1M archive. -nc (no-clobber) skips the download when ml-1m.zip\n",
	"# already exists; without it wget stores a re-download as ml-1m.zip.1 while the unzip\n",
	"# step below keeps reading the older ml-1m.zip.\n",
	"! wget -nc http://files.grouplens.org/datasets/movielens/ml-1m.zip\n",
	"# -o overwrites previously extracted files without prompting, so the cell can be re-run.\n",
	"! unzip -o ml-1m.zip -d ."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/Users/gui/.virtualenvs/gui3/lib/python3.5/site-packages/ipykernel/__main__.py:3: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
	" app.launch_new_instance()\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>user</th>\n",
	" <th>item</th>\n",
	" <th>rating</th>\n",
	" <th>timestamp</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1</td>\n",
	" <td>1193</td>\n",
	" <td>5</td>\n",
	" <td>978300760</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>1</td>\n",
	" <td>661</td>\n",
	" <td>3</td>\n",
	" <td>978302109</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>1</td>\n",
	" <td>914</td>\n",
	" <td>3</td>\n",
	" <td>978301968</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>1</td>\n",
	" <td>3408</td>\n",
	" <td>4</td>\n",
	" <td>978300275</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>1</td>\n",
	" <td>2355</td>\n",
	" <td>5</td>\n",
	" <td>978824291</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" user item rating timestamp\n",
	"0 1 1193 5 978300760\n",
	"1 1 661 3 978302109\n",
	"2 1 914 3 978301968\n",
	"3 1 3408 4 978300275\n",
	"4 1 2355 5 978824291"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"import pandas as pd\n",
	"\n",
	"# ratings.dat has no header row and uses '::' as the field delimiter. pandas treats a\n",
	"# multi-character separator as a regex, which only the python parser engine supports;\n",
	"# requesting that engine explicitly avoids the ParserWarning raised on fallback.\n",
	"ratings = pd.read_csv(\n",
	"    './ml-1m/ratings.dat',\n",
	"    sep='::',\n",
	"    engine='python',\n",
	"    names=['user', 'item', 'rating', 'timestamp'],\n",
	")\n",
	"ratings.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>user_support</th>\n",
	" <th>item_support</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>count</th>\n",
	" <td>6040.000000</td>\n",
	" <td>3706.000000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>mean</th>\n",
	" <td>165.597517</td>\n",
	" <td>269.889099</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>std</th>\n",
	" <td>192.747029</td>\n",
	" <td>384.047838</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>min</th>\n",
	" <td>20.000000</td>\n",
	" <td>1.000000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>25%</th>\n",
	" <td>44.000000</td>\n",
	" <td>33.000000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>50%</th>\n",
	" <td>96.000000</td>\n",
	" <td>123.500000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>75%</th>\n",
	" <td>208.000000</td>\n",
	" <td>350.000000</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>max</th>\n",
	" <td>2314.000000</td>\n",
	" <td>3428.000000</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" user_support item_support\n",
	"count 6040.000000 3706.000000\n",
	"mean 165.597517 269.889099\n",
	"std 192.747029 384.047838\n",
	"min 20.000000 1.000000\n",
	"25% 44.000000 33.000000\n",
	"50% 96.000000 123.500000\n",
	"75% 208.000000 350.000000\n",
	"max 2314.000000 3428.000000"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Support = number of distinct counterparts: distinct users per item, distinct items per user.\n",
	"item_supports = ratings.groupby('item')['user'].nunique().to_frame('item_support')\n",
	"user_supports = ratings.groupby('user')['item'].nunique().to_frame('user_support')\n",
	"\n",
	"# Only the last expression of a cell is displayed, so the two summaries are computed\n",
	"# once, side by side, rather than also being evaluated and discarded on their own lines.\n",
	"pd.concat([user_supports.describe(), item_supports.describe()], axis=1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/Users/gui/.virtualenvs/gui3/lib/python3.5/site-packages/ipykernel/__main__.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
	" if __name__ == '__main__':\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>title</th>\n",
	" <th>genres</th>\n",
	" <th>first_genre</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>item</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>Toy Story (1995)</td>\n",
	" <td>Animation|Children's|Comedy</td>\n",
	" <td>Animation</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>Jumanji (1995)</td>\n",
	" <td>Adventure|Children's|Fantasy</td>\n",
	" <td>Adventure</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>Grumpier Old Men (1995)</td>\n",
	" <td>Comedy|Romance</td>\n",
	" <td>Comedy</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>Waiting to Exhale (1995)</td>\n",
	" <td>Comedy|Drama</td>\n",
	" <td>Comedy</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>Father of the Bride Part II (1995)</td>\n",
	" <td>Comedy</td>\n",
	" <td>Comedy</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" title genres \\\n",
	"item \n",
	"1 Toy Story (1995) Animation|Children's|Comedy \n",
	"2 Jumanji (1995) Adventure|Children's|Fantasy \n",
	"3 Grumpier Old Men (1995) Comedy|Romance \n",
	"4 Waiting to Exhale (1995) Comedy|Drama \n",
	"5 Father of the Bride Part II (1995) Comedy \n",
	"\n",
	" first_genre \n",
	"item \n",
	"1 Animation \n",
	"2 Adventure \n",
	"3 Comedy \n",
	"4 Comedy \n",
	"5 Comedy "
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# movies.dat: '::'-delimited, no header row; the 'item' column becomes the index.\n",
	"# engine='python' is stated explicitly because a multi-character separator is parsed as\n",
	"# a regex, which the default C engine does not support (pandas warns when it falls back).\n",
	"# NOTE(review): titles may contain non-UTF-8 bytes; confirm whether an explicit encoding is needed.\n",
	"movies = pd.read_csv(\n",
	"    './ml-1m/movies.dat',\n",
	"    sep='::',\n",
	"    engine='python',\n",
	"    names=['item', 'title', 'genres'],\n",
	"    index_col='item',\n",
	")\n",
	"# 'genres' is a '|'-separated string; split on the bare pipe character (no backslash)\n",
	"# and keep the first entry in a separate column.\n",
	"movies['first_genre'] = movies.genres.str.split('|').str.get(0)\n",
	"\n",
	"movies.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/Users/gui/.virtualenvs/gui3/lib/python3.5/site-packages/ipykernel/__main__.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n",
	" if __name__ == '__main__':\n"
	]
	},
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>gender</th>\n",
	" <th>age</th>\n",
	" <th>occupation</th>\n",
	" <th>zipcode</th>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>user</th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" <th></th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>F</td>\n",
	" <td>1</td>\n",
	" <td>10</td>\n",
	" <td>48067</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>M</td>\n",
	" <td>56</td>\n",
	" <td>16</td>\n",
	" <td>70072</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>M</td>\n",
	" <td>25</td>\n",
	" <td>15</td>\n",
	" <td>55117</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>M</td>\n",
	" <td>45</td>\n",
	" <td>7</td>\n",
	" <td>02460</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>M</td>\n",
	" <td>25</td>\n",
	" <td>20</td>\n",
	" <td>55455</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" gender age occupation zipcode\n",
	"user \n",
	"1 F 1 10 48067\n",
	"2 M 56 16 70072\n",
	"3 M 25 15 55117\n",
	"4 M 45 7 02460\n",
	"5 M 25 20 55455"
	]
	},
	"execution_count": 18,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# See http://files.grouplens.org/datasets/movielens/ml-1m-README.txt for more details\n",
	"# users.dat is '::'-delimited with no header row; the 'user' column becomes the index.\n",
	"# engine='python' is stated explicitly because a multi-character separator is parsed as\n",
	"# a regex, which the default C engine does not support (pandas warns when it falls back).\n",
	"users = pd.read_csv(\n",
	"    './ml-1m/users.dat',\n",
	"    sep='::',\n",
	"    engine='python',\n",
	"    names=['user', 'gender', 'age', 'occupation', 'zipcode'],\n",
	"    index_col='user',\n",
	")\n",
	"users.head()"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.0"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}