Skip to content

Instantly share code, notes, and snippets.

@pronojitsaha
Created November 3, 2013 12:53
Show Gist options
  • Save pronojitsaha/7290000 to your computer and use it in GitHub Desktop.
Save pronojitsaha/7290000 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from pandas import *\n",
"from numpy import *\n",
"import numpy as np"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!head -n 1 recsys.csv"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"755,1,5,2,,4,4,2,2,,3,2,,5,2,5,4,2,5,,\r",
"5277,5,,,2,4,2,1,,,4,3,2,2,,2,,5,1,3,\r",
"1577,,,,5,2,,4,,,1,,1,4,4,1,1,2,3,1,3\r",
"4388,,3,,,,1,2,3,4,,,4,,3,5,,5,1,1,2\r",
"1202,4,3,4,1,4,1,,4,,1,5,1,,4,,3,5,5,,\r",
"3823,2,4,4,4,,,3,1,4,4,5,2,4,,1,,,3,,2\r",
"5448,4,,3,1,1,4,,5,2,,1,,,3,,1,,,5,2\r",
"5347,4,,,,3,2,2,,3,,,2,1,2,4,,1,3,5,\r",
"4117,5,1,,4,2,4,4,4,,1,2,,1,,5,,,,,5\r",
"2765,4,2,,5,3,,4,3,4,,,,2,,,2,5,1,,\r",
"5450,2,1,5,,,5,5,,,,,3,2,,,1,,2,1,4\r",
"139,3,5,2,,,,2,,1,,3,,3,,2,5,,,,2\r",
"1940,2,,,5,4,,4,5,,,,2,4,,3,,,,5,\r",
"3118,3,,3,,2,,3,,,4,,1,2,2,3,5,1,,,\r",
"4656,4,4,,,5,5,2,,3,5,,1,3,,2,,3,,3,1\r",
"4796,,,1,,3,2,,2,,1,5,,2,,,2,2,4,3,4\r",
"6037,,,,,,,2,,2,,2,,3,,3,4,,,,\r",
"3048,4,5,1,5,1,1,4,,5,,,,,4,,,2,1,2,5\r",
"4790,5,1,3,,,4,2,1,3,3,3,1,,,,2,,,,\r",
"4489,1,2,2,4,5,,2,3,2,2,1,,4,5,5,4,3,5,3,"
]
}
],
"prompt_number": 2
},
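{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cell below reads the comma-separated file into a 2-D NumPy array with one row per user: column 0 holds the user ID and the remaining columns hold that user's movie ratings. Empty fields (missing ratings) are read as NaN."
]
},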
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = genfromtxt(\"recsys.csv\", delimiter=',')\n",
"data.shape"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"(20, 21)"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"m,n = data.shape\n",
"m, n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"(20, 21)"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data[:,1]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"array([ 1., 5., nan, nan, 4., 2., 4., 4., 5., 4., 2.,\n",
" 3., 2., 3., 4., nan, nan, 4., 5., 1.])"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data[(data[:,0] > 100), 0]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"array([ 755., 5277., 1577., 4388., 1202., 3823., 5448., 5347.,\n",
" 4117., 2765., 5450., 139., 1940., 3118., 4656., 4796.,\n",
" 6037., 3048., 4790., 4489.])"
]
}
],
"prompt_number": 6
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Mean Ratings: the mean rating of each movie, disregarding the nulls (missing ratings)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from collections import defaultdict\n",
"a = defaultdict(lambda:0)\n",
"for i in range(1,n):\n",
" mask = data[:,i]>0\n",
" t = len(data[mask, i])\n",
" s = sum(data[mask,i])\n",
" a[i] = (s/float(t))\n",
"print a \n",
"sorted(a.items(), key = lambda (key, value): value)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"defaultdict(<function <lambda> at 0x1064eec80>, {1: 3.3125, 2: 3.0, 3: 2.7272727272727271, 4: 3.6000000000000001, 5: 3.0714285714285716, 6: 2.9166666666666665, 7: 2.8235294117647061, 8: 3.0, 9: 3.0, 10: 2.6363636363636362, 11: 2.9090909090909092, 12: 1.8181818181818181, 13: 2.7999999999999998, 14: 3.2222222222222223, 15: 3.1538461538461537, 16: 2.8333333333333335, 17: 3.0, 18: 2.8333333333333335, 19: 2.9090909090909092, 20: 3.0})\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"[(12, 1.8181818181818181),\n",
" (10, 2.6363636363636362),\n",
" (3, 2.7272727272727271),\n",
" (13, 2.7999999999999998),\n",
" (7, 2.8235294117647061),\n",
" (16, 2.8333333333333335),\n",
" (18, 2.8333333333333335),\n",
" (11, 2.9090909090909092),\n",
" (19, 2.9090909090909092),\n",
" (6, 2.9166666666666665),\n",
" (2, 3.0),\n",
" (8, 3.0),\n",
" (9, 3.0),\n",
" (17, 3.0),\n",
" (20, 3.0),\n",
" (5, 3.0714285714285716),\n",
" (15, 3.1538461538461537),\n",
" (14, 3.2222222222222223),\n",
" (1, 3.3125),\n",
" (4, 3.6000000000000001)]"
]
}
],
"prompt_number": 7
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Ratings >= 4: for each movie, the fraction of its ratings that are 4 or higher"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from collections import defaultdict\n",
"b = defaultdict(lambda:0)\n",
"for i in range(1,n):\n",
" mask1 = data[:,i]>0\n",
" mask2 = data[:,i]>3\n",
" t = len(data[mask1, i])\n",
" s = len(data[mask2,i])\n",
" b[i] = (s/float(t))\n",
"print b \n",
"sorted(b.items(), key = lambda (key, value): value)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"defaultdict(<function <lambda> at 0x1064eede8>, {1: 0.5625, 2: 0.4166666666666667, 3: 0.2727272727272727, 4: 0.7, 5: 0.42857142857142855, 6: 0.5, 7: 0.35294117647058826, 8: 0.36363636363636365, 9: 0.36363636363636365, 10: 0.36363636363636365, 11: 0.2727272727272727, 12: 0.09090909090909091, 13: 0.3333333333333333, 14: 0.4444444444444444, 15: 0.38461538461538464, 16: 0.4166666666666667, 17: 0.3333333333333333, 18: 0.3333333333333333, 19: 0.2727272727272727, 20: 0.4})\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"[(12, 0.09090909090909091),\n",
" (3, 0.2727272727272727),\n",
" (11, 0.2727272727272727),\n",
" (19, 0.2727272727272727),\n",
" (13, 0.3333333333333333),\n",
" (17, 0.3333333333333333),\n",
" (18, 0.3333333333333333),\n",
" (7, 0.35294117647058826),\n",
" (8, 0.36363636363636365),\n",
" (9, 0.36363636363636365),\n",
" (10, 0.36363636363636365),\n",
" (15, 0.38461538461538464),\n",
" (20, 0.4),\n",
" (2, 0.4166666666666667),\n",
" (16, 0.4166666666666667),\n",
" (5, 0.42857142857142855),\n",
" (14, 0.4444444444444444),\n",
" (6, 0.5),\n",
" (1, 0.5625),\n",
" (4, 0.7)]"
]
}
],
"prompt_number": 8
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Count: the number of ratings each movie received"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from collections import defaultdict\n",
"c = defaultdict(lambda:0)\n",
"for i in range(1,n):\n",
" mask = data[:,i]>0\n",
" t = len(data[mask, i])\n",
" c[i] = int(t)\n",
"print c \n",
"sorted(c.items(), key = lambda (key, value): value)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"defaultdict(<function <lambda> at 0x1064eef50>, {1: 16, 2: 12, 3: 11, 4: 10, 5: 14, 6: 12, 7: 17, 8: 11, 9: 11, 10: 11, 11: 11, 12: 11, 13: 15, 14: 9, 15: 13, 16: 12, 17: 12, 18: 12, 19: 11, 20: 10})\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"[(14, 9),\n",
" (4, 10),\n",
" (20, 10),\n",
" (3, 11),\n",
" (8, 11),\n",
" (9, 11),\n",
" (10, 11),\n",
" (11, 11),\n",
" (12, 11),\n",
" (19, 11),\n",
" (2, 12),\n",
" (6, 12),\n",
" (16, 12),\n",
" (17, 12),\n",
" (18, 12),\n",
" (15, 13),\n",
" (5, 14),\n",
" (13, 15),\n",
" (1, 16),\n",
" (7, 17)]"
]
}
],
"prompt_number": 9
},
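{
"cell_type": "raw",
"metadata": {},
"source": [
"Association with movie 1: for every other movie, the fraction of the users who rated movie 1 that also rated that movie"
]
},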
{
"cell_type": "code",
"collapsed": false,
"input": [
"from collections import defaultdict\n",
"d = defaultdict(lambda:0)\n",
"mask = data[:,1]>0\n",
"t = len(data[mask, 1])\n",
"for i in range(2,n):\n",
" s = len(data[(data[:,1]*data[:,i])>0,i])\n",
" d[i] = s/float(t)\n",
"print d\n",
"sorted(d.items(), key = lambda (key, value): value)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"defaultdict(<function <lambda> at 0x1064fe050>, {2: 0.6875, 3: 0.625, 4: 0.5625, 5: 0.75, 6: 0.625, 7: 0.875, 8: 0.5625, 9: 0.5625, 10: 0.5625, 11: 0.5625, 12: 0.5625, 13: 0.75, 14: 0.4375, 15: 0.625, 16: 0.5625, 17: 0.5625, 18: 0.5625, 19: 0.5, 20: 0.4375})\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"[(14, 0.4375),\n",
" (20, 0.4375),\n",
" (19, 0.5),\n",
" (4, 0.5625),\n",
" (8, 0.5625),\n",
" (9, 0.5625),\n",
" (10, 0.5625),\n",
" (11, 0.5625),\n",
" (12, 0.5625),\n",
" (16, 0.5625),\n",
" (17, 0.5625),\n",
" (18, 0.5625),\n",
" (3, 0.625),\n",
" (6, 0.625),\n",
" (15, 0.625),\n",
" (2, 0.6875),\n",
" (5, 0.75),\n",
" (13, 0.75),\n",
" (7, 0.875)]"
]
}
],
"prompt_number": 10
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Assignment in Pandas"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = pd.read_csv('recsys-data.csv', index_col=0, header=0)\n",
"data"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>260: Star Wars: Episode IV - A New Hope (1977)</th>\n",
" <th>1210: Star Wars: Episode VI - Return of the Jedi (1983)</th>\n",
" <th>356: Forrest Gump (1994)</th>\n",
" <th>318: Shawshank Redemption, The (1994)</th>\n",
" <th>593: Silence of the Lambs, The (1991)</th>\n",
" <th>3578: Gladiator (2000)</th>\n",
" <th>1: Toy Story (1995)</th>\n",
" <th>2028: Saving Private Ryan (1998)</th>\n",
" <th>296: Pulp Fiction (1994)</th>\n",
" <th>1259: Stand by Me (1986)</th>\n",
" <th>2396: Shakespeare in Love (1998)</th>\n",
" <th>2916: Total Recall (1990)</th>\n",
" <th>780: Independence Day (ID4) (1996)</th>\n",
" <th>541: Blade Runner (1982)</th>\n",
" <th>1265: Groundhog Day (1993)</th>\n",
" <th>2571: Matrix, The (1999)</th>\n",
" <th>527: Schindler's List (1993)</th>\n",
" <th>2762: Sixth Sense, The (1999)</th>\n",
" <th>1198: Raiders of the Lost Ark (1981)</th>\n",
" <th>34: Babe (1995)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>User</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>755 </th>\n",
" <td> 1</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td> 5</td>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5277</th>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1577</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4388</th>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1202</th>\n",
" <td> 4</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3823</th>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5448</th>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5347</th>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4117</th>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2765</th>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5450</th>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139 </th>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1940</th>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3118</th>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4656</th>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4796</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6037</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3048</th>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4790</th>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td> 3</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4489</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 5</td>\n",
" <td> 4</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
" 260: Star Wars: Episode IV - A New Hope (1977) \\\n",
"User \n",
"755 1 \n",
"5277 5 \n",
"1577 NaN \n",
"4388 NaN \n",
"1202 4 \n",
"3823 2 \n",
"5448 4 \n",
"5347 4 \n",
"4117 5 \n",
"2765 4 \n",
"5450 2 \n",
"139 3 \n",
"1940 2 \n",
"3118 3 \n",
"4656 4 \n",
"4796 NaN \n",
"6037 NaN \n",
"3048 4 \n",
"4790 5 \n",
"4489 1 \n",
"\n",
" 1210: Star Wars: Episode VI - Return of the Jedi (1983) \\\n",
"User \n",
"755 5 \n",
"5277 NaN \n",
"1577 NaN \n",
"4388 3 \n",
"1202 3 \n",
"3823 4 \n",
"5448 NaN \n",
"5347 NaN \n",
"4117 1 \n",
"2765 2 \n",
"5450 1 \n",
"139 5 \n",
"1940 NaN \n",
"3118 NaN \n",
"4656 4 \n",
"4796 NaN \n",
"6037 NaN \n",
"3048 5 \n",
"4790 1 \n",
"4489 2 \n",
"\n",
" 356: Forrest Gump (1994) 318: Shawshank Redemption, The (1994) \\\n",
"User \n",
"755 2 NaN \n",
"5277 NaN 2 \n",
"1577 NaN 5 \n",
"4388 NaN NaN \n",
"1202 4 1 \n",
"3823 4 4 \n",
"5448 3 1 \n",
"5347 NaN NaN \n",
"4117 NaN 4 \n",
"2765 NaN 5 \n",
"5450 5 NaN \n",
"139 2 NaN \n",
"1940 NaN 5 \n",
"3118 3 NaN \n",
"4656 NaN NaN \n",
"4796 1 NaN \n",
"6037 NaN NaN \n",
"3048 1 5 \n",
"4790 3 NaN \n",
"4489 2 4 \n",
"\n",
" 593: Silence of the Lambs, The (1991) 3578: Gladiator (2000) \\\n",
"User \n",
"755 4 4 \n",
"5277 4 2 \n",
"1577 2 NaN \n",
"4388 NaN 1 \n",
"1202 4 1 \n",
"3823 NaN NaN \n",
"5448 1 4 \n",
"5347 3 2 \n",
"4117 2 4 \n",
"2765 3 NaN \n",
"5450 NaN 5 \n",
"139 NaN NaN \n",
"1940 4 NaN \n",
"3118 2 NaN \n",
"4656 5 5 \n",
"4796 3 2 \n",
"6037 NaN NaN \n",
"3048 1 1 \n",
"4790 NaN 4 \n",
"4489 5 NaN \n",
"\n",
" 1: Toy Story (1995) 2028: Saving Private Ryan (1998) \\\n",
"User \n",
"755 2 2 \n",
"5277 1 NaN \n",
"1577 4 NaN \n",
"4388 2 3 \n",
"1202 NaN 4 \n",
"3823 3 1 \n",
"5448 NaN 5 \n",
"5347 2 NaN \n",
"4117 4 4 \n",
"2765 4 3 \n",
"5450 5 NaN \n",
"139 2 NaN \n",
"1940 4 5 \n",
"3118 3 NaN \n",
"4656 2 NaN \n",
"4796 NaN 2 \n",
"6037 2 NaN \n",
"3048 4 NaN \n",
"4790 2 1 \n",
"4489 2 3 \n",
"\n",
" 296: Pulp Fiction (1994) 1259: Stand by Me (1986) \\\n",
"User \n",
"755 NaN 3 \n",
"5277 NaN 4 \n",
"1577 NaN 1 \n",
"4388 4 NaN \n",
"1202 NaN 1 \n",
"3823 4 4 \n",
"5448 2 NaN \n",
"5347 3 NaN \n",
"4117 NaN 1 \n",
"2765 4 NaN \n",
"5450 NaN NaN \n",
"139 1 NaN \n",
"1940 NaN NaN \n",
"3118 NaN 4 \n",
"4656 3 5 \n",
"4796 NaN 1 \n",
"6037 2 NaN \n",
"3048 5 NaN \n",
"4790 3 3 \n",
"4489 2 2 \n",
"\n",
" 2396: Shakespeare in Love (1998) 2916: Total Recall (1990) \\\n",
"User \n",
"755 2 NaN \n",
"5277 3 2 \n",
"1577 NaN 1 \n",
"4388 NaN 4 \n",
"1202 5 1 \n",
"3823 5 2 \n",
"5448 1 NaN \n",
"5347 NaN 2 \n",
"4117 2 NaN \n",
"2765 NaN NaN \n",
"5450 NaN 3 \n",
"139 3 NaN \n",
"1940 NaN 2 \n",
"3118 NaN 1 \n",
"4656 NaN 1 \n",
"4796 5 NaN \n",
"6037 2 NaN \n",
"3048 NaN NaN \n",
"4790 3 1 \n",
"4489 1 NaN \n",
"\n",
" 780: Independence Day (ID4) (1996) 541: Blade Runner (1982) \\\n",
"User \n",
"755 5 2 \n",
"5277 2 NaN \n",
"1577 4 4 \n",
"4388 NaN 3 \n",
"1202 NaN 4 \n",
"3823 4 NaN \n",
"5448 NaN 3 \n",
"5347 1 2 \n",
"4117 1 NaN \n",
"2765 2 NaN \n",
"5450 2 NaN \n",
"139 3 NaN \n",
"1940 4 NaN \n",
"3118 2 2 \n",
"4656 3 NaN \n",
"4796 2 NaN \n",
"6037 3 NaN \n",
"3048 NaN 4 \n",
"4790 NaN NaN \n",
"4489 4 5 \n",
"\n",
" 1265: Groundhog Day (1993) 2571: Matrix, The (1999) \\\n",
"User \n",
"755 5 4 \n",
"5277 2 NaN \n",
"1577 1 1 \n",
"4388 5 NaN \n",
"1202 NaN 3 \n",
"3823 1 NaN \n",
"5448 NaN 1 \n",
"5347 4 NaN \n",
"4117 5 NaN \n",
"2765 NaN 2 \n",
"5450 NaN 1 \n",
"139 2 5 \n",
"1940 3 NaN \n",
"3118 3 5 \n",
"4656 2 NaN \n",
"4796 NaN 2 \n",
"6037 3 4 \n",
"3048 NaN NaN \n",
"4790 NaN 2 \n",
"4489 5 4 \n",
"\n",
" 527: Schindler's List (1993) 2762: Sixth Sense, The (1999) \\\n",
"User \n",
"755 2 5 \n",
"5277 5 1 \n",
"1577 2 3 \n",
"4388 5 1 \n",
"1202 5 5 \n",
"3823 NaN 3 \n",
"5448 NaN NaN \n",
"5347 1 3 \n",
"4117 NaN NaN \n",
"2765 5 1 \n",
"5450 NaN 2 \n",
"139 NaN NaN \n",
"1940 NaN NaN \n",
"3118 1 NaN \n",
"4656 3 NaN \n",
"4796 2 4 \n",
"6037 NaN NaN \n",
"3048 2 1 \n",
"4790 NaN NaN \n",
"4489 3 5 \n",
"\n",
" 1198: Raiders of the Lost Ark (1981) 34: Babe (1995) \n",
"User \n",
"755 NaN NaN \n",
"5277 3 NaN \n",
"1577 1 3 \n",
"4388 1 2 \n",
"1202 NaN NaN \n",
"3823 NaN 2 \n",
"5448 5 2 \n",
"5347 5 NaN \n",
"4117 NaN 5 \n",
"2765 NaN NaN \n",
"5450 1 4 \n",
"139 NaN 2 \n",
"1940 5 NaN \n",
"3118 NaN NaN \n",
"4656 3 1 \n",
"4796 3 4 \n",
"6037 NaN NaN \n",
"3048 2 5 \n",
"4790 NaN NaN \n",
"4489 3 NaN "
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"movie_list = data.columns\n",
"data.columns = np.arange(20)\n",
"data"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>10</th>\n",
" <th>11</th>\n",
" <th>12</th>\n",
" <th>13</th>\n",
" <th>14</th>\n",
" <th>15</th>\n",
" <th>16</th>\n",
" <th>17</th>\n",
" <th>18</th>\n",
" <th>19</th>\n",
" </tr>\n",
" <tr>\n",
" <th>User</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>755 </th>\n",
" <td> 1</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td> 5</td>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5277</th>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1577</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4388</th>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1202</th>\n",
" <td> 4</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3823</th>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5448</th>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5347</th>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4117</th>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2765</th>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5450</th>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139 </th>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1940</th>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3118</th>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4656</th>\n",
" <td> 4</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td> 5</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4796</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 1</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6037</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td> 3</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3048</th>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td> 1</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4790</th>\n",
" <td> 5</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td> 3</td>\n",
" <td> 3</td>\n",
" <td> 3</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4489</th>\n",
" <td> 1</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td>NaN</td>\n",
" <td> 2</td>\n",
" <td> 3</td>\n",
" <td> 2</td>\n",
" <td> 2</td>\n",
" <td> 1</td>\n",
" <td>NaN</td>\n",
" <td> 4</td>\n",
" <td> 5</td>\n",
" <td> 5</td>\n",
" <td> 4</td>\n",
" <td> 3</td>\n",
" <td> 5</td>\n",
" <td> 3</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 13,
"text": [
" 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 \\\n",
"User \n",
"755 1 5 2 NaN 4 4 2 2 NaN 3 2 NaN 5 2 5 4 2 5 \n",
"5277 5 NaN NaN 2 4 2 1 NaN NaN 4 3 2 2 NaN 2 NaN 5 1 \n",
"1577 NaN NaN NaN 5 2 NaN 4 NaN NaN 1 NaN 1 4 4 1 1 2 3 \n",
"4388 NaN 3 NaN NaN NaN 1 2 3 4 NaN NaN 4 NaN 3 5 NaN 5 1 \n",
"1202 4 3 4 1 4 1 NaN 4 NaN 1 5 1 NaN 4 NaN 3 5 5 \n",
"3823 2 4 4 4 NaN NaN 3 1 4 4 5 2 4 NaN 1 NaN NaN 3 \n",
"5448 4 NaN 3 1 1 4 NaN 5 2 NaN 1 NaN NaN 3 NaN 1 NaN NaN \n",
"5347 4 NaN NaN NaN 3 2 2 NaN 3 NaN NaN 2 1 2 4 NaN 1 3 \n",
"4117 5 1 NaN 4 2 4 4 4 NaN 1 2 NaN 1 NaN 5 NaN NaN NaN \n",
"2765 4 2 NaN 5 3 NaN 4 3 4 NaN NaN NaN 2 NaN NaN 2 5 1 \n",
"5450 2 1 5 NaN NaN 5 5 NaN NaN NaN NaN 3 2 NaN NaN 1 NaN 2 \n",
"139 3 5 2 NaN NaN NaN 2 NaN 1 NaN 3 NaN 3 NaN 2 5 NaN NaN \n",
"1940 2 NaN NaN 5 4 NaN 4 5 NaN NaN NaN 2 4 NaN 3 NaN NaN NaN \n",
"3118 3 NaN 3 NaN 2 NaN 3 NaN NaN 4 NaN 1 2 2 3 5 1 NaN \n",
"4656 4 4 NaN NaN 5 5 2 NaN 3 5 NaN 1 3 NaN 2 NaN 3 NaN \n",
"4796 NaN NaN 1 NaN 3 2 NaN 2 NaN 1 5 NaN 2 NaN NaN 2 2 4 \n",
"6037 NaN NaN NaN NaN NaN NaN 2 NaN 2 NaN 2 NaN 3 NaN 3 4 NaN NaN \n",
"3048 4 5 1 5 1 1 4 NaN 5 NaN NaN NaN NaN 4 NaN NaN 2 1 \n",
"4790 5 1 3 NaN NaN 4 2 1 3 3 3 1 NaN NaN NaN 2 NaN NaN \n",
"4489 1 2 2 4 5 NaN 2 3 2 2 1 NaN 4 5 5 4 3 5 \n",
"\n",
" 18 19 \n",
"User \n",
"755 NaN NaN \n",
"5277 3 NaN \n",
"1577 1 3 \n",
"4388 1 2 \n",
"1202 NaN NaN \n",
"3823 NaN 2 \n",
"5448 5 2 \n",
"5347 5 NaN \n",
"4117 NaN 5 \n",
"2765 NaN NaN \n",
"5450 1 4 \n",
"139 NaN 2 \n",
"1940 5 NaN \n",
"3118 NaN NaN \n",
"4656 3 1 \n",
"4796 3 4 \n",
"6037 NaN NaN \n",
"3048 2 5 \n",
"4790 NaN NaN \n",
"4489 3 NaN "
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Titles of the five columns with the highest mean rating: rank the\n",
"# per-column means in descending order, keep the first five column\n",
"# labels, then look each label up in movie_list.\n",
"# NOTE(review): Series.order() is the older pandas spelling of\n",
"# sort_values(); confirm against the pandas version in use.\n",
"[\n",
"    movie_list[col]\n",
"    for col in data.apply(np.mean, axis=0).order(ascending=False).index[:5]\n",
"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"['318: Shawshank Redemption, The (1994)',\n",
" '260: Star Wars: Episode IV - A New Hope (1977)',\n",
" '541: Blade Runner (1982)',\n",
" '1265: Groundhog Day (1993)',\n",
" '593: Silence of the Lambs, The (1991)']"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Titles of the five columns with the most non-missing ratings: rank the\n",
"# per-column counts in descending order, keep the first five column\n",
"# labels, then look each label up in movie_list.\n",
"# NOTE(review): Series.order() is the older pandas spelling of\n",
"# sort_values(); confirm against the pandas version in use.\n",
"[\n",
"    movie_list[col]\n",
"    for col in data.count(axis=0).order(ascending=False).index[:5]\n",
"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 15,
"text": [
"['1: Toy Story (1995)',\n",
" '260: Star Wars: Episode IV - A New Hope (1977)',\n",
" '780: Independence Day (ID4) (1996)',\n",
" '593: Silence of the Lambs, The (1991)',\n",
" '1265: Groundhog Day (1993)']"
]
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Titles of the five columns with the largest share of ratings above 3.\n",
"# count     : non-missing ratings per column\n",
"# new_count : ratings strictly greater than 3 per column\n",
"# Both are held as float Series before dividing (presumably to avoid an\n",
"# integer quotient -- confirm for the Python/pandas version in use).\n",
"# NOTE(review): Series.order() is the older pandas spelling of\n",
"# sort_values(); confirm against the pandas version in use.\n",
"count = data.count(axis=0).astype(float)\n",
"newdata = data[data > 3]\n",
"new_count = newdata.count(axis=0).astype(float)\n",
"share_above_3 = new_count / count\n",
"[movie_list[col] for col in share_above_3.order(ascending=False).index[:5]]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 94,
"text": [
"['318: Shawshank Redemption, The (1994)',\n",
" '260: Star Wars: Episode IV - A New Hope (1977)',\n",
" '3578: Gladiator (2000)',\n",
" '541: Blade Runner (1982)',\n",
" '593: Silence of the Lambs, The (1991)']"
]
}
],
"prompt_number": 94
},
{
"cell_type": "code",
"collapsed": true,
"input": [
"# For every column x in 1..19: the number of users with a rating in both\n",
"# column 0 and column x, as a fraction of the users with a rating in column 0.\n",
"# data[0] + data[x] is NaN wherever either rating is missing, so count() on\n",
"# the sum counts only the users who rated both.\n",
"base_count = float(data[0].count())\n",
"# Deliberately not named list: that would shadow the builtin that the\n",
"# ranking cells above call, breaking them when re-run in the same kernel.\n",
"co_rated_share = {}\n",
"for i, x in enumerate(range(1, 20)):\n",
"    # Keys stay 0-based as in the recorded output: key i describes column i + 1.\n",
"    co_rated_share[i] = (data[0] + data[x]).count() / base_count\n",
"# Sort ascending by fraction; kv[1] is the value of each (key, value) pair.\n",
"# Indexing is used because tuple-unpacking lambda parameters are Python 2 only.\n",
"sorted(co_rated_share.items(), key=lambda kv: kv[1])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 166,
"text": [
"[(12, 0.4375),\n",
" (18, 0.4375),\n",
" (17, 0.5),\n",
" (2, 0.5625),\n",
" (6, 0.5625),\n",
" (7, 0.5625),\n",
" (8, 0.5625),\n",
" (9, 0.5625),\n",
" (10, 0.5625),\n",
" (14, 0.5625),\n",
" (15, 0.5625),\n",
" (16, 0.5625),\n",
" (1, 0.625),\n",
" (4, 0.625),\n",
" (13, 0.625),\n",
" (0, 0.6875),\n",
" (3, 0.75),\n",
" (11, 0.75),\n",
" (5, 0.875)]"
]
}
],
"prompt_number": 166
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment