Skip to content

Instantly share code, notes, and snippets.

@EvanZ
Last active August 29, 2015 14:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EvanZ/c8dae8b969482637afb1 to your computer and use it in GitHub Desktop.
Save EvanZ/c8dae8b969482637afb1 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "",
"signature": "sha256:6248457aa29c2f47b8dddd47de0ae38cc80dafb3910d8e7e13a178802f0e73e9"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"units = [['Stephen Curry','Klay Thompson','Harrison Barnes','Draymond Green','Andrew Bogut'],\n",
" ['Stephen Curry','Klay Thompson','Harrison Barnes','Draymond Green','Andrew Bogut'],\n",
" ['Shaun Livingston','Klay Thompson','Harrison Barnes','Draymond Green','Andrew Bogut'],\n",
" ['Shaun Livingston','Klay Thompson','Andre Iguodala','Draymond Green','Andrew Bogut'],\n",
" ['Leandro Barbosa','Shaun Livingston','Andre Iguodala','Harrison Barnes','Draymond Green']]\n",
"\n",
"from sklearn.feature_extraction import DictVectorizer\n",
"v = DictVectorizer(sparse=False)\n",
"list_dicts = []\n",
"for unit in units:\n",
" list_dicts.append({name: 1 for name in unit})\n",
"print(list_dicts)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[{'Andrew Bogut': 1, 'Klay Thompson': 1, 'Draymond Green': 1, 'Harrison Barnes': 1, 'Stephen Curry': 1}, {'Andrew Bogut': 1, 'Klay Thompson': 1, 'Draymond Green': 1, 'Harrison Barnes': 1, 'Stephen Curry': 1}, {'Klay Thompson': 1, 'Andrew Bogut': 1, 'Shaun Livingston': 1, 'Draymond Green': 1, 'Harrison Barnes': 1}, {'Klay Thompson': 1, 'Andrew Bogut': 1, 'Shaun Livingston': 1, 'Draymond Green': 1, 'Andre Iguodala': 1}, {'Shaun Livingston': 1, 'Leandro Barbosa': 1, 'Harrison Barnes': 1, 'Andre Iguodala': 1, 'Draymond Green': 1}]\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"X = v.fit_transform(list_dicts)\n",
"print(X)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[[ 0. 1. 1. 1. 1. 0. 0. 1.]\n",
" [ 0. 1. 1. 1. 1. 0. 0. 1.]\n",
" [ 0. 1. 1. 1. 1. 0. 1. 0.]\n",
" [ 1. 1. 1. 0. 1. 0. 1. 0.]\n",
" [ 1. 0. 1. 1. 0. 1. 1. 0.]]\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x = v.inverse_transform(X)\n",
"print(x)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[{'Klay Thompson': 1.0, 'Andrew Bogut': 1.0, 'Draymond Green': 1.0, 'Harrison Barnes': 1.0, 'Stephen Curry': 1.0}, {'Klay Thompson': 1.0, 'Andrew Bogut': 1.0, 'Draymond Green': 1.0, 'Harrison Barnes': 1.0, 'Stephen Curry': 1.0}, {'Klay Thompson': 1.0, 'Shaun Livingston': 1.0, 'Andrew Bogut': 1.0, 'Draymond Green': 1.0, 'Harrison Barnes': 1.0}, {'Klay Thompson': 1.0, 'Shaun Livingston': 1.0, 'Andrew Bogut': 1.0, 'Draymond Green': 1.0, 'Andre Iguodala': 1.0}, {'Shaun Livingston': 1.0, 'Harrison Barnes': 1.0, 'Leandro Barbosa': 1.0, 'Draymond Green': 1.0, 'Andre Iguodala': 1.0}]\n"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print(v.get_feature_names())"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"['Andre Iguodala', 'Andrew Bogut', 'Draymond Green', 'Harrison Barnes', 'Klay Thompson', 'Leandro Barbosa', 'Shaun Livingston', 'Stephen Curry']\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print(v)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"DictVectorizer(dtype=<type 'numpy.float64'>, separator='=', sparse=False)\n"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import json\n",
"data = []\n",
"with open('/Users/evanzamir/PycharmProjects/sklearn_tutorial/matchups.json') as units_file:\n",
" for j in units_file:\n",
" data.append(json.loads(j))\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 30
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from pprint import pprint\n",
"#d = data[0]\n",
"units = []\n",
"points = []\n",
"poss = []\n",
"for d in data:\n",
" home = d['home']\n",
" away = d['away']\n",
" home_unit = {name:1 for name in d[home]['on']}\n",
" away_unit = {name:-1 for name in d[away]['on']}\n",
" stint = home_unit.copy()\n",
" stint.update(away_unit)\n",
" home_poss = d[home]['stats']['poss']\n",
" away_poss = d[away]['stats']['poss']\n",
" point_diff = d[home]['stats']['pts']-d[away]['stats']['pts']\n",
" units.append(stint)\n",
" points.append(point_diff)\n",
" poss.append((home_poss+away_poss)/2.)\n",
"print(len(units),len(points),len(poss))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(24635, 24635, 24635)\n"
]
}
],
"prompt_number": 80
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"u = DictVectorizer(sparse=False)\n",
"u_mat = u.fit_transform(units)\n",
"pprint(u_mat[:100])\n",
"print(points[:100])\n",
"print(poss[:100])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"array([[ 0., 1., 0., ..., 0., 0., 0.],\n",
" [ 0., 0., 0., ..., 0., 0., 0.],\n",
" [ 0., 0., 0., ..., 0., 0., 0.],\n",
" ..., \n",
" [ 0., 0., 0., ..., 0., 0., 0.],\n",
" [ 0., 0., 0., ..., 0., 0., 0.],\n",
" [ 0., 0., 0., ..., 0., 0., 0.]])\n",
"[-2, 2, 2, -2, -4, 5, 1, -2, -4, 2, -3, 0, 2, -1, 0, -4, 0, 0, 1, -1, -3, -1, -1, -1, 0, 1, 2, 1, -3, 1, -2, 2, 0, 4, -2, -1, 0, -1, -2, -5, 1, 2, 0, 2, -3, 2, 5, -1, 2, 11, -1, 2, -3, 2, 3, 0, -2, -3, 0, 2, 1, 0, -1, 2, 2, -2, 0, 5, 0, -1, 0, 1, 1, 2, -1, 2, 3, -4, -5, 0, 2, 3, -5, 4, -2, -2, 0, 0, -1, 3, 2, -2, 2, -4, -2, -1, -1, -1, -4, 1]\n",
"[3.0, 5.0, 1.5, 2.5, 3.5, 2.5, 1.0, 1.5, 2.0, 1.5, 2.5, 2.0, 4.5, 0.5, 3.5, 3.5, 0.5, 4.0, 2.5, 1.0, 4.5, 2.5, 2.5, 3.0, 1.0, 2.5, 3.0, 0.0, 1.0, 1.0, 3.5, 5.0, 0.0, 3.0, 2.0, 1.5, 1.5, 3.5, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 0.5, 2.5, 2.5, 3.0, 1.5, 5.0, 2.5, 5.0, 5.0, 1.0, 1.0, 1.5, 0.5, 6.0, 0.0, 1.0, 0.5, 1.5, 4.5, 3.5, 1.0, 1.5, 3.5, 2.0, 0.5, 1.0, 0.5, 0.0, 3.0, 1.5, 2.5, 4.0, 5.5, 2.5, 2.5, 0.5, 1.0, 1.5, 2.0, 2.0, 2.0, 3.0, 0.5, 0.5, 1.0, 4.0, 3.0, 4.0, 4.5, 3.0, 1.0, 2.5, 7.0, 2.0, 2.0, 1.0]\n"
]
}
],
"prompt_number": 95
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pprint(u.get_feature_names()[:25])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[u'A.J. Price',\n",
" u'Aaron Brooks',\n",
" u'Aaron Gordon',\n",
" u'Adreian Payne',\n",
" u'Al Horford',\n",
" u'Al Jefferson',\n",
" u'Al-Farouq Aminu',\n",
" u'Alan Anderson',\n",
" u'Alec Burks',\n",
" u'Alex Kirk',\n",
" u'Alex Len',\n",
" u'Alexey Shved',\n",
" u'Alexis Ajinca',\n",
" u'Allen Crabbe',\n",
" u'Alonzo Gee',\n",
" u\"Amar'e Stoudemire\",\n",
" u'Amir Johnson',\n",
" u'Anderson Varejao',\n",
" u'Andre Dawkins',\n",
" u'Andre Drummond',\n",
" u'Andre Iguodala',\n",
" u'Andre Miller',\n",
" u'Andre Roberson',\n",
" u'Andrea Bargnani',\n",
" u'Andrei Kirilenko']\n"
]
}
],
"prompt_number": 97
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pprint(u.inverse_transform(u_mat)[:25])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"[{u'Aaron Brooks': 1.0,\n",
" u'Derrick Rose': 1.0,\n",
" u'Iman Shumpert': -1.0,\n",
" u'J.R. Smith': -1.0,\n",
" u'James Jones': -1.0,\n",
" u'Joakim Noah': 1.0,\n",
" u'Kyrie Irving': -1.0,\n",
" u'Nikola Mirotic': 1.0,\n",
" u'Taj Gibson': 1.0,\n",
" u'Timofey Mozgov': -1.0},\n",
" {u'Chandler Parsons': 1.0,\n",
" u'Dante Exum': -1.0,\n",
" u'Derrick Favors': -1.0,\n",
" u'Dirk Nowitzki': 1.0,\n",
" u'Elijah Millsap': -1.0,\n",
" u'Enes Kanter': -1.0,\n",
" u'Gordon Hayward': -1.0,\n",
" u'Greg Smith': 1.0,\n",
" u'J.J. Barea': 1.0,\n",
" u'Monta Ellis': 1.0},\n",
" {u'Derrick Rose': 1.0,\n",
" u'J.R. Smith': -1.0,\n",
" u'Joakim Noah': 1.0,\n",
" u'Kyrie Irving': -1.0,\n",
" u'LeBron James': -1.0,\n",
" u'Mike Dunleavy': 1.0,\n",
" u'Pau Gasol': 1.0,\n",
" u'Timofey Mozgov': -1.0,\n",
" u'Tony Snell': 1.0,\n",
" u'Tristan Thompson': -1.0},\n",
" {u'C.J. Watson': -1.0,\n",
" u'Damjan Rudez': -1.0,\n",
" u'Dante Cunningham': 1.0,\n",
" u'Ian Mahinmi': -1.0,\n",
" u'Jeff Withey': 1.0,\n",
" u'Luis Scola': -1.0,\n",
" u'Luke Babbitt': 1.0,\n",
" u'Quincy Pondexter': 1.0,\n",
" u'Rodney Stuckey': -1.0,\n",
" u'Toney Douglas': 1.0},\n",
" {u'Andre Iguodala': -1.0,\n",
" u'Anthony Bennett': 1.0,\n",
" u'Chase Budinger': 1.0,\n",
" u'David Lee': -1.0,\n",
" u'Gorgui Dieng': 1.0,\n",
" u'Leandro Barbosa': -1.0,\n",
" u'Marreese Speights': -1.0,\n",
" u'Shabazz Muhammad': 1.0,\n",
" u'Shaun Livingston': -1.0,\n",
" u'Zach LaVine': 1.0},\n",
" {u'Carl Landry': -1.0,\n",
" u'Derrick Williams': -1.0,\n",
" u'Giannis Antetokounmpo': 1.0,\n",
" u'Jason Thompson': -1.0,\n",
" u'Jerryd Bayless': 1.0,\n",
" u\"Johnny O'Bryant\": 1.0,\n",
" u'Nik Stauskas': -1.0,\n",
" u'O.J. Mayo': 1.0,\n",
" u'Ray McCallum': -1.0,\n",
" u'Zaza Pachulia': 1.0},\n",
" {u'Alan Anderson': -1.0,\n",
" u'Beno Udrih': 1.0,\n",
" u'Bojan Bogdanovic': -1.0,\n",
" u'Deron Williams': -1.0,\n",
" u'Jeff Green': 1.0,\n",
" u'Jerome Jordan': -1.0,\n",
" u'Jon Leuer': 1.0,\n",
" u'Kosta Koufos': 1.0,\n",
" u'Mason Plumlee': -1.0,\n",
" u'Tony Allen': 1.0},\n",
" {u'Alan Anderson': -1.0,\n",
" u'Beno Udrih': 1.0,\n",
" u'Brook Lopez': -1.0,\n",
" u'Deron Williams': -1.0,\n",
" u'Jarrett Jack': -1.0,\n",
" u'Jon Leuer': 1.0,\n",
" u'Kosta Koufos': 1.0,\n",
" u'Mason Plumlee': -1.0,\n",
" u'Nick Calathes': 1.0,\n",
" u'Tony Allen': 1.0},\n",
" {u'Aaron Brooks': 1.0,\n",
" u'Ben McLemore': -1.0,\n",
" u'Carl Landry': -1.0,\n",
" u'DeMarcus Cousins': -1.0,\n",
" u'Nikola Mirotic': 1.0,\n",
" u'Pau Gasol': 1.0,\n",
" u'Ray McCallum': -1.0,\n",
" u'Rudy Gay': -1.0,\n",
" u'Taj Gibson': 1.0,\n",
" u'Tony Snell': 1.0},\n",
" {u'Danny Green': -1.0,\n",
" u'Greg Monroe': 1.0,\n",
" u'Jodie Meeks': 1.0,\n",
" u'John Lucas III': 1.0,\n",
" u'Jonas Jerebko': 1.0,\n",
" u'Kentavious Caldwell-Pope': 1.0,\n",
" u'Manu Ginobili': -1.0,\n",
" u'Marco Belinelli': -1.0,\n",
" u'Patrick Mills': -1.0,\n",
" u'Tiago Splitter': -1.0},\n",
" {u'Corey Brewer': -1.0,\n",
" u'Donatas Motiejunas': -1.0,\n",
" u'Gerald Green': 1.0,\n",
" u'Isaiah Thomas': 1.0,\n",
" u'James Harden': -1.0,\n",
" u'Jason Terry': -1.0,\n",
" u'Kostas Papanikolaou': -1.0,\n",
" u'Marcus Morris': 1.0,\n",
" u'Markieff Morris': 1.0,\n",
" u'P.J. Tucker': 1.0},\n",
" {u'Channing Frye': 1.0,\n",
" u'Elfrid Payton': 1.0,\n",
" u'Evan Fournier': 1.0,\n",
" u'Jason Smith': -1.0,\n",
" u'Jose Calderon': -1.0,\n",
" u'Langston Galloway': -1.0,\n",
" u'Louis Amundson': -1.0,\n",
" u'Nikola Vucevic': 1.0,\n",
" u'Tim Hardaway Jr.': -1.0,\n",
" u'Victor Oladipo': 1.0},\n",
" {u'Anthony Morrow': -1.0,\n",
" u'Arron Afflalo': 1.0,\n",
" u'Danilo Gallinari': 1.0,\n",
" u'Darrell Arthur': 1.0,\n",
" u'Dion Waiters': -1.0,\n",
" u'J.J. Hickson': 1.0,\n",
" u'Mitch McGary': -1.0,\n",
" u'Nick Collison': -1.0,\n",
" u'Randy Foye': 1.0,\n",
" u'Reggie Jackson': -1.0},\n",
" {u'Alexis Ajinca': 1.0,\n",
" u'Dante Cunningham': 1.0,\n",
" u'Dante Exum': -1.0,\n",
" u'Gordon Hayward': -1.0,\n",
" u'Luke Babbitt': 1.0,\n",
" u'Rudy Gobert': -1.0,\n",
" u'Steve Novak': -1.0,\n",
" u'Toney Douglas': 1.0,\n",
" u'Trevor Booker': -1.0,\n",
" u'Tyreke Evans': 1.0},\n",
" {u'Austin Rivers': -1.0,\n",
" u'Charlie Villanueva': 1.0,\n",
" u'Devin Harris': 1.0,\n",
" u'Dirk Nowitzki': 1.0,\n",
" u'Glen Davis': -1.0,\n",
" u'Hedo Turkoglu': -1.0,\n",
" u'J.J. Redick': -1.0,\n",
" u'Jamal Crawford': -1.0,\n",
" u'Raymond Felton': 1.0,\n",
" u'Richard Jefferson': 1.0},\n",
" {u'Anthony Tolliver': -1.0,\n",
" u'Greg Monroe': -1.0,\n",
" u'Jason Maxiell': 1.0,\n",
" u'Jeffery Taylor': 1.0,\n",
" u'Jodie Meeks': -1.0,\n",
" u'John Lucas III': -1.0,\n",
" u'Jonas Jerebko': -1.0,\n",
" u'Lance Stephenson': 1.0,\n",
" u'Marvin Williams': 1.0,\n",
" u'P.J. Hairston': 1.0},\n",
" {u'Dante Cunningham': 1.0,\n",
" u'Dante Exum': -1.0,\n",
" u'Jeff Withey': 1.0,\n",
" u'Luke Babbitt': 1.0,\n",
" u'Quincy Pondexter': 1.0,\n",
" u'Rudy Gobert': -1.0,\n",
" u'Steve Novak': -1.0,\n",
" u'Toney Douglas': 1.0,\n",
" u'Trevor Booker': -1.0,\n",
" u'Trey Burke': -1.0},\n",
" {u\"Amar'e Stoudemire\": -1.0,\n",
" u'Chris Andersen': 1.0,\n",
" u'Chris Bosh': 1.0,\n",
" u'Danny Granger': 1.0,\n",
" u'Jason Smith': -1.0,\n",
" u'Lance Thomas': -1.0,\n",
" u'Norris Cole': 1.0,\n",
" u'Shane Larkin': -1.0,\n",
" u'Tim Hardaway Jr.': -1.0,\n",
" u'Tyler Johnson': 1.0},\n",
" {u'Chris Andersen': 1.0,\n",
" u'Chris Bosh': 1.0,\n",
" u'Danny Granger': 1.0,\n",
" u'Jason Smith': -1.0,\n",
" u'Lance Thomas': -1.0,\n",
" u'Louis Amundson': -1.0,\n",
" u'Norris Cole': 1.0,\n",
" u'Shane Larkin': -1.0,\n",
" u'Tim Hardaway Jr.': -1.0,\n",
" u'Tyler Johnson': 1.0},\n",
" {u'Alexis Ajinca': 1.0,\n",
" u'Dante Cunningham': 1.0,\n",
" u'Dante Exum': -1.0,\n",
" u'Gordon Hayward': -1.0,\n",
" u'Luke Babbitt': 1.0,\n",
" u'Quincy Pondexter': 1.0,\n",
" u'Rudy Gobert': -1.0,\n",
" u'Steve Novak': -1.0,\n",
" u'Toney Douglas': 1.0,\n",
" u'Trevor Booker': -1.0},\n",
" {u'Alan Anderson': -1.0,\n",
" u'Bojan Bogdanovic': -1.0,\n",
" u'Brandon Knight': 1.0,\n",
" u'Brook Lopez': -1.0,\n",
" u'Deron Williams': -1.0,\n",
" u'Ersan Ilyasova': 1.0,\n",
" u'Jarrett Jack': -1.0,\n",
" u'Jerryd Bayless': 1.0,\n",
" u'Khris Middleton': 1.0,\n",
" u'O.J. Mayo': 1.0},\n",
" {u'Aaron Gordon': -1.0,\n",
" u'Andre Miller': 1.0,\n",
" u'Dewayne Dedmon': -1.0,\n",
" u'Drew Gooden': 1.0,\n",
" u'Evan Fournier': -1.0,\n",
" u'Kevin Seraphin': 1.0,\n",
" u\"Kyle O'Quinn\": -1.0,\n",
" u'Martell Webster': 1.0,\n",
" u'Rasual Butler': 1.0,\n",
" u'Victor Oladipo': -1.0},\n",
" {u'Aaron Gordon': -1.0,\n",
" u'Andre Miller': 1.0,\n",
" u'Dewayne Dedmon': -1.0,\n",
" u'Drew Gooden': 1.0,\n",
" u'Elfrid Payton': -1.0,\n",
" u'Evan Fournier': -1.0,\n",
" u'Kevin Seraphin': 1.0,\n",
" u'Martell Webster': 1.0,\n",
" u'Maurice Harkless': -1.0,\n",
" u'Rasual Butler': 1.0},\n",
" {u'Aaron Brooks': -1.0,\n",
" u'Dewayne Dedmon': 1.0,\n",
" u\"E'Twaun Moore\": -1.0,\n",
" u'Evan Fournier': 1.0,\n",
" u'Jimmy Butler': -1.0,\n",
" u'Joakim Noah': -1.0,\n",
" u\"Kyle O'Quinn\": 1.0,\n",
" u'Taj Gibson': -1.0,\n",
" u'Tobias Harris': 1.0,\n",
" u'Willie Green': 1.0},\n",
" {u'Corey Brewer': 1.0,\n",
" u'Damian Lillard': -1.0,\n",
" u'Donatas Motiejunas': 1.0,\n",
" u'James Harden': 1.0,\n",
" u'Meyers Leonard': -1.0,\n",
" u'Nicolas Batum': -1.0,\n",
" u'Patrick Beverley': 1.0,\n",
" u'Robin Lopez': -1.0,\n",
" u'Trevor Ariza': 1.0,\n",
" u'Wesley Matthews': -1.0}]\n"
]
}
],
"prompt_number": 96
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment