Created
March 22, 2014 23:17
-
-
Save jdnc/9715957 to your computer and use it in GitHub Desktop.
Experimenting with Naive Bayes for Neurosynth; just the beginning steps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "Naive Bayes Starter" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "import naive_bayes as nb # the python script which is a work in progress\n# get the numpy feature and target array, replicating the criteria of the Large Scale... paper\nX, y = nb.get_X_y()", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# check that X and y are of the expected dimensions and view them\nprint X.ndim\nprint y.ndim\nprint X.size\nprint y.size\nprint X\nprint y", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "2\n1\n1393659176\n1544\n[[ 0. 0. 0. ..., 0. 0. 0.]\n [ 0. 0. 0. ..., 0. 0. 0.]\n [ 0. 0. 0. ..., 0. 0. 0.]\n ..., \n [ 0. 0. 0. ..., 0. 0. 0.]\n [ 0. 0. 0. ..., 0. 0. 0.]\n [ 0. 0. 0. ..., 0. 0. 0.]]\n['visual' 'social' 'visual' ..., 'attention' 'reward' 'executive']\n" | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Since y has string labels encode them to numerical values\nfrom sklearn import preprocessing\nle = preprocessing.LabelEncoder()\nle.fit(y)\n# see all the classes\nle.classes_", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": "array(['action', 'attention', 'auditory', 'conflict', 'emotion',\n 'encoding', 'episodic', 'executive', 'imagery', 'language', 'pain',\n 'perception', 'phonological', 'recognition', 'retrieval', 'reward',\n 'semantic', 'sensory', 'social', 'spatial', 'verbal', 'visual'], dtype=object)" | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# now encode y so that it has numerical classes rather than string\ny_enc = le.transform(y)\ny_enc", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": "array([21, 18, 21, ..., 1, 15, 7])" | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# see the inverse transform just to check\nle.inverse_transform(y_enc)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 8, | |
"text": "array(['visual', 'social', 'visual', ..., 'attention', 'reward',\n 'executive'], dtype=object)" | |
} | |
], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Now set up the naive bayes classifier\nfrom sklearn.naive_bayes import MultinomialNB\n# since study assumes uniform prior for each term, set fit_prior to false\nclf = MultinomialNB(fit_prior=False)\n# import cross validation module\nfrom sklearn import cross_validation\n# also the One vs One \nfrom sklearn.multiclass import OneVsOneClassifier", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": "# Roughly this should be the way to proceed\n# however takes too long on my mac, so diverging to learn about deploying it via lonestar\nscores = cross_validation.cross_val_score(OneVsOneClassifier(clf), X, y_enc, cv=10)", | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment