Skip to content

Instantly share code, notes, and snippets.

@jdnc
Created March 22, 2014 23:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdnc/9715957 to your computer and use it in GitHub Desktop.
Save jdnc/9715957 to your computer and use it in GitHub Desktop.
Experimenting with Naive Bayes for Neurosynth: just the first steps
{
"metadata": {
"name": "Naive Bayes Starter"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": "import naive_bayes as nb # the python script which is a work in progress\n# get the numpy feature and target array, replicating the criteria of the Large Scale... paper\nX, y = nb.get_X_y()",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": "# check that X and y are of the expected dimensions and view them\nprint X.ndim\nprint y.ndim\nprint X.size\nprint y.size\nprint X\nprint y",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "2\n1\n1393659176\n1544\n[[ 0. 0. 0. ..., 0. 0. 0.]\n [ 0. 0. 0. ..., 0. 0. 0.]\n [ 0. 0. 0. ..., 0. 0. 0.]\n ..., \n [ 0. 0. 0. ..., 0. 0. 0.]\n [ 0. 0. 0. ..., 0. 0. 0.]\n [ 0. 0. 0. ..., 0. 0. 0.]]\n['visual' 'social' 'visual' ..., 'attention' 'reward' 'executive']\n"
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": "# Since y has string labels encode them to numerical values\nfrom sklearn import preprocessing\nle = preprocessing.LabelEncoder()\nle.fit(y)\n# see all the classes\nle.classes_",
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": "array(['action', 'attention', 'auditory', 'conflict', 'emotion',\n 'encoding', 'episodic', 'executive', 'imagery', 'language', 'pain',\n 'perception', 'phonological', 'recognition', 'retrieval', 'reward',\n 'semantic', 'sensory', 'social', 'spatial', 'verbal', 'visual'], dtype=object)"
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": "# now encode y so that it has numerical classes rather than string\ny_enc = le.transform(y)\ny_enc",
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": "array([21, 18, 21, ..., 1, 15, 7])"
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": "# see the inverse transform just to check\nle.inverse_transform(y_enc)",
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": "array(['visual', 'social', 'visual', ..., 'attention', 'reward',\n 'executive'], dtype=object)"
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": "# Now set up the naive bayes classifier\nfrom sklearn.naive_bayes import MultinomialNB\n# since study assumes uniform prior for each term, set fit_prior to false\nclf = MultinomialNB(fit_prior=False)\n# import cross validation module\nfrom sklearn import cross_validation\n# also the One vs One \nfrom sklearn.multiclass import OneVsOneClassifier",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": "# Roughly this should be the way to proceed\n# however takes too long on my mac, so diverging to learn about deploying it via lonestar\nscores = cross_validation.cross_val_score(OneVsOneClassifier(clf), X, y_enc, cv=10)",
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment