Skip to content

Instantly share code, notes, and snippets.

@iskandr
Created October 14, 2013 00:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iskandr/6968728 to your computer and use it in GitHub Desktop.
Save iskandr/6968728 to your computer and use it in GitHub Desktop.
cudatree getting suboptimal accuracy on covtype
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "Random Forest accuracy"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from sklearn.ensemble import RandomForestClassifier as SklearnRF"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from cudatree import RandomForestClassifier as CudaRF"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"/usr/lib/python2.7/dist-packages/nose/util.py:14: DeprecationWarning: The compiler package is deprecated and removed in Python 3.x.\n",
" from compiler.consts import CO_GENERATOR\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import sklearn.datasets"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d = sklearn.datasets.fetch_covtype(); x = d['data']; y = d['target']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print x.shape, y.shape"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"(581012, 54) (581012,)\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"n = x.shape[0] / 2; xtrain = x[:n]; ytrain = y[:n]; xtest = x[n:]; ytest = y[n:]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"skrf = SklearnRF(n_estimators = 21, n_jobs = 4)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 19
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cudarf = CudaRF()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time skrf.fit(xtrain, ytrain)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 0.35 s, sys: 0.66 s, total: 1.01 s\n",
"Wall time: 19.40 s\n"
]
},
{
"output_type": "pyout",
"prompt_number": 21,
"text": [
"RandomForestClassifier(bootstrap=True, compute_importances=None,\n",
" criterion='gini', max_depth=None, max_features='auto',\n",
" min_density=None, min_samples_leaf=1, min_samples_split=2,\n",
" n_estimators=21, n_jobs=4, oob_score=False, random_state=None,\n",
" verbose=0)"
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"time cudarf.fit(xtrain, ytrain, n_trees = 21, bootstrap=False)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 13.90 s, sys: 0.24 s, total: 14.14 s\n",
"Wall time: 14.15 s\n"
]
}
],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"sklearn accuracy\", np.mean(skrf.predict(xtest) == ytest)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"sklearn accuracy"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
" 0.724614982135\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"cudatree accuracy\", np.mean(cudarf.predict(xtest) == ytest)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"cudatree accuracy "
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"0.633735619918\n"
]
}
],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cudarf.predict(xtest)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 25,
"text": [
"array([2, 2, 1, ..., 3, 3, 3], dtype=int32)"
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"skrf.predict(xtest)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 26,
"text": [
"array([1, 1, 1, ..., 3, 3, 3], dtype=int32)"
]
}
],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment