Skip to content

Instantly share code, notes, and snippets.

@tklein23
Created October 27, 2013 22:43
Show Gist options
  • Save tklein23/7188804 to your computer and use it in GitHub Desktop.
Save tklein23/7188804 to your computer and use it in GitHub Desktop.
StreamingSparseFeatures.ipynb
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Learning from streamed sparse features"
]
},
{
"cell_type": "heading",
"level": 4,
"metadata": {},
"source": [
"*Author (GitHub ID: [tklein23](https://github.com/tklein23))*"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A simple demonstration of how to use streamed sparse features to train a simple online learning algorithm."
]
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Preparing"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import modshogun\n",
"# modshogun.get_global_io().set_loglevel(modshogun.MSG_DEBUG & modshogun.MSG_LINE_AND_FILE)\n",
"\n",
"from modshogun import StreamingAsciiFile\n",
"from modshogun import StreamingSparseRealFeatures\n",
"from modshogun import BinaryLabels"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`StreamingSparseRealFeatures` is an alias for `StreamingSparseFeatures<float64_t>`."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os\n",
"\n",
"# Directory holding the data files: the value of the SHOGUN_DATA environment\n",
"# variable when it is set, otherwise the relative default path.\n",
"shogun_data = os.environ.get('SHOGUN_DATA', '../../../data')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Both paths are built from `shogun_data` as it is already defined in the\n",
"# kernel. It is deliberately not reassigned here, so a data directory\n",
"# configured through SHOGUN_DATA is not silently replaced by a hard-coded one.\n",
"# Point SHOGUN_DATA at the directory that contains train_sparsereal.light.\n",
"train_file = shogun_data + \"/train_sparsereal.light\"\n",
"# NOTE(review): evaluation reads the same file as training, so the accuracy\n",
"# computed at the end is a training accuracy - confirm whether a separate\n",
"# evaluation file was intended.\n",
"eval_file = shogun_data + \"/train_sparsereal.light\""
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Loading binary labels from streamed input file:"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"filestream = StreamingAsciiFile(eval_file)\n",
"eval_feats = StreamingSparseRealFeatures(filestream, True, 1024)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"labels_temp = []\n",
"\n",
"# Collect the label of every example in the stream. The parser is started\n",
"# before the loop and ended in a `finally` clause, so an error raised while\n",
"# reading does not leave it started when the cell is run again.\n",
"eval_feats.start_parser()\n",
"try:\n",
"    while eval_feats.get_next_example():\n",
"        labels_temp.append(eval_feats.get_label())\n",
"        # Release the current example before requesting the next one.\n",
"        eval_feats.release_example()\n",
"finally:\n",
"    eval_feats.end_parser()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy\n",
"labels_true = BinaryLabels(numpy.array(labels_temp))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"labels_true.get_labels()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"array([ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
" 1., 1., 1., 1., 1., 1., 1., -1., -1., -1., -1., -1., -1.,\n",
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n",
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n",
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.])"
]
}
],
"prompt_number": 7
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Training"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"filestream = StreamingAsciiFile(train_file)\n",
"train_feats = StreamingSparseRealFeatures(filestream, True, 1024)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from modshogun import OnlineSVMSGD\n",
"C=1.0\n",
"svm = OnlineSVMSGD(C, train_feats)\n",
"%time svm.train()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n",
"Wall time: 1.2 ms\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"True"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy.linalg\n",
"numpy.linalg.norm(svm.get_w(), 2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"325.52298"
]
}
],
"prompt_number": 10
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Evaluation"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"filestream = StreamingAsciiFile(eval_file)\n",
"eval_feats = StreamingSparseRealFeatures(filestream, True, 1024)\n",
"%time labels_pred = svm.apply_binary(eval_feats)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n",
"Wall time: 1.08 ms\n"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"labels_pred.get_labels()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"array([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n",
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., 1., -1., -1.,\n",
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n",
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n",
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n",
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n",
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.])"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"labels_true.get_labels() == labels_pred.get_labels()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 13,
"text": [
"array([False, False, False, False, False, False, False, False, False,\n",
" False, False, False, False, False, False, False, False, False,\n",
" False, False, False, False, False, True, False, False, False,\n",
" False, False, False, False, False, False, False, False, False,\n",
" False, False, False, False, False, False, False, False, False,\n",
" False, True, True, True, True, True, True, True, True,\n",
" True, True, True, True, True, True, True, True, True,\n",
" True, True, True, True, True, True, True, True, True,\n",
" True, True, True, True, True, True, True, True, True,\n",
" True, True, True, True, True, True, True, True, True,\n",
" True, True], dtype=bool)"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Accuracy: fraction of examples whose predicted label equals the true label.\n",
"num_labels = labels_true.get_num_labels()\n",
"num_correct = numpy.count_nonzero(labels_true.get_labels() == labels_pred.get_labels())\n",
"float(num_correct) / num_labels"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"0.5108695652173914"
]
}
],
"prompt_number": 14
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"References"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- [StreamingAsciiFile](http://www.shogun-toolbox.org/doc/en/current/StreamingAsciiFile_8cpp.html)\n",
"- [StreamingSparseRealFeatures](http://www.shogun-toolbox.org/doc/en/current/StreamingSparseFeatures_8cpp.html)"
]
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment