Created
October 27, 2013 22:43
-
-
Save tklein23/7188804 to your computer and use it in GitHub Desktop.
StreamingSparseFeatures.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Learning from streamed sparse features" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 4, | |
"metadata": {}, | |
"source": [ | |
"*Author (GitHub ID: [tklein23](https://github.com/tklein23))*" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"A simple demonstration of how to train an online learning algorithm (`OnlineSVMSGD`) on streamed sparse features." | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Preparing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import modshogun\n", | |
"# modshogun.get_global_io().set_loglevel(modshogun.MSG_DEBUG & modshogun.MSG_LINE_AND_FILE)\n", | |
"\n", | |
"from modshogun import StreamingAsciiFile\n", | |
"from modshogun import StreamingSparseRealFeatures\n", | |
"from modshogun import BinaryLabels" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"`StreamingSparseRealFeatures` is an alias for `StreamingSparseFeatures<float64_t>`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import os\n", | |
"shogun_data = os.environ['SHOGUN_DATA'] if 'SHOGUN_DATA' in os.environ else '../../../data'" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"shogun_data = '.'\n", | |
"train_file = shogun_data + \"/train_sparsereal.light\"\n", | |
"eval_file = shogun_data + \"/train_sparsereal.light\"" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Loading the true binary labels by streaming through the evaluation file (in this demo, the same file that is used for training):" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"filestream = StreamingAsciiFile(eval_file)\n", | |
"eval_feats = StreamingSparseRealFeatures(filestream, True, 1024)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"labels_temp = []\n", | |
"\n", | |
"eval_feats.start_parser()\n", | |
"\n", | |
"while eval_feats.get_next_example():\n", | |
" labels_temp.append(eval_feats.get_label())\n", | |
" eval_feats.release_example()\n", | |
"\n", | |
"eval_feats.end_parser()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy\n", | |
"labels_true = BinaryLabels(numpy.array(labels_temp))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"labels_true.get_labels()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 7, | |
"text": [ | |
"array([ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", | |
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", | |
" 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", | |
" 1., 1., 1., 1., 1., 1., 1., -1., -1., -1., -1., -1., -1.,\n", | |
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n", | |
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n", | |
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.])" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Training" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"filestream = StreamingAsciiFile(train_file)\n", | |
"train_feats = StreamingSparseRealFeatures(filestream, True, 1024)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from modshogun import OnlineSVMSGD\n", | |
"C=1.0\n", | |
"svm = OnlineSVMSGD(C, train_feats)\n", | |
"%time svm.train()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", | |
"Wall time: 1.2 ms\n" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 9, | |
"text": [ | |
"True" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy.linalg\n", | |
"numpy.linalg.norm(svm.get_w(), 2)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 10, | |
"text": [ | |
"325.52298" | |
] | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Evaluation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"filestream = StreamingAsciiFile(eval_file)\n", | |
"eval_feats = StreamingSparseRealFeatures(filestream, True, 1024)\n", | |
"%time labels_pred = svm.apply_binary(eval_feats)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n", | |
"Wall time: 1.08 ms\n" | |
] | |
} | |
], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"labels_pred.get_labels()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 12, | |
"text": [ | |
"array([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n", | |
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., 1., -1., -1.,\n", | |
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n", | |
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n", | |
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n", | |
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,\n", | |
" -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.])" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"labels_true.get_labels() == labels_pred.get_labels()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 13, | |
"text": [ | |
"array([False, False, False, False, False, False, False, False, False,\n", | |
" False, False, False, False, False, False, False, False, False,\n", | |
" False, False, False, False, False, True, False, False, False,\n", | |
" False, False, False, False, False, False, False, False, False,\n", | |
" False, False, False, False, False, False, False, False, False,\n", | |
" False, True, True, True, True, True, True, True, True,\n", | |
" True, True, True, True, True, True, True, True, True,\n", | |
" True, True, True, True, True, True, True, True, True,\n", | |
" True, True, True, True, True, True, True, True, True,\n", | |
" True, True, True, True, True, True, True, True, True,\n", | |
" True, True], dtype=bool)" | |
] | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"num_labels = labels_true.get_num_labels()\n", | |
"1.0*numpy.count_nonzero(labels_true.get_labels() == labels_pred.get_labels())/num_labels" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 14, | |
"text": [ | |
"0.5108695652173914" | |
] | |
} | |
], | |
"prompt_number": 14 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"References" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"- [StreamingAsciiFile](http://www.shogun-toolbox.org/doc/en/current/StreamingAsciiFile_8cpp.html)\n", | |
"- [StreamingSparseRealFeatures](http://www.shogun-toolbox.org/doc/en/current/StreamingSparseFeatures_8cpp.html)" | |
] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment