Created
May 27, 2016 17:03
-
-
Save devashishd12/3025af715754d7eda11ed5ce225f8328 to your computer and use it in GitHub Desktop.
Example usage for S_One_Pre segmentation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from gensim.corpora import Dictionary, MmCorpus\n", | |
"from gensim.models.ldamodel import LdaModel\n", | |
"from gensim.segmentation import S_One_Pre\n", | |
"from gensim.matutils import argsort" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"texts = [['human', 'interface', 'computer'],\n", | |
" ['survey', 'user', 'computer', 'system', 'response', 'time'],\n", | |
" ['eps', 'user', 'interface', 'system'],\n", | |
" ['system', 'human', 'system', 'eps'],\n", | |
" ['user', 'response', 'time'],\n", | |
" ['trees'],\n", | |
" ['graph', 'trees'],\n", | |
" ['graph', 'minors', 'trees'],\n", | |
" ['graph', 'minors', 'survey']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"dictionary = Dictionary(texts)\n", | |
"corpus = [dictionary.doc2bow(text) for text in texts]\n", | |
"MmCorpus.serialize('/tmp/deerwester.mm', corpus)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"topics = []\n", | |
"str_topics = []\n", | |
"lm = LdaModel(corpus=corpus)\n", | |
"for topic in lm.state.get_lambda():\n", | |
" topic = topic / topic.sum()\n", | |
" bestn = argsort(topic, topn=3, reverse=True)\n", | |
" topics.append(bestn)\n", | |
" beststr = [(topic[id], lm.id2word[id]) for id in bestn]\n", | |
" str_topics.append(beststr)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[ 9 10 7]\n" | |
] | |
} | |
], | |
"source": [ | |
"print topics[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{0: [(10, 9), (7, 9), (7, 10)]}\n" | |
] | |
} | |
], | |
"source": [ | |
"print S_One_Pre([topics[0]])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{0: [(10, 9), (7, 9), (7, 10)],\n", | |
" 1: [(10, 9), (7, 9), (7, 10)],\n", | |
" 2: [(10, 9), (7, 9), (7, 10)],\n", | |
" 3: [(10, 9), (7, 9), (7, 10)],\n", | |
" 4: [(10, 9), (7, 9), (7, 10)],\n", | |
" 5: [(10, 9), (7, 9), (7, 10)],\n", | |
" 6: [(10, 9), (7, 9), (7, 10)],\n", | |
" 7: [(10, 9), (7, 9), (7, 10)],\n", | |
" 8: [(10, 9), (7, 9), (7, 10)],\n", | |
" 9: [(10, 9), (7, 9), (7, 10)],\n", | |
" 10: [(10, 9), (7, 9), (7, 10)],\n", | |
" 11: [(10, 9), (7, 9), (7, 10)],\n", | |
" 12: [(10, 9), (7, 9), (7, 10)],\n", | |
" 13: [(10, 9), (7, 9), (7, 10)],\n", | |
" 14: [(10, 9), (7, 9), (7, 10)],\n", | |
" 15: [(10, 9), (7, 9), (7, 10)],\n", | |
" 16: [(10, 9), (7, 9), (7, 10)],\n", | |
" 17: [(10, 9), (7, 9), (7, 10)],\n", | |
" 18: [(10, 9), (7, 9), (7, 10)],\n", | |
" 19: [(10, 9), (7, 9), (7, 10)],\n", | |
" 20: [(10, 9), (7, 9), (7, 10)],\n", | |
" 21: [(7, 4), (6, 4), (6, 7)],\n", | |
" 22: [(10, 9), (7, 9), (7, 10)],\n", | |
" 23: [(10, 9), (7, 9), (7, 10)],\n", | |
" 24: [(10, 9), (7, 9), (7, 10)],\n", | |
" 25: [(10, 9), (7, 9), (7, 10)],\n", | |
" 26: [(1, 2), (0, 2), (0, 1)],\n", | |
" 27: [(10, 9), (7, 9), (7, 10)],\n", | |
" 28: [(10, 9), (7, 9), (7, 10)],\n", | |
" 29: [(10, 9), (7, 9), (7, 10)],\n", | |
" 30: [(10, 9), (7, 9), (7, 10)],\n", | |
" 31: [(10, 9), (7, 9), (7, 10)],\n", | |
" 32: [(10, 9), (7, 9), (7, 10)],\n", | |
" 33: [(10, 9), (7, 9), (7, 10)],\n", | |
" 34: [(10, 9), (7, 9), (7, 10)],\n", | |
" 35: [(10, 9), (7, 9), (7, 10)],\n", | |
" 36: [(10, 9), (7, 9), (7, 10)],\n", | |
" 37: [(10, 9), (7, 9), (7, 10)],\n", | |
" 38: [(10, 9), (7, 9), (7, 10)],\n", | |
" 39: [(10, 9), (7, 9), (7, 10)],\n", | |
" 40: [(10, 9), (7, 9), (7, 10)],\n", | |
" 41: [(10, 9), (7, 9), (7, 10)],\n", | |
" 42: [(10, 9), (7, 9), (7, 10)],\n", | |
" 43: [(10, 9), (7, 9), (7, 10)],\n", | |
" 44: [(10, 9), (7, 9), (7, 10)],\n", | |
" 45: [(10, 9), (7, 9), (7, 10)],\n", | |
" 46: [(10, 9), (7, 9), (7, 10)],\n", | |
" 47: [(11, 5), (10, 5), (10, 11)],\n", | |
" 48: [(10, 9), (7, 9), (7, 10)],\n", | |
" 49: [(10, 9), (7, 9), (7, 10)],\n", | |
" 50: [(10, 9), (7, 9), (7, 10)],\n", | |
" 51: [(10, 9), (7, 9), (7, 10)],\n", | |
" 52: [(10, 9), (7, 9), (7, 10)],\n", | |
" 53: [(10, 9), (7, 9), (7, 10)],\n", | |
" 54: [(10, 9), (7, 9), (7, 10)],\n", | |
" 55: [(10, 9), (7, 9), (7, 10)],\n", | |
" 56: [(10, 9), (7, 9), (7, 10)],\n", | |
" 57: [(10, 9), (7, 9), (7, 10)],\n", | |
" 58: [(10, 9), (7, 9), (7, 10)],\n", | |
" 59: [(10, 9), (7, 9), (7, 10)],\n", | |
" 60: [(10, 9), (7, 9), (7, 10)],\n", | |
" 61: [(10, 9), (7, 9), (7, 10)],\n", | |
" 62: [(10, 9), (7, 9), (7, 10)],\n", | |
" 63: [(10, 9), (7, 9), (7, 10)],\n", | |
" 64: [(10, 9), (7, 9), (7, 10)],\n", | |
" 65: [(10, 9), (11, 9), (11, 10)],\n", | |
" 66: [(10, 9), (7, 9), (7, 10)],\n", | |
" 67: [(10, 9), (7, 9), (7, 10)],\n", | |
" 68: [(10, 9), (7, 9), (7, 10)],\n", | |
" 69: [(10, 9), (7, 9), (7, 10)],\n", | |
" 70: [(10, 9), (7, 9), (7, 10)],\n", | |
" 71: [(3, 4), (7, 4), (7, 3)],\n", | |
" 72: [(10, 9), (7, 9), (7, 10)],\n", | |
" 73: [(10, 9), (7, 9), (7, 10)],\n", | |
" 74: [(10, 9), (7, 9), (7, 10)],\n", | |
" 75: [(2, 6), (8, 6), (8, 2)],\n", | |
" 76: [(6, 8), (0, 8), (0, 6)],\n", | |
" 77: [(10, 9), (7, 9), (7, 10)],\n", | |
" 78: [(10, 9), (7, 9), (7, 10)],\n", | |
" 79: [(10, 9), (7, 9), (7, 10)],\n", | |
" 80: [(10, 9), (7, 9), (7, 10)],\n", | |
" 81: [(10, 9), (7, 9), (7, 10)],\n", | |
" 82: [(10, 9), (7, 9), (7, 10)],\n", | |
" 83: [(10, 9), (7, 9), (7, 10)],\n", | |
" 84: [(10, 9), (7, 9), (7, 10)],\n", | |
" 85: [(10, 9), (7, 9), (7, 10)],\n", | |
" 86: [(10, 9), (7, 9), (7, 10)],\n", | |
" 87: [(10, 9), (7, 9), (7, 10)],\n", | |
" 88: [(10, 9), (7, 9), (7, 10)],\n", | |
" 89: [(10, 9), (7, 9), (7, 10)],\n", | |
" 90: [(10, 9), (7, 9), (7, 10)],\n", | |
" 91: [(10, 9), (7, 9), (7, 10)],\n", | |
" 92: [(10, 9), (7, 9), (7, 10)],\n", | |
" 93: [(10, 9), (7, 9), (7, 10)],\n", | |
" 94: [(10, 9), (7, 9), (7, 10)],\n", | |
" 95: [(10, 9), (7, 9), (7, 10)],\n", | |
" 96: [(10, 9), (7, 9), (7, 10)],\n", | |
" 97: [(10, 9), (7, 9), (7, 10)],\n", | |
" 98: [(10, 9), (7, 9), (7, 10)],\n", | |
" 99: [(10, 9), (7, 9), (7, 10)]}" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"S_One_Pre(topics)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment