Skip to content

Instantly share code, notes, and snippets.

@devashishd12
Created June 22, 2016 19:57
Show Gist options
  • Save devashishd12/1e8574dcb080206edce5dcba90e93d55 to your computer and use it in GitHub Desktop.
Save devashishd12/1e8574dcb080206edce5dcba90e93d55 to your computer and use it in GitHub Desktop.
Testing coherence for LdaVowpalWabbit wrapper
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from gensim.models.coherencemodel import CoherenceModel\n",
"from gensim.models.ldamodel import LdaModel\n",
"from gensim.corpora.dictionary import Dictionary\n",
"from gensim.models.wrappers import LdaVowpalWabbit\n",
"import numpy as np\n",
"from pprint import pprint\n",
"import test"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Toy corpus: nine short document titles, one string per document.\n",
"documents = [\"Human machine interface for lab abc computer applications\",\n",
"             \"A survey of user opinion of computer system response time\",\n",
"             \"The EPS user interface management system\",\n",
"             \"System and human system engineering testing of EPS\",\n",
"             \"Relation of user perceived response time to error measurement\",\n",
"             \"The generation of random binary unordered trees\",\n",
"             \"The intersection graph of paths in trees\",\n",
"             \"Graph minors IV Widths of trees and well quasi ordering\",\n",
"             \"Graph minors A survey\"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Words dropped from every document before building the dictionary.\n",
"stoplist = set('for a of the and to in'.split())\n",
"\n",
"# Lower-case each document, split on whitespace, and discard stop words.\n",
"texts = []\n",
"for document in documents:\n",
"    tokens = document.lower().split()\n",
"    texts.append([token for token in tokens if token not in stoplist])\n",
"\n",
"# Build the dictionary from the tokenised texts, then convert each text\n",
"# to its bag-of-words form via doc2bow.\n",
"dictionary = Dictionary(texts)\n",
"corpus = [dictionary.doc2bow(text) for text in texts]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Seed NumPy's global RNG before training (presumably so the LdaModel run\n",
"# is repeatable -- confirm against the installed gensim version).\n",
"np.random.seed(1)\n",
"# NOTE(review): `tm` is not referenced by any later cell in this notebook.\n",
"tm = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# First positional argument of LdaVowpalWabbit; this is a machine-specific\n",
"# absolute path, so adjust it before re-running elsewhere.\n",
"vw_path = '/home/devashish/vw-8'\n",
"\n",
"# Both models share corpus, dictionary and topic count; only `passes` differs.\n",
"model1 = LdaVowpalWabbit(vw_path, corpus=corpus, num_topics=2, id2word=dictionary, passes=1)\n",
"model2 = LdaVowpalWabbit(vw_path, corpus=corpus, num_topics=2, id2word=dictionary, passes=50)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# One 'u_mass' CoherenceModel per Vowpal Wabbit model, over the same corpus.\n",
"cm1 = CoherenceModel(\n",
"    model=model1,\n",
"    corpus=corpus,\n",
"    coherence='u_mass'\n",
")\n",
"cm2 = CoherenceModel(\n",
"    model=model2,\n",
"    corpus=corpus,\n",
"    coherence='u_mass'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-20.5979947296\n",
"-15.30250407\n"
]
}
],
"source": [
"# Coherence of the 1-pass model (cm1) followed by the 50-pass model (cm2).\n",
"# Single-argument print(...) behaves the same on Python 2 and Python 3.\n",
"print(cm1.get_coherence())\n",
"print(cm2.get_coherence())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment