Skip to content

Instantly share code, notes, and snippets.

@hiroto-takatoshi
Created February 8, 2017 05:19
Show Gist options
  • Save hiroto-takatoshi/be0ced688e10afab5834d90067b6c11d to your computer and use it in GitHub Desktop.
Save hiroto-takatoshi/be0ced688e10afab5834d90067b6c11d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import lda\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Parse the LDA-C formatted corpus file into X.\n",
"# The file is opened in a `with` block so the handle is closed as soon as\n",
"# parsing finishes instead of staying open for the lifetime of the kernel.\n",
"with open('ap.dat') as corpus_file:\n",
"    X = lda.utils.ldac2dtm(corpus_file, offset=0)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Read the vocabulary file and keep every whitespace-separated token,\n",
"# in file order, as an immutable tuple.\n",
"with open('vocab.txt') as vocab_file:\n",
"    vocab = tuple(token for line in vocab_file for token in line.split())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Build the LDA model: 20 topics, 1500 iterations, and a fixed\n",
"# random_state so the run uses the same seed every time.\n",
"model = lda.LDA(\n",
"    n_topics=20,\n",
"    n_iter=1500,\n",
"    random_state=1,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:lda:n_documents: 2246\n",
"INFO:lda:vocab_size: 10473\n",
"INFO:lda:n_words: 435838\n",
"INFO:lda:n_topics: 20\n",
"INFO:lda:n_iter: 1500\n",
"INFO:lda:<0> log likelihood: -5476651\n",
"INFO:lda:<10> log likelihood: -4077671\n",
"INFO:lda:<20> log likelihood: -3901140\n",
"INFO:lda:<30> log likelihood: -3837499\n",
"INFO:lda:<40> log likelihood: -3801351\n",
"INFO:lda:<50> log likelihood: -3779830\n",
"INFO:lda:<60> log likelihood: -3763033\n",
"INFO:lda:<70> log likelihood: -3751735\n",
"INFO:lda:<80> log likelihood: -3742309\n",
"INFO:lda:<90> log likelihood: -3733977\n",
"INFO:lda:<100> log likelihood: -3727111\n",
"INFO:lda:<110> log likelihood: -3723015\n",
"INFO:lda:<120> log likelihood: -3718689\n",
"INFO:lda:<130> log likelihood: -3716380\n",
"INFO:lda:<140> log likelihood: -3712261\n",
"INFO:lda:<150> log likelihood: -3710904\n",
"INFO:lda:<160> log likelihood: -3707895\n",
"INFO:lda:<170> log likelihood: -3705384\n",
"INFO:lda:<180> log likelihood: -3704104\n",
"INFO:lda:<190> log likelihood: -3702793\n",
"INFO:lda:<200> log likelihood: -3701623\n",
"INFO:lda:<210> log likelihood: -3698983\n",
"INFO:lda:<220> log likelihood: -3697639\n",
"INFO:lda:<230> log likelihood: -3697154\n",
"INFO:lda:<240> log likelihood: -3694176\n",
"INFO:lda:<250> log likelihood: -3693392\n",
"INFO:lda:<260> log likelihood: -3692639\n",
"INFO:lda:<270> log likelihood: -3690817\n",
"INFO:lda:<280> log likelihood: -3690774\n",
"INFO:lda:<290> log likelihood: -3690850\n",
"INFO:lda:<300> log likelihood: -3690055\n",
"INFO:lda:<310> log likelihood: -3689924\n",
"INFO:lda:<320> log likelihood: -3688729\n",
"INFO:lda:<330> log likelihood: -3687494\n",
"INFO:lda:<340> log likelihood: -3687316\n",
"INFO:lda:<350> log likelihood: -3687768\n",
"INFO:lda:<360> log likelihood: -3686429\n",
"INFO:lda:<370> log likelihood: -3685422\n",
"INFO:lda:<380> log likelihood: -3684581\n",
"INFO:lda:<390> log likelihood: -3683734\n",
"INFO:lda:<400> log likelihood: -3683986\n",
"INFO:lda:<410> log likelihood: -3683651\n",
"INFO:lda:<420> log likelihood: -3683762\n",
"INFO:lda:<430> log likelihood: -3683547\n",
"INFO:lda:<440> log likelihood: -3682437\n",
"INFO:lda:<450> log likelihood: -3682080\n",
"INFO:lda:<460> log likelihood: -3681822\n",
"INFO:lda:<470> log likelihood: -3681101\n",
"INFO:lda:<480> log likelihood: -3682356\n",
"INFO:lda:<490> log likelihood: -3681683\n",
"INFO:lda:<500> log likelihood: -3680661\n",
"INFO:lda:<510> log likelihood: -3680511\n",
"INFO:lda:<520> log likelihood: -3680945\n",
"INFO:lda:<530> log likelihood: -3679211\n",
"INFO:lda:<540> log likelihood: -3678517\n",
"INFO:lda:<550> log likelihood: -3678854\n",
"INFO:lda:<560> log likelihood: -3677041\n",
"INFO:lda:<570> log likelihood: -3677213\n",
"INFO:lda:<580> log likelihood: -3678838\n",
"INFO:lda:<590> log likelihood: -3678379\n",
"INFO:lda:<600> log likelihood: -3677847\n",
"INFO:lda:<610> log likelihood: -3678484\n",
"INFO:lda:<620> log likelihood: -3678668\n",
"INFO:lda:<630> log likelihood: -3677680\n",
"INFO:lda:<640> log likelihood: -3676747\n",
"INFO:lda:<650> log likelihood: -3677636\n",
"INFO:lda:<660> log likelihood: -3677485\n",
"INFO:lda:<670> log likelihood: -3678316\n",
"INFO:lda:<680> log likelihood: -3678462\n",
"INFO:lda:<690> log likelihood: -3677225\n",
"INFO:lda:<700> log likelihood: -3676163\n",
"INFO:lda:<710> log likelihood: -3676840\n",
"INFO:lda:<720> log likelihood: -3676684\n",
"INFO:lda:<730> log likelihood: -3676653\n",
"INFO:lda:<740> log likelihood: -3677127\n",
"INFO:lda:<750> log likelihood: -3678371\n",
"INFO:lda:<760> log likelihood: -3676893\n",
"INFO:lda:<770> log likelihood: -3676615\n",
"INFO:lda:<780> log likelihood: -3675608\n",
"INFO:lda:<790> log likelihood: -3674729\n",
"INFO:lda:<800> log likelihood: -3675266\n",
"INFO:lda:<810> log likelihood: -3675882\n",
"INFO:lda:<820> log likelihood: -3675530\n",
"INFO:lda:<830> log likelihood: -3675181\n",
"INFO:lda:<840> log likelihood: -3675366\n",
"INFO:lda:<850> log likelihood: -3673611\n",
"INFO:lda:<860> log likelihood: -3675897\n",
"INFO:lda:<870> log likelihood: -3675115\n",
"INFO:lda:<880> log likelihood: -3674581\n",
"INFO:lda:<890> log likelihood: -3673408\n",
"INFO:lda:<900> log likelihood: -3674789\n",
"INFO:lda:<910> log likelihood: -3675349\n",
"INFO:lda:<920> log likelihood: -3674951\n",
"INFO:lda:<930> log likelihood: -3675805\n",
"INFO:lda:<940> log likelihood: -3674962\n",
"INFO:lda:<950> log likelihood: -3674557\n",
"INFO:lda:<960> log likelihood: -3674487\n",
"INFO:lda:<970> log likelihood: -3674774\n",
"INFO:lda:<980> log likelihood: -3674333\n",
"INFO:lda:<990> log likelihood: -3673754\n",
"INFO:lda:<1000> log likelihood: -3674767\n",
"INFO:lda:<1010> log likelihood: -3673660\n",
"INFO:lda:<1020> log likelihood: -3674027\n",
"INFO:lda:<1030> log likelihood: -3674486\n",
"INFO:lda:<1040> log likelihood: -3674413\n",
"INFO:lda:<1050> log likelihood: -3673640\n",
"INFO:lda:<1060> log likelihood: -3673161\n",
"INFO:lda:<1070> log likelihood: -3674066\n",
"INFO:lda:<1080> log likelihood: -3674446\n",
"INFO:lda:<1090> log likelihood: -3674491\n",
"INFO:lda:<1100> log likelihood: -3674487\n",
"INFO:lda:<1110> log likelihood: -3675207\n",
"INFO:lda:<1120> log likelihood: -3674765\n",
"INFO:lda:<1130> log likelihood: -3674330\n",
"INFO:lda:<1140> log likelihood: -3674069\n",
"INFO:lda:<1150> log likelihood: -3674506\n",
"INFO:lda:<1160> log likelihood: -3673248\n",
"INFO:lda:<1170> log likelihood: -3672928\n",
"INFO:lda:<1180> log likelihood: -3673012\n",
"INFO:lda:<1190> log likelihood: -3675123\n",
"INFO:lda:<1200> log likelihood: -3673590\n",
"INFO:lda:<1210> log likelihood: -3673205\n",
"INFO:lda:<1220> log likelihood: -3672103\n",
"INFO:lda:<1230> log likelihood: -3672740\n",
"INFO:lda:<1240> log likelihood: -3673131\n",
"INFO:lda:<1250> log likelihood: -3671889\n",
"INFO:lda:<1260> log likelihood: -3672175\n",
"INFO:lda:<1270> log likelihood: -3673819\n",
"INFO:lda:<1280> log likelihood: -3673232\n",
"INFO:lda:<1290> log likelihood: -3674267\n",
"INFO:lda:<1300> log likelihood: -3674006\n",
"INFO:lda:<1310> log likelihood: -3673166\n",
"INFO:lda:<1320> log likelihood: -3672681\n",
"INFO:lda:<1330> log likelihood: -3671634\n",
"INFO:lda:<1340> log likelihood: -3673190\n",
"INFO:lda:<1350> log likelihood: -3673596\n",
"INFO:lda:<1360> log likelihood: -3673502\n",
"INFO:lda:<1370> log likelihood: -3673227\n",
"INFO:lda:<1380> log likelihood: -3672041\n",
"INFO:lda:<1390> log likelihood: -3673048\n",
"INFO:lda:<1400> log likelihood: -3673159\n",
"INFO:lda:<1410> log likelihood: -3671847\n",
"INFO:lda:<1420> log likelihood: -3672968\n",
"INFO:lda:<1430> log likelihood: -3672145\n",
"INFO:lda:<1440> log likelihood: -3671737\n",
"INFO:lda:<1450> log likelihood: -3671631\n",
"INFO:lda:<1460> log likelihood: -3671606\n",
"INFO:lda:<1470> log likelihood: -3672180\n",
"INFO:lda:<1480> log likelihood: -3672690\n",
"INFO:lda:<1490> log likelihood: -3671788\n",
"INFO:lda:<1499> log likelihood: -3670611\n"
]
},
{
"data": {
"text/plain": [
"<lda.lda.LDA at 0x2b448d0e780>"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.fit(X)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Topic 0: soviet gorbachev union west german east germany moscow\n",
"Topic 1: united states iraq foreign israel bush war countries\n",
"Topic 2: bush dukakis campaign president democratic republican jackson presidential\n",
"Topic 3: house committee congress bill senate budget tax federal\n",
"Topic 4: court case attorney judge charges trial federal prison\n",
"Topic 5: city water miles area state fire new southern\n",
"Topic 6: market stock dollar trading late exchange new index\n",
"Topic 7: air flight plane space force navy defense aircraft\n",
"Topic 8: school students news university new women president college\n",
"Topic 9: south government africa president military united rebels african\n",
"Topic 10: police people killed two man army three city\n",
"Topic 11: year new show years john film york i\n",
"Topic 12: children mrs family hospital ms medical i wife\n",
"Topic 13: oil prices cents farmers food futures trade cent\n",
"Topic 14: study aids system environmental computer health program new\n",
"Topic 15: company million new inc corp billion bank co\n",
"Topic 16: i people time years dont think get say\n",
"Topic 17: percent year million billion last sales rate increase\n",
"Topic 18: party government political minister opposition people elections new\n",
"Topic 19: workers union employees strike new labor contract jobs\n"
]
}
],
"source": [
"# Print the n_top_words highest-weighted vocabulary words for each topic.\n",
"topic_word = model.topic_word_\n",
"n_top_words = 8\n",
"# Build the vocabulary array once; it does not change between topics.\n",
"vocab_array = np.array(vocab)\n",
"for i, topic_dist in enumerate(topic_word):\n",
"    # argsort is ascending, so the reversed tail slice yields the indices of\n",
"    # the largest weights first; only those few entries are looked up.\n",
"    top_indices = np.argsort(topic_dist)[:-(n_top_words + 1):-1]\n",
"    topic_words = vocab_array[top_indices]\n",
"    print('Topic {}: {}'.format(i, ' '.join(topic_words)))"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment