Skip to content

Instantly share code, notes, and snippets.

View rchurch4's full-sized avatar

Rob Churchill rchurch4

View GitHub Profile
from gdtm.models import NLDA

# Each path below is the directory where that model's Mallet implementation
# was saved, followed by bin/mallet.
tnd_path = 'path/to/mallet-tnd/bin/mallet'
lda_path = 'path/to/mallet-lda/bin/mallet'

# Construct the model from the data set, the paths to the Java code, and the
# parameters we want to set.
# NOTE(review): `dataset` must already be defined when this runs -- confirm it
# is loaded before this snippet.
model = NLDA(
    dataset=dataset,
    mallet_tnd_path=tnd_path,
    mallet_lda_path=lda_path,
    tnd_k=30,
    lda_k=30,
    nlda_phi=10,
    top_words=20,
)
from gdtm.helpers.common import load_flat_dataset

# Load the sample tweets file, passing a single space as the delimiter.
dataset = load_flat_dataset(
    'sample_tweets.csv',
    delimiter=' ',
)
# Example of the first entry -- dataset[0] : ['obama', 'calls', 'surrender', 'black', ...]
from gdtm.models import TND

# The path is the directory where the Mallet implementation of the model was
# saved, followed by bin/mallet.
tnd_path = 'path/to/mallet-tnd/bin/mallet'

# Construct the model from the data set, the path to the Java code, and the
# parameters we want to set.
model = TND(
    dataset=dataset,
    mallet_path=tnd_path,
    k=30,
    beta1=16,
    top_words=20,
)

# Fetch the topics first, then the noise distribution (same order as before).
topics, noise = model.get_topics(), model.get_noise_distribution()
from gdtm.models import GTM

# Each Mallet path below is the directory where that model's Mallet
# implementation was saved, followed by bin/mallet.
gtm_path = 'path/to/mallet-gtm/bin/mallet'
tnd_path = 'path/to/mallet-tnd/bin/mallet'
# Path to the seed topics CSV file.
seed_topics_file = 'data/seed_topics.csv'

# The model receives the paths to the Java code, the data set, and whatever
# parameters we want to set.
# NOTE(review): the GTM(...) call that follows appears to be cut off in this
# copy -- restore its remaining arguments before running.
model = GTM(dataset=dataset, mallet_tnd_path=tnd_path, mallet_gtm_path=gtm_path,
voting vote delegates
party republican convention
from gdtm.helpers.common import save_topics

# save_topics takes the topics exactly as `get_topics()` returned them, a file
# path, and optionally a delimiter (the default is a comma).
save_topics(
    topics,
    'results/topics.csv',
    delimiter=',',
)
racist gun violence police worst kind allowed rep families loved
party years democratic democrat republican moment ago political children left
lets youre follow retweet twitter patriots hey heart followers happy
care health plan coronavirus public healthcare congress reminder message word
votebluetosaveamerica gop biden2020 voteblue voteblue2020 dumptrump votebluetoendthisnightmare votebluenomatterwho resist whitehouse
democrats american support voters dems jobs hate supporters law wont
trump2020 kag potus kag2020 tcot maga2020 tombx7m trump2020landslide ccot foxandfriends
coronavirus covid covid19 pandemic mask
economy economic reopen unemployment stimulus
racism black police protest prison
health healthcare insurance medicare obamacare
russia international europe asia arab
climate global environment green warming
vice president pence kamala harris
campaign advertisement fundraising rally speech
debate debates moderator question answer
mailin ballot absentee voter fraud
We can make this file beautiful and searchable if this error is corrected: It looks like row 9 should actually have 15 columns, instead of 10. in line 8.
covid,coronavirus,pandemic,tombx7m,mask,kag2020,ccot,foxandfriends,died,wearing,deaths,positive,tcot,governor,morningjoe
americans,plan,economy,jobs,dead,leadership,republicans,ready,trumps,failed,safe,economic,future,record,yogagenie
black,police,person,racism,george,protest,play,based,realcandaceo,game,community,prison,conservatives,shot,mind
taxes,care,money,health,security,kids,social,public,healthcare,high,school,wall,government,federal,leaders
bidens,hunter,corruption,evidence,russia,rudygiuliani,report,threat,jsolomonreports,dems,trumpwarroom,russian,iran,concerned,epochtimes
lives,change,democracy,theyre,feel,court,justice,rights,matter,order,supreme,climate,care,maryltrump,human
president,states,united,harris,kamala,vice,nomination,elect,accept,normal,congratulations,peace,pence,honor,democratic
speech,rally,campaign,news,fake,antifa,massive,tomorrow,corrupt,disinformation,watching,realdonaldtrumps,likes,crying,attacking
donaldtrump,debate,lies,stop,things,racist,debates2020,remember,bettemidler,ques
from gdtm.models import dNLDA

# Each path below is the directory where that model's Mallet implementation
# was saved, followed by bin/mallet.
tnd_path = 'path/to/mallet-dtnd/bin/mallet'
lda_path = 'path/to/mallet-dlda/bin/mallet'

# Construct the model from the data set, the paths to the Java code, and the
# parameters we want to set.
# NOTE(review): `split_dataset` and `num_time_periods` are not defined in this
# snippet -- confirm they are created before this runs.
model = dNLDA(
    dataset=split_dataset,
    mallet_tnd_path=tnd_path,
    mallet_lda_path=lda_path,
    num_time_periods=num_time_periods,
    tnd_k=30,
    lda_k=30,
    phi=10,
    top_words=20,
    save_path='results/dnlda/',
    run=True,
)