Skip to content

Instantly share code, notes, and snippets.

View rchurch4's full-sized avatar

Rob Churchill rchurch4

View GitHub Profile
@rchurch4
rchurch4 / ollivanders.py
Last active April 16, 2023 21:48
Sample program for homework 3. Understanding scope and bindings.
import random as r
class Wand:
def __init__(self, length, wood, core):
self.length = length,
self.wood = wood
self.core = core
self.person = 'Nobody'
def set_owner(self, person):
from gdtm.helpers.common import (
    load_dated_dataset,
    load_split_dataset,
    split_dataset_by_date,
    save_split_dataset,
    month,
)

# Placeholder location of the dated tweets file; point this at your own data.
path_to_data = 'path/to/data/dated_tweets.csv'

# Load the file using a tab as the date delimiter and a comma as the
# document delimiter.
dataset = load_dated_dataset(
    path=path_to_data,
    date_delimiter='\t',
    doc_delimiter=',',
)

# Bucket the data by month (epoch functions for day and week exist as well).
split_dataset = split_dataset_by_date(dataset, epoch_function=month)
# Save the split data set to make it easier to load in the future
# This is useful if we are running multiple experiments on the same data
from gdtm.models import dNLDA

# Point each path at your saved Mallet implementation of the corresponding
# model, ending in bin/mallet.
tnd_path = 'path/to/mallet-dtnd/bin/mallet'
lda_path = 'path/to/mallet-dlda/bin/mallet'

# The model receives the paths to the Java code, the data set, and any
# parameters we want to override.
# NOTE(review): `num_time_periods` is not defined in this snippet; it must be
# bound before this call runs -- confirm where it is expected to come from.
model = dNLDA(
    dataset=split_dataset,
    mallet_tnd_path=tnd_path,
    mallet_lda_path=lda_path,
    num_time_periods=num_time_periods,
    tnd_k=30,
    lda_k=30,
    phi=10,
    top_words=20,
    save_path='results/dnlda/',
    run=True,
)
We can make this file beautiful and searchable if this error is corrected: It looks like row 9 should actually have 15 columns, instead of 10. in line 8.
covid,coronavirus,pandemic,tombx7m,mask,kag2020,ccot,foxandfriends,died,wearing,deaths,positive,tcot,governor,morningjoe
americans,plan,economy,jobs,dead,leadership,republicans,ready,trumps,failed,safe,economic,future,record,yogagenie
black,police,person,racism,george,protest,play,based,realcandaceo,game,community,prison,conservatives,shot,mind
taxes,care,money,health,security,kids,social,public,healthcare,high,school,wall,government,federal,leaders
bidens,hunter,corruption,evidence,russia,rudygiuliani,report,threat,jsolomonreports,dems,trumpwarroom,russian,iran,concerned,epochtimes
lives,change,democracy,theyre,feel,court,justice,rights,matter,order,supreme,climate,care,maryltrump,human
president,states,united,harris,kamala,vice,nomination,elect,accept,normal,congratulations,peace,pence,honor,democratic
speech,rally,campaign,news,fake,antifa,massive,tomorrow,corrupt,disinformation,watching,realdonaldtrumps,likes,crying,attacking
donaldtrump,debate,lies,stop,things,racist,debates2020,remember,bettemidler,ques
coronavirus covid covid19 pandemic mask
economy economic reopen unemployment stimulus
racism black police protest prison
health healthcare insurance medicare obamacare
russia international europe asia arab
climate global environment green warming
vice president pence kamala harris
campaign advertisement fundraising rally speech
debate debates moderator question answer
mailin ballot absentee voter fraud
racist gun violence police worst kind allowed rep families loved
party years democratic democrat republican moment ago political children left
lets youre follow retweet twitter patriots hey heart followers happy
care health plan coronavirus public healthcare congress reminder message word
votebluetosaveamerica gop biden2020 voteblue voteblue2020 dumptrump votebluetoendthisnightmare votebluenomatterwho resist whitehouse
democrats american support voters dems jobs hate supporters law wont
trump2020 kag potus kag2020 tcot maga2020 tombx7m trump2020landslide ccot foxandfriends
from gdtm.helpers.common import save_topics

# Arguments: the topics exactly as `get_topics()` returned them, the output
# file path, and an optional delimiter (a comma when omitted).
save_topics(
    topics,
    'results/topics.csv',
    delimiter=',',
)
voting vote delegates
party republican convention
from gdtm.models import GTM
# Set these paths to the path where you saved the Mallet implementation of each model, plus bin/mallet
tnd_path = 'path/to/mallet-tnd/bin/mallet'
gtm_path = 'path/to/mallet-gtm/bin/mallet'
seed_topics_file = 'data/seed_topics.csv'
# We pass in the paths to the java code along with the data set and whatever parameters we want to set
model = GTM(dataset=dataset, mallet_tnd_path=tnd_path, mallet_gtm_path=gtm_path,
from gdtm.models import TND

# Point this at your saved Mallet implementation of the model, ending in
# bin/mallet.
tnd_path = 'path/to/mallet-tnd/bin/mallet'

# The model receives the path to the Java code, the data set, and any
# parameters we want to override.
model = TND(
    dataset=dataset,
    mallet_path=tnd_path,
    k=30,
    beta1=16,
    top_words=20,
)

# Pull the results off the model: the topics first, then the noise
# distribution.
topics = model.get_topics()
noise = model.get_noise_distribution()