Skip to content

Instantly share code, notes, and snippets.

@coilysiren
Last active October 5, 2015 05:13
Show Gist options
  • Save coilysiren/ad90676b7f17044dbdb2 to your computer and use it in GitHub Desktop.
Save coilysiren/ad90676b7f17044dbdb2 to your computer and use it in GitHub Desktop.
import os
import re
import twython
import dotenv
# Build a single case-insensitive pattern that matches any filler word as a
# whole word.  fillers.txt is read with one entry per line.
with open('fillers.txt', 'r') as f:
    # Blank lines are skipped (an empty alternative would match the empty
    # string at every word boundary) and each entry is escaped so characters
    # such as '.', '+' or '(' are matched literally instead of being
    # interpreted as regex syntax.
    filler_words = '|'.join(
        re.escape(entry.strip())
        for entry in f.read().splitlines()
        if entry.strip()
    )
remove_fillers = re.compile(r'\b('+filler_words+r')\b', flags=re.IGNORECASE)
def remove_filler_words(string):
    """Return ``string`` with every match of ``remove_fillers`` deleted.

    ``remove_fillers`` is the module-level compiled pattern; each match is
    replaced by the empty string, so surrounding whitespace is left as is.
    """
    return remove_fillers.sub('', string)
def remove_punctuation(string):
    """Return ``string`` with a fixed set of punctuation characters deleted.

    The characters removed are exactly those listed in the character class
    below; anything else (e.g. ':' or '/') is left untouched.
    """
    stripped = re.sub(r'[.,\'<>\"#%^~?!*-]', '', string)
    return stripped
def remove_short_words(string):
    """Return ``string`` with standalone one- and two-letter words deleted.

    Only runs of 1-2 letters bounded by word boundaries on both sides are
    removed (case-insensitively); the whitespace around them is kept.
    """
    return re.sub(r'\b[A-Z]{1,2}\b', '', string, flags=re.IGNORECASE)
def remove_non_words(string):
    """Return ``string`` with every non-word token deleted.

    A "word" is a whitespace-delimited token made up solely of the letters
    A-Z (case-insensitive).  Any token containing something else -- digits,
    '@', '/', ':' and so on -- is removed in full; the whitespace around it
    is kept.

    The previous pattern, ``\\b![A-Z]+\\b``, used ``!`` as if it negated the
    character class.  Regular expressions have no such operator, so it only
    matched a literal "!" followed by letters and never removed non-words.
    """
    # (?<!\S)              -> we are at the start of a token
    # (?![A-Z]+(?!\S))     -> ...and the token is NOT letters-only
    # \S+                  -> consume the whole offending token
    return re.sub(
        r'(?<!\S)(?![A-Z]+(?!\S))\S+',
        '',
        string,
        flags=re.IGNORECASE,
    )
class WordContainer(object):
    """Running tally of how often each cleaned word has been seen."""

    def __init__(self):
        # Maps word -> number of times it has been counted so far.
        self.words = {}

    def add_string(self, string):
        """Clean ``string``, add its words to the tally, then print the tally.

        The text is lower-cased and passed through the module's cleaning
        functions in a fixed order before counting.  After counting, the
        full tally is printed as a list of ``(word, count)`` pairs sorted by
        ascending count, followed by a blank line.
        """
        cleaned = string.lower()
        # Order matters: fillers are matched before punctuation is stripped.
        for scrub in (
            remove_filler_words,
            remove_punctuation,
            remove_short_words,
            remove_non_words,
        ):
            cleaned = scrub(cleaned)
        self.count_words(cleaned)
        by_count = sorted(self.words.items(), key=lambda pair: pair[1])
        print(by_count)
        print()

    def count_words(self, string):
        """Increment the tally once for each whitespace-separated word."""
        for token in string.split():
            self.words[token] = self.words.get(token, 0) + 1
class Streamer(twython.TwythonStreamer):
    """Twython streamer that feeds incoming text into a word container.

    The instance must have a ``container`` attribute (an object exposing
    ``add_string``) assigned before any data arrives; this class does not
    create it.
    """

    def on_success(self, data):
        """Pass ``data['text']`` to the container; ignore payloads without it."""
        if 'text' not in data:
            return
        self.container.add_string(data['text'])

    def on_error(self, status_code, data):
        """Print the error status code and the accompanying payload."""
        message = 'error status code: {}\n{}'.format(status_code, data)
        print(message)
# Load settings from the local .env file; the credentials are then read
# from os.environ below.
dotenv.load_dotenv('.env')

# The four credentials are passed positionally, in this order.  A missing
# variable raises KeyError here.
stream = Streamer(*[
    os.environ[name]
    for name in ('API_KEY', 'API_SECRET', 'ACCESS_TOKEN', 'ACCESS_SECRET')
])

# Attach the tally that Streamer.on_success writes into, then start the
# user stream.
stream.container = WordContainer()
stream.user()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment