Skip to content

Instantly share code, notes, and snippets.

@manuzhang
Last active August 29, 2015 14:05
Show Gist options
  • Save manuzhang/ccbe784165dbd6d7cdb4 to your computer and use it in GitHub Desktop.
Save manuzhang/ccbe784165dbd6d7cdb4 to your computer and use it in GitHub Desktop.
cleanse stock tweets
# Map the ticker symbols found in a stock-tweet dump to company names.
#
# Inputs (tab-separated, read from the current directory):
#   ticker_symbol.tsv          field 0: ticker, optionally double-quoted
#                              field 1: company name
#   stock_symbol_keywords.tsv  field 1: ticker preceded by one prefix character
# Outputs (written to the current directory):
#   company.txt    the company name for every tweet ticker that was recognised
#   exception.txt  every unrecognised ticker, one per line
#
# The script runs unchanged on Python 2 and Python 3: print is always called
# with a single parenthesised string (same output on both), and membership is
# tested with `in` rather than dict.has_key(), which Python 3 removed.

ticker_dict = {}      # lower-cased ticker -> company name (field 1, verbatim)
company_list = []     # company names of recognised tweet tickers, in file order
exception_list = []   # tweet tickers with no entry in ticker_dict, in file order

print("read ticker symbol")
with open("./ticker_symbol.tsv") as ticker_symbol:
    for line in ticker_symbol:
        # A whitespace-only line (e.g. a trailing blank line) has no second
        # field and would otherwise abort the run with an IndexError.
        if not line.strip():
            continue
        words = line.split('\t')
        ticker_dict[words[0].strip('"').lower()] = words[1]

print("read stock tweets")
with open("./stock_symbol_keywords.tsv") as stock_tweets:
    for line in stock_tweets:
        if not line.strip():
            continue
        words = line.split('\t')
        # Drop the first character of the field (presumably the '$' cashtag
        # prefix -- TODO confirm against the data), then any leading/trailing
        # dots, and lower-case to match the keys of ticker_dict.
        ticker = words[1][1:].strip('.').lower()
        if ticker in ticker_dict:
            company_list.append(ticker_dict[ticker])
        else:
            exception_list.append(ticker)

print("write company list out")
# No separator is added here: names are written exactly as they were read.
# NOTE(review): this presumably relies on the company name being the last
# field of its line, so that it still carries the line's newline -- confirm.
with open("company.txt", "w") as fo:
    fo.writelines(company_list)

print("write exception list out")
with open("exception.txt", "w") as fo:
    fo.writelines(ticker + '\n' for ticker in exception_list)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment