Last active
November 9, 2022 18:21
-
-
Save mmmayo13/d4e39dca78d914ea1937dbe7054841da to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hashtags & mentions
#
# Count how often each hashtag (#...) and mention (@...) occurs in the
# 'Tweet text' column of tweet_df, then print the ten most frequent of each.
# Only constructs that behave the same on Python 2.7 and Python 3 are used.
from collections import Counter

# Counter is a dict subclass, so tag_dict / mention_dict can still be used
# as plain {token: count} mappings by any later code.
tag_dict = Counter()
mention_dict = Counter()

# string.punctuation contains '#', '@' and '_', so the leading marker (and
# any other punctuation inside the token) is removed from the counted key.
punctuation_chars = set(string.punctuation)

# Iterate over the column directly: DataFrame.ix was removed in pandas 1.0.
for tweet_text in tweet_df['Tweet text']:
    for word in tweet_text.lower().split():
        # Keep only tokens that start with a marker and have text after it.
        if len(word) < 2 or word[0] not in '#@':
            continue
        # string.maketrans() does not exist on Python 3, and the two-argument
        # form of translate() is rejected by Python 2 unicode strings, so the
        # punctuation is filtered out character by character instead.
        key = ''.join(ch for ch in word if ch not in punctuation_chars)
        if not key:
            # The token was punctuation only (e.g. '#!'): nothing to count.
            continue
        if word[0] == '#':
            tag_dict[key] += 1
        else:
            mention_dict[key] += 1

# The 10 most popular tags and counts
top_tags_sorted = tag_dict.most_common(10)  # [(tag, count), ...], highest first
top_tags = dict(top_tags_sorted)
print('Top 10 hashtags:')
print('----------------')
for tag, count in top_tags_sorted:
    # A single pre-formatted string prints identically on Python 2 and 3.
    print('%s - %s' % (tag, count))

# The 10 most popular mentions and counts
top_mentions_sorted = mention_dict.most_common(10)
top_mentions = dict(top_mentions_sorted)
print('\nTop 10 mentions:')
print('----------------')
for mention, count in top_mentions_sorted:
    print('%s - %s' % (mention, count))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
AttributeError: module 'string' has no attribute 'translate'