Analyze 365Grateful on Twitter
#adapted from minus_context
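# Pulls recent #365grateful tweets via the Twitter search API and plots
# mention counts, term frequencies, and the most common link domains.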
import nltk
import feedparser
import random
import twitter
import yaml
import string
import re
import matplotlib.pyplot as plt
from urlparse import urlparse
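# NLTK corpora used below: run nltk.download('punkt') and nltk.download('stopwords') once if missing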
#uses the format defined here: https://github.com/natematias/minus_context/blob/master/config.yaml.sample
api_key = yaml.load(open('config.yaml'))
api = twitter.Api(consumer_key=api_key['api'],
                  consumer_secret=api_key['consumer_secret'],
                  access_token_key=api_key['access_token_key'],
                  access_token_secret=api_key['access_token_secret'])
gratitude = []
# Fetch recent tweets tagged #365grateful (single search request)
tweets = api.GetSearch("#365grateful", include_entities=True, count=1000)
gratitude = tweets
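# The commented-out loop below would page back through older results with max_id
# until at least 500 tweets have been collected: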
#while(len(gratitude)< 500):
# tweets = api.GetSearch("#365grateful", include_entities=True, count=1000,max_id=gratitude[-1].id)
# gratitude = gratitude + tweets
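# Histogram: how many other users each #365grateful tweet mentions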
mention_count = [len(x.user_mentions) for x in gratitude]
plt.hist(mention_count)
plt.ylabel('Number of tweets')
plt.title('Number of #365grateful tweets mentioning other users', fontsize=12)
plt.xlabel('Number of users mentioned per tweet')
plt.show()
# plot the term frequency distribution among the tweets
stopwords = set(nltk.corpus.stopwords.words('english'))
all_text = ' '.join([tweet.text for tweet in gratitude])
tweet_tokens = [re.sub(r'[^\w\s]', '', x).lower().strip() for x in nltk.word_tokenize(all_text)
                if x.lower() not in stopwords and x not in string.punctuation]
tweet_tokens = [x for x in tweet_tokens if len(x) > 0]
fdist = nltk.FreqDist(tweet_tokens)
fdist.plot(50, cumulative=False)
# deduplicated vocabulary across all the tweets
unique_tokens = list(set(tweet_tokens))
# count URLs
url_count = [len(x.urls) for x in gratitude]
plt.hist(url_count,2)
plt.ylabel('Number of tweets')
plt.title('Number of #365grateful tweets including links', fontsize=12)
plt.xlabel('Number of links per tweet')
plt.show()
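# Expand each tweet's shortened links and plot the most frequent link domains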
expanded_urls = []
for tweet in gratitude:
    expanded_urls = expanded_urls + [url.expanded_url for url in tweet.urls]
baseurls = [urlparse(url).netloc for url in expanded_urls]
urldist = nltk.FreqDist(baseurls)
urldist.plot(4, cumulative=False)
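# Likely dependencies (assumed, not listed in the gist): python-twitter, PyYAML, nltk, matplotlib;
# written for Python 2, given the urlparse import.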