kateto/Politwoops-Tweet-Analysis.R

## Politwoops-Tweet-Analysis.R
##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
## Download and examine deleted congress tweets       ##
## Data Source:  politwoops.sunlightfoundation.com    ##
## Analysis: Katherine Ognyanova at www.kateto.net    ##
## Visualizations: http://kateto.net/politwoops       ##
##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##

library(RJSONIO)
library(RCurl)
library(plyr)

options(stringsAsFactors = FALSE)

##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
# Data:
# pw.df - deleted tweets data frame
# pol.inf - politician info data frame
# pw.dfs - deleted tweets +  politician info data frame
##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
## Get Politwoops data using package PolitwoopsR      ##


install.packages('devtools')
library(devtools)
install_github('kateto/PolitwoopsR')
library(PolitwoopsR)

# Get tweet JSONs:
pw.df <- get_pw_tweets()
dim(pw.df)
colnames(pw.df)

# Get politician data:
pol.inf <- get_pw_pol()
dim(pol.inf)
head(pol.inf)

# Combine tweet and politician data:
pw.dfs <- merge_pw(pw.df, pol.inf)

##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
## TWEET URLs: Expand and extract domains              ##

# Expand URLs:
pw.dfs$url.decoded <- url_expand(pw.df)

# Extract domains from the URLs:
pw.dfs$url.domain <- url_domain(pw.dfs$url.decoded)

# Any short URLs left in the data after expanding are are problematic -
# they may have been shortened multuple times, or they might be
# broken, incorrect, expired, or pointing to sites flagged as unsafe.

problem.urls <- which( !is.na(pw.dfs$url.decoded) & (grepl("//t\\.co", pw.dfs$url.decoded) |
                                                     grepl("/ow\\.ly", pw.dfs$url.decoded) |
                                                     grepl("/bit\\.ly", pw.dfs$url.decoded) |
                                                     grepl("tinyurl", pw.dfs$url.decoded)))
pw.dfs$url.decoded[problem.urls]


# We could remove those, or give them another pass first to extract the double-shortened urls:
for(i in problem.urls) {
  tryCatch({pw.dfs$url.decoded[i] <- getURLContent(pw.dfs$url.decoded[i], header=T)$header["Location"] },
           error = function(err){print(paste("ERROR: ",err, "at #", i)); return("Incorrect/broken link")}) }

pw.dfs$url.domain <- url_domain(pw.dfs$url.decoded)
pw.dfs$url.domain <- tolower(pw.dfs$url.domain)


detach(package:twitteR)
detach(library:XML)
detach(library:RCurl)


##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
## TWEET TEXT: clean, stem, stem complete, top terms   ##

# Detect tweet language:
library("textcat")

languages <- TC_byte_profiles[names(TC_byte_profiles) %in% c("english", "spanish")]

pw.dfs$lang  <- textcat(pw.dfs$cclean , p=languages)
pw.dfs$lang[is.na(pw.dfs$lang)] <- "en"
pw.dfs$lang[pw.dfs$lang=="english"] <- "en"
pw.dfs$lang[pw.dfs$lang=="spanish"] <- "es"

count(pw.dfs$lang)

detach(package:textcat)

# Clean the text:
pw.dfs$cclean <- pw.dfs$content
pw.dfs$cclean <- clean_text(pw.dfs$cclean)

# Removing stopwords, punctuation, numbers:
library("RWeka")
library("tm")
library("SnowballC")

tw.stop <- c(stopwords('english'), "twitter", "tweets", "tweet", "retweet",
             "tweeting", "account", "rt", "via", "cc", "ht")
pw.dfs$cclean <-  removeWords(pw.dfs$cclean, tw.stop)
pw.dfs$cclean <- removePunctuation(pw.dfs$cclean)
pw.dfs$cclean <- removeNumbers(pw.dfs$cclean)

# Stemming:
stemrw <- function(txt){paste(LovinsStemmer(WordTokenizer(txt)), collapse=" ")}
stemrtm <- function(txt){paste(wordStem(WordTokenizer(txt)), collapse=" ")}
stemc <- function(x, d){ (paste(stemCompletion(WordTokenizer(x), dictionary = d), collapse=" "))  }

dict <- WordTokenizer(pw.dfs$cclean)

pw.dfs$cstem  <- apply(as.data.frame(pw.dfs$cclean), 1, stemrw)

# Note that the stemmer appears to have problems with terms that end in t -
# e.g. it stems "meet" to "mees", "get" to "ges", etc.
# Those might have to be fixed manually later when doing stem completion.

# Create a corpus:
text.corpus <- Corpus(VectorSource(pw.dfs$cstem ))

# Building a Document-Term Matrix:
text.dtm <- TermDocumentMatrix(text.corpus, control = list(minWordLength = 1))
inspect(text.dtm)

# Terms that occur more than 100 times:
findFreqTerms(text.dtm, lowfreq = 100)

# Term frequency:
termFreq <- rowSums(as.matrix(text.dtm))

# which words are associated with "government" and "Obama"?
# Note that words are stemmed:
findAssocs(text.dtm, 'governm', 0.05)
findAssocs(text.dtm, 'obam', 0.05)

# Top 150 terms - with stem completion:
text.mat <- as.matrix(text.dtm)
top.terms <- data.frame(terms = stemCompletion(names(sort(rowSums(text.mat), decreasing=T)[1:150]), dict),
                        freq  = sort(rowSums(text.mat), decreasing=T)[1:150], stringsAsFactors=F)

detach(package:RWeka)
detach(package:tm)
detach(package:SnowballC)


# Top term wordcloud:
library(wordcloud)
library(RColorBrewer)

pald <- brewer.pal(8,"Dark2")
wordcloud(top.terms$terms, top.terms$freq, min.freq=100, rot.per=.15, colors=pald)

detach(package:wordcloud)

##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
## TWEET HASHTAGS: extract all & by party             ##

library("plyr")
library("RWeka")


all.tags <- grep("^#.+", WordTokenizer(tolower(pw.dfs$content)), value=T)
rep.tags <- grep("^#.+", WordTokenizer(tolower(pw.dfs$content[pw.dfs$party=="Rep"])), value=T)
dem.tags <-  grep("^#.+", WordTokenizer(tolower(pw.dfs$content[pw.dfs$party=="Dem"])), value=T)

all.tags <- count(all.tags)[order(count(all.tags)[,2], decreasing=T),]
rep.tags <- count(rep.tags)[order(count(rep.tags)[,2], decreasing=T),]
dem.tags <- count(dem.tags)[order(count(dem.tags)[,2], decreasing=T),]

detach(package:RWeka)


##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
## TWEET SENTIMENT: sentiment and polarity             ##

library(tm)
library(tm.plugin.sentiment)

# NOTE THAT THE tm_tag_score funciton has been renamed to tm_term_score !

tm_tag_score <- tm_term_score

pw.dfs$tw.polarity <- polarity(text.dtm)
pw.dfs$tw.polarity[is.na(pw.dfs$tw.polarity) | is.nan(pw.dfs$tw.polarity)] <- 0

pol.pos <- pos_refs_per_ref(text.dtm)
pol.neg <- neg_refs_per_ref(text.dtm)


detach(package:tm.plugin.sentiment)
detach(package:tm)


library(qdap)

qdap.polarity <- polarity(pw.dfs$cclean)$all

pw.dfs$word.count <- qdap.polarity$wc
pw.dfs$tw.polarity2 <- qdap.polarity$polarity
pw.dfs$tw.polarity2[is.na(pw.dfs$tw.polarity2) | is.nan(pw.dfs$tw.polarity2)] <- 0


detach(package:qdap)

##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
## COLLECT ADDITIONAL DATA FROM TWITTER                ##

library(twitteR)

# Create an app at http://apps.twitter.com/
# Go to the "Keys and access token" tab and get the key info to fill below:
# (if access_token & access_secret are not included, browser authentification is required.

auth <- function(){ setup_twitter_oauth(consumer_key="CONSUMER_KEY_HERE",
                            consumer_secret="CONSUMER_SECRET_HERE")
                          #  access_token="ACCESS_TOKEN_HERE",
                          #  access_secret="ACCESS_SECRET_HERE")
                  }
auth()


# Twiter usernames from Politwoops deleted tweet data:
polit.user <- unique(pw.dfs$twitter)
polit.id <- unique(pw.dfs$id)

# Get info about the users from the Twitter API:
polit.twinfo <-  lookupUsers(polit.user)

# The twitteR class behaves a bit odd at times, so easier to work with a list extracted from it:
polit.tw <- c("id", "name", "statusesCount", "followersCount", "friendsCount", "profileImageUrl",
               "created", "verified", "location", "screenName")

polit.tw <- lapply(polit.twinfo, function(x){ y <- list(); for(i in tw.fields) y[[i]] <- x[[i]]; y })


# Download Twitter user profile images
# (to use in visualizations)

library("png")
library("jpeg")

img.dld <- data.frame( file.url=sapply(polit.tw, function(x){x$profileImageUrl}),
                       user.id=sapply(polit.tw, function(x){x$id}),
                       user.name=names(polit.tw), stringsAsFactors=F )
img.dld$file.url  <-  sub("_normal","", img.dld$file.url, fixed=T)
img.dld$file.name <- paste0(img.dld$user.name, sub(".*(\\.[^\\.]+$)","\\1", img.dld$file.url))
img.dld$file.name <-  gsub("\\.jpeg","\\.jpg", img.dld$file.name)

img.dld <- img.dld[order(img.dld$user.id),]

for (i in 1:nrow(img.dld) ) {
  download.file(img.dld$file.url[i],  img.dld$file.name[i],  mode = 'wb')  }

detach(package:jpeg)
detach(package:png)


# Get info about the followers, followees & timeline of each politician:
# Rotate the type of data obtained from twitter (through get.ind$iter),
# and pause for sleep.minutes when the Twitter rate limit is reached
# (this is slow due to rate limits and may take a few days to run).
get.ind <- data.frame(fr=1, fol=1, tw=1, current="fr", iter=0)
sleep.minutes <- 5

while(any(get.ind[,1:3] <= length(polit.tw))) {
  get.ind$iter <- get.ind$iter+1
  tryCatch({ if(get.ind$fr <= length(polit.tw) & get.ind$iter %% 3==1) {
                 get.ind$current <- "fr"
                 rorl <- ifelse(polit.tw[[get.ind$fr]]$friendsCount < 5000, 0, 3000) # the API only serves 5000 at a time
                 polit.tw[[get.ind$fr]]$friend.ids <- polit.twinfo[[get.ind$fr]]$getFriendIDs(retryOnRateLimit=rorl)
                 get.ind$fr <- get.ind$fr+1 }
             if(get.ind$fol <= length(polit.tw) & get.ind$iter %% 3==2) {
                 get.ind$current <- "fol"
                 rorl <- ifelse(polit.tw[[get.ind$fr]]$followersCount < 5000, 0, 3000) # the API only serves 5000 at a time
                 polit.tw[[get.ind$fol]]$follower.ids <- polit.twinfo[[get.ind$fol]]$getFollowerIDs(retryOnRateLimit=rorl)
                 get.ind$fol <- get.ind$fol+1 }
             if(get.ind$tw <= length(polit.tw) & get.ind$iter %% 3==0) {
                 get.ind$current <- "tw"
                 polit.tw[[get.ind$tw]]$tweets <- userTimeline(names(polit.tw)[get.ind$tw], retryOnRateLimit=0)
                 get.ind$tw <- get.ind$tw+1  }
            },
            warning = function(warn){ print(paste("Warning at ",  get.ind$current, "=", get.ind[get.ind$current], "and time =", Sys.time()))
                                      Sys.sleep(sleep.minutes*60) } ) }


detach(package:twitteR)


##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
	##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
	## Download and examine deleted congress tweets ##
	## Data Source: politwoops.sunlightfoundation.com ##
	## Analysis: Katherine Ognyanova at www.kateto.net ##
	## Visualizations: http://kateto.net/politwoops ##
	##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##

	library(RJSONIO)
	library(RCurl)
	library(plyr)

	options(stringsAsFactors = FALSE)

	##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
	# Data:
	# pw.df - deleted tweets data frame
	# pol.inf - politician info data frame
	# pw.dfs - deleted tweets + politician info data frame
	##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
	## Get Politwoops data using package PolitwoopsR ##


	install.packages('devtools')
	library(devtools)
	install_github('kateto/PolitwoopsR')
	library(PolitwoopsR)

	# Get tweet JSONs:
	pw.df <- get_pw_tweets()
	dim(pw.df)
	colnames(pw.df)

	# Get politician data:
	pol.inf <- get_pw_pol()
	dim(pol.inf)
	head(pol.inf)

	# Combine tweet and politician data:
	pw.dfs <- merge_pw(pw.df, pol.inf)

	##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
	## TWEET URLs: Expand and extract domains ##

	# Expand URLs:
	pw.dfs$url.decoded <- url_expand(pw.df)

	# Extract domains from the URLs:
	pw.dfs$url.domain <- url_domain(pw.dfs$url.decoded)

	# Any short URLs left in the data after expanding are are problematic -
	# they may have been shortened multuple times, or they might be
	# broken, incorrect, expired, or pointing to sites flagged as unsafe.

	problem.urls <- which( !is.na(pw.dfs$url.decoded) & (grepl("//t\\.co", pw.dfs$url.decoded) \|
	grepl("/ow\\.ly", pw.dfs$url.decoded) \|
	grepl("/bit\\.ly", pw.dfs$url.decoded) \|
	grepl("tinyurl", pw.dfs$url.decoded)))
	pw.dfs$url.decoded[problem.urls]


	# We could remove those, or give them another pass first to extract the double-shortened urls:
	for(i in problem.urls) {
	tryCatch({pw.dfs$url.decoded[i] <- getURLContent(pw.dfs$url.decoded[i], header=T)$header["Location"] },
	error = function(err){print(paste("ERROR: ",err, "at #", i)); return("Incorrect/broken link")}) }

	pw.dfs$url.domain <- url_domain(pw.dfs$url.decoded)
	pw.dfs$url.domain <- tolower(pw.dfs$url.domain)


	detach(package:twitteR)
	detach(library:XML)
	detach(library:RCurl)


	##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
	## TWEET TEXT: clean, stem, stem complete, top terms ##

	# Detect tweet language:
	library("textcat")

	languages <- TC_byte_profiles[names(TC_byte_profiles) %in% c("english", "spanish")]

	pw.dfs$lang <- textcat(pw.dfs$cclean , p=languages)
	pw.dfs$lang[is.na(pw.dfs$lang)] <- "en"
	pw.dfs$lang[pw.dfs$lang=="english"] <- "en"
	pw.dfs$lang[pw.dfs$lang=="spanish"] <- "es"

	count(pw.dfs$lang)

	detach(package:textcat)

	# Clean the text:
	pw.dfs$cclean <- pw.dfs$content
	pw.dfs$cclean <- clean_text(pw.dfs$cclean)

	# Removing stopwords, punctuation, numbers:
	library("RWeka")
	library("tm")
	library("SnowballC")

	tw.stop <- c(stopwords('english'), "twitter", "tweets", "tweet", "retweet",
	"tweeting", "account", "rt", "via", "cc", "ht")
	pw.dfs$cclean <- removeWords(pw.dfs$cclean, tw.stop)
	pw.dfs$cclean <- removePunctuation(pw.dfs$cclean)
	pw.dfs$cclean <- removeNumbers(pw.dfs$cclean)

	# Stemming:
	stemrw <- function(txt){paste(LovinsStemmer(WordTokenizer(txt)), collapse=" ")}
	stemrtm <- function(txt){paste(wordStem(WordTokenizer(txt)), collapse=" ")}
	stemc <- function(x, d){ (paste(stemCompletion(WordTokenizer(x), dictionary = d), collapse=" ")) }

	dict <- WordTokenizer(pw.dfs$cclean)

	pw.dfs$cstem <- apply(as.data.frame(pw.dfs$cclean), 1, stemrw)

	# Note that the stemmer appears to have problems with terms that end in t -
	# e.g. it stems "meet" to "mees", "get" to "ges", etc.
	# Those might have to be fixed manually later when doing stem completion.

	# Create a corpus:
	text.corpus <- Corpus(VectorSource(pw.dfs$cstem ))

	# Building a Document-Term Matrix:
	text.dtm <- TermDocumentMatrix(text.corpus, control = list(minWordLength = 1))
	inspect(text.dtm)

	# Terms that occur more than 100 times:
	findFreqTerms(text.dtm, lowfreq = 100)

	# Term frequency:
	termFreq <- rowSums(as.matrix(text.dtm))

	# which words are associated with "government" and "Obama"?
	# Note that words are stemmed:
	findAssocs(text.dtm, 'governm', 0.05)
	findAssocs(text.dtm, 'obam', 0.05)

	# Top 150 terms - with stem completion:
	text.mat <- as.matrix(text.dtm)
	top.terms <- data.frame(terms = stemCompletion(names(sort(rowSums(text.mat), decreasing=T)[1:150]), dict),
	freq = sort(rowSums(text.mat), decreasing=T)[1:150], stringsAsFactors=F)

	detach(package:RWeka)
	detach(package:tm)
	detach(package:SnowballC)


	# Top term wordcloud:
	library(wordcloud)
	library(RColorBrewer)

	pald <- brewer.pal(8,"Dark2")
	wordcloud(top.terms$terms, top.terms$freq, min.freq=100, rot.per=.15, colors=pald)

	detach(package:wordcloud)

	##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
	## TWEET HASHTAGS: extract all & by party ##

	library("plyr")
	library("RWeka")


	all.tags <- grep("^#.+", WordTokenizer(tolower(pw.dfs$content)), value=T)
	rep.tags <- grep("^#.+", WordTokenizer(tolower(pw.dfs$content[pw.dfs$party=="Rep"])), value=T)
	dem.tags <- grep("^#.+", WordTokenizer(tolower(pw.dfs$content[pw.dfs$party=="Dem"])), value=T)

	all.tags <- count(all.tags)[order(count(all.tags)[,2], decreasing=T),]
	rep.tags <- count(rep.tags)[order(count(rep.tags)[,2], decreasing=T),]
	dem.tags <- count(dem.tags)[order(count(dem.tags)[,2], decreasing=T),]

	detach(package:RWeka)


	##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
	## TWEET SENTIMENT: sentiment and polarity ##

	library(tm)
	library(tm.plugin.sentiment)

	# NOTE THAT THE tm_tag_score funciton has been renamed to tm_term_score !

	tm_tag_score <- tm_term_score

	pw.dfs$tw.polarity <- polarity(text.dtm)
	pw.dfs$tw.polarity[is.na(pw.dfs$tw.polarity) \| is.nan(pw.dfs$tw.polarity)] <- 0

	pol.pos <- pos_refs_per_ref(text.dtm)
	pol.neg <- neg_refs_per_ref(text.dtm)


	detach(package:tm.plugin.sentiment)
	detach(package:tm)


	library(qdap)

	qdap.polarity <- polarity(pw.dfs$cclean)$all

	pw.dfs$word.count <- qdap.polarity$wc
	pw.dfs$tw.polarity2 <- qdap.polarity$polarity
	pw.dfs$tw.polarity2[is.na(pw.dfs$tw.polarity2) \| is.nan(pw.dfs$tw.polarity2)] <- 0


	detach(package:qdap)

	##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##
	## COLLECT ADDITIONAL DATA FROM TWITTER ##

	library(twitteR)

	# Create an app at http://apps.twitter.com/
	# Go to the "Keys and access token" tab and get the key info to fill below:
	# (if access_token & access_secret are not included, browser authentification is required.

	auth <- function(){ setup_twitter_oauth(consumer_key="CONSUMER_KEY_HERE",
	consumer_secret="CONSUMER_SECRET_HERE")
	# access_token="ACCESS_TOKEN_HERE",
	# access_secret="ACCESS_SECRET_HERE")
	}
	auth()


	# Twiter usernames from Politwoops deleted tweet data:
	polit.user <- unique(pw.dfs$twitter)
	polit.id <- unique(pw.dfs$id)

	# Get info about the users from the Twitter API:
	polit.twinfo <- lookupUsers(polit.user)

	# The twitteR class behaves a bit odd at times, so easier to work with a list extracted from it:
	polit.tw <- c("id", "name", "statusesCount", "followersCount", "friendsCount", "profileImageUrl",
	"created", "verified", "location", "screenName")

	polit.tw <- lapply(polit.twinfo, function(x){ y <- list(); for(i in tw.fields) y[[i]] <- x[[i]]; y })


	# Download Twitter user profile images
	# (to use in visualizations)

	library("png")
	library("jpeg")

	img.dld <- data.frame( file.url=sapply(polit.tw, function(x){x$profileImageUrl}),
	user.id=sapply(polit.tw, function(x){x$id}),
	user.name=names(polit.tw), stringsAsFactors=F )
	img.dld$file.url <- sub("_normal","", img.dld$file.url, fixed=T)
	img.dld$file.name <- paste0(img.dld$user.name, sub(".*(\\.[^\\.]+$)","\\1", img.dld$file.url))
	img.dld$file.name <- gsub("\\.jpeg","\\.jpg", img.dld$file.name)

	img.dld <- img.dld[order(img.dld$user.id),]

	for (i in 1:nrow(img.dld) ) {
	download.file(img.dld$file.url[i], img.dld$file.name[i], mode = 'wb') }

	detach(package:jpeg)
	detach(package:png)


	# Get info about the followers, followees & timeline of each politician:
	# Rotate the type of data obtained from twitter (through get.ind$iter),
	# and pause for sleep.minutes when the Twitter rate limit is reached
	# (this is slow due to rate limits and may take a few days to run).
	get.ind <- data.frame(fr=1, fol=1, tw=1, current="fr", iter=0)
	sleep.minutes <- 5

	while(any(get.ind[,1:3] <= length(polit.tw))) {
	get.ind$iter <- get.ind$iter+1
	tryCatch({ if(get.ind$fr <= length(polit.tw) & get.ind$iter %% 3==1) {
	get.ind$current <- "fr"
	rorl <- ifelse(polit.tw[[get.ind$fr]]$friendsCount < 5000, 0, 3000) # the API only serves 5000 at a time
	polit.tw[[get.ind$fr]]$friend.ids <- polit.twinfo[[get.ind$fr]]$getFriendIDs(retryOnRateLimit=rorl)
	get.ind$fr <- get.ind$fr+1 }
	if(get.ind$fol <= length(polit.tw) & get.ind$iter %% 3==2) {
	get.ind$current <- "fol"
	rorl <- ifelse(polit.tw[[get.ind$fr]]$followersCount < 5000, 0, 3000) # the API only serves 5000 at a time
	polit.tw[[get.ind$fol]]$follower.ids <- polit.twinfo[[get.ind$fol]]$getFollowerIDs(retryOnRateLimit=rorl)
	get.ind$fol <- get.ind$fol+1 }
	if(get.ind$tw <= length(polit.tw) & get.ind$iter %% 3==0) {
	get.ind$current <- "tw"
	polit.tw[[get.ind$tw]]$tweets <- userTimeline(names(polit.tw)[get.ind$tw], retryOnRateLimit=0)
	get.ind$tw <- get.ind$tw+1 }
	},
	warning = function(warn){ print(paste("Warning at ", get.ind$current, "=", get.ind[get.ind$current], "and time =", Sys.time()))
	Sys.sleep(sleep.minutes*60) } ) }



	detach(package:twitteR)


	##vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv##