Skip to content

Instantly share code, notes, and snippets.

@vanatteveldt
Created June 2, 2016 04:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vanatteveldt/55ad799e1d89289997b824ae9e850807 to your computer and use it in GitHub Desktop.
Save vanatteveldt/55ad799e1d89289997b824ae9e850807 to your computer and use it in GitHub Desktop.
# Fetch a few recent tweets matching "hong" and plot a word cloud of their text.
library(twitteR)

# load() restores the objects saved in the .rda file; presumably these are the
# tw_* credentials passed to the OAuth setup below -- verify against the file.
load("~/learningr/api_auth.rda")
twitteR::setup_twitter_oauth(
  tw_consumer_key,
  tw_consumer_secret,
  tw_token,
  tw_token_secret
)

# Search, then bind the per-tweet data frames into one data frame.
tweets <- plyr::ldply(
  searchTwitteR("hong", n = 10, resultType = "recent"),
  as.data.frame
)

library(RTextTools)
library(corpustools)

# Build a matrix from the tweet text and hand it to the word cloud plot;
# sqrt is passed as the frequency function.
dtm <- create_matrix(tweets$text)
dtm.wordcloud(dtm, freq.fun = sqrt)
# Basic value types and the rules for variable names.

# class() reports the type of a value: here an integer sequence and a Date.
x <- 1:5
class(x)
x <- "data"
d <- as.Date("2001-01-01")
class(d)

# c() builds a vector and can also extend an existing one.
x <- c(1, 2, 3)
x
x2 <- c(x, 4)

# Valid names may contain letters, digits, "_" and ".".
x123 <- 1
x_y <- 1
x.y <- 1

# A name cannot start with a digit or contain "$". The two assignments below
# are syntax errors that stop the whole script from parsing, so they are kept
# only as commented-out examples:
# 123x = 1
# x$ = 2
# Data frame basics: building one, extracting columns, and character versus
# factor columns.

# stringsAsFactors = FALSE keeps the name column as character.
df <- data.frame(
  id = 1:3,
  name = c("john", "mary", "pete"),
  stringsAsFactors = FALSE
)
df$id
class(df$name)
?data.frame
df

# Ways to reach a column. `$` takes the name literally, so df$col looks for a
# column called "col" (there is none, so the result is NULL). Use [[ ]] when
# the column name is stored in a variable.
df$name
df[["name"]]
col <- "name"
df[[col]]
df$col

# With string-to-factor conversion on, name and group become factors and
# as.character() is needed to get plain strings back. The flag is spelled out
# because the data.frame() default changed to FALSE in R 4.0.0.
df <- data.frame(
  id = 1:3,
  name = c("john", "mary", "pete"),
  group = c("a", "a", "b"),
  stringsAsFactors = TRUE
)
df$name2 <- as.character(df$name)
class(df$name)
class(df$group)

# Alternative: keep strings as character and convert only the grouping column.
df <- data.frame(
  id = 1:3,
  name = c("john", "mary", "pete"),
  group = c("a", "a", "b"),
  stringsAsFactors = FALSE
)
df$group <- as.factor(df$group)
df
# Quick inspection of the tweets and df data frames.
head(tweets, n = 6L)        # first six rows
names(tweets)               # column names
as.matrix(df)               # data frame coerced to a matrix
summary(tweets)             # per-column summary
mean(tweets$retweetCount)   # average of the retweetCount column
as.list(df)                 # data frame as a list of its columns
# Load the income data and practise row/column subsetting and column
# arithmetic.
# NOTE(review): the original called head(d2) and filtered income.decile, but
# neither object is created anywhere in this script. income.decile is now read
# from the same CSV and head() inspects d -- confirm this matches the intent.
income.decile <- read.csv("data/income_topdecile.csv")

# Complete cases only: na.omit() drops every row with an NA in any column.
d <- na.omit(income.decile)
head(d)
d[1:10, ]   # rows by position
d[, 1:2]    # columns by position

# Keep the rows where at least one of France or Germany is not NA.
income.decile <- income.decile[
  !is.na(income.decile$France) | !is.na(income.decile$Germany),
]
d

# subset() evaluates the condition inside the data frame; then restrict d to
# the rows with Year > 1945.
d <- subset(income.decile, !is.na(France))
d <- d[d$Year > 1945, ]
d

# New column: the mean of the U.S. and U.K. columns, built in two steps.
d$anglo <- d$U.S. + d$U.K.
d$anglo <- d$anglo / 2

# Conditional update of part of a column. d was already filtered on the same
# condition above, so the mask is kept only to show the pattern.
d$France[d$Year > 1945] <- d$France[d$Year > 1945] / 2

# Where anglo is below Europe, copy the Europe value. which() drops
# comparisons that evaluate to NA; a logical index containing NA would make
# this multi-value assignment fail.
anglo_lower <- which(d$anglo < d$Europe)
d$anglo[anglo_lower] <- d$Europe[anglo_lower]
d$anglo[which(d$anglo <= d$Europe)] <- 1

# Assigning NULL removes the column again.
d$anglo <- NULL
d
# Recoding: deriving new columns of d from existing ones.

# Asking for a column that does not exist gives NULL rather than an error.
d$sdfgsdfgfdsg

# The same comparison stored as logical, as numeric, and as a text label.
d$usinq <- d$U.S. > d$Europe
d$usinq <- as.numeric(d$U.S. > d$Europe)
d$usinq <- ifelse(
  d$U.S. > d$Europe,
  "US higher",
  "US lower"
)

# Going through a factor turns the labels into their integer level codes.
d$usinq2 <- as.numeric(factor(d$usinq))
d
class(d$usinq)

# Converting text that is not a number gives NA (with a warning).
as.numeric("three")
d$usinq2 <- ifelse(d$usinq == "US lower", 1, 0)

# Period labels by hand: start with one value, then overwrite by condition.
d$period <- "before"
d$period[d$Year > 1945] <- "after"
d$period[d$Year > 1980] <- "recent"

# The same binning in a single call to cut().
d$period2 <- cut(
  d$Year,
  breaks = c(1900, 1945, 1980, 2020),
  labels = c("before", "after", "recent")
)
help("cut")

# Overwrite only the matching rows.
d$usinq2[d$usinq == "US lower"] <- 2
help.search("recode")
factor()
nrow(tweets)
d
# Pattern matching on the tweet text.

# Replace every occurrence of "hong" or "kong" with a marker. This match is
# case-sensitive, unlike the filter below.
tweets$text2 <- gsub("hong|kong", "@@@@@@", tweets$text)

# Keep only the tweets whose text contains "hong", ignoring case.
hktweets <- tweets[grepl("hong", tweets$text, ignore.case = TRUE), ]
# Renaming columns and sorting rows of d.
d

# Rename by position: the first three columns at once, then the fourth.
colnames(d)[1:3] <- c("Jaar", "US", "UK")
d
colnames(d)[4] <- "Deutschland"

# Rename by looking the column up by name. If the line above already renamed
# "Germany" (presumably column 4 -- verify), which() finds nothing and this
# changes nothing.
colnames(d)[which(colnames(d) == "Germany")] <- "Deutschland"
d

# Rename with an old = new mapping.
d <- plyr::rename(d, c("Europe" = "EU"))
d

# Sort by year, descending: via the decreasing flag, or by negating the key.
d[order(d$Jaar, decreasing = TRUE), ]
d[order(-d$Jaar), ]

# Sort by EU, then by Jaar. plyr is never attached with library() in this
# script, so the function is called through its namespace like
# plyr::rename() above.
plyr::arrange(d, EU, Jaar)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment