## vanatteveldt/hk session 1.r

## hk session 1.r
library(twitteR)
# Load the saved API credentials (presumably the tw_* objects used below)
# and authenticate this session against the Twitter API.
load("~/learningr/api_auth.rda")
twitteR::setup_twitter_oauth(
  tw_consumer_key, tw_consumer_secret, tw_token, tw_token_secret
)

# Search for up to 10 recent tweets matching "hong", then convert each result
# with as.data.frame and bind everything into a single data frame.
tweets <- searchTwitteR("hong", n = 10, resultType = "recent")
tweets <- plyr::ldply(tweets, as.data.frame)

library(RTextTools)
library(corpustools)
# Build a document-term matrix from the tweet texts and draw a word cloud,
# passing sqrt as the frequency transformation.
dtm <- create_matrix(tweets$text)
dtm.wordcloud(dtm, freq.fun = sqrt)


# Vectors and classes ----
x <- 1:5
class(x) # "integer"

x <- "data" # the same name can be rebound to a value of another type

d <- as.Date("2001-01-01")
class(d) # "Date"

# c() combines values into a vector and can also extend an existing one.
x <- c(1, 2, 3)
x

x2 <- c(x, 4)


# Valid and invalid variable names ----
x123 <- 1
# Invalid: a name cannot start with a digit. Kept commented out because the
# parse error would otherwise prevent the whole script from being sourced.
# 123x = 1

x_y <- 1
x.y <- 1
# Invalid: `$` is the extraction operator and cannot be part of a name.
# x$ = 2

# Data frames: construction and column access ----
df <- data.frame(
  id = 1:3,
  name = c("john", "mary", "pete"),
  stringsAsFactors = FALSE
)

df$id
class(df$name)

?data.frame


df
df$name
df[["name"]]

# `[[` evaluates its argument, so a column name stored in a variable works;
# `$` takes the name literally and looks for a column called "col" (NULL here).
col <- "name"

df[[col]]
df$col

# Strings as factors: stringsAsFactors is spelled out because its default
# became FALSE in R 4.0.0. Without it, the class() calls below would report
# "character" and the as.character() conversion would be a no-op.
df <- data.frame(
  id = 1:3,
  name = c("john", "mary", "pete"),
  group = c("a", "a", "b"),
  stringsAsFactors = TRUE
)

df$name2 <- as.character(df$name)

class(df$name)
class(df$group)

# Alternative: keep strings as character and convert only selected columns.
df <- data.frame(
  id = 1:3,
  name = c("john", "mary", "pete"),
  group = c("a", "a", "b"),
  stringsAsFactors = FALSE
)
df$group <- as.factor(df$group)

df
# Inspect the tweets data frame: first rows and column names.
head(tweets)
names(tweets)

# Coerce the small example data frame to a matrix.
as.matrix(df)

summary(tweets)

mean(tweets[["retweetCount"]])


# View the same data frame as a plain list of its columns.
as.list(df)


# Load the income data. The raw table is kept as `income.decile` because later
# steps subset it again; `d` is the working copy. (Previously the table was
# only stored in `d`, leaving `income.decile` and `d2` undefined.)
income.decile <- read.csv("data/income_topdecile.csv")

# Drop every row that has an NA in any column.
d <- na.omit(income.decile)
head(d)

# Rows 1-10 with all columns, then all rows of the first two columns.
d[1:10, ]
d[, 1:2]

# Keep rows where France or Germany (or both) is not missing.
income.decile <- income.decile[
  (!is.na(income.decile$France)) | (!is.na(income.decile$Germany)),
]
d

# subset() evaluates the condition inside the data frame.
d <- subset(income.decile, !is.na(France))

d <- d[d$Year > 1945, ]

d
# Derive a column: the mean of the U.S. and U.K. values.
d$anglo <- d$U.S. + d$U.K.
d$anglo <- d$anglo / 2
# Assign to a subset of a column: halve France for years after 1945.
d$France[d$Year > 1945] <- d$France[d$Year > 1945] / 2

# Where anglo is below Europe, replace it with the Europe value.
d$anglo[d$anglo < d$Europe] <- d$Europe[d$anglo < d$Europe]

d$anglo[d$anglo <= d$Europe] <- 1

# Assigning NULL removes a column; reading a non-existent column gives NULL.
d$anglo <- NULL
d
d$sdfgsdfgfdsg

# A comparison yields a logical column ...
d$usinq <- d$U.S. > d$Europe

# ... which as.numeric() converts to 0/1 ...
d$usinq <- as.numeric(d$U.S. > d$Europe)

# ... and which ifelse() can map onto labels.
d$usinq <- ifelse(d$U.S. > d$Europe, "US higher", "US lower")

# Going through a factor turns the labels into integer level codes.
d$usinq2 <- as.numeric(as.factor(d$usinq))
d
class(d$usinq)

as.numeric("three") # not parseable as a number: NA with a warning

d$usinq2 <- ifelse(d$usinq == "US lower", 1, 0)

# Recode Year into periods by successive subset assignment ...
d$period <- "before"
d$period[d$Year > 1945] <- "after"
d$period[d$Year > 1980] <- "recent"

# ... or in a single step with cut().
d$period2 <- cut(
  d$Year,
  breaks = c(1900, 1945, 1980, 2020),
  labels = c("before", "after", "recent")
)
?cut
d$usinq2[d$usinq == "US lower"] <- 2

??recode

factor()


nrow(tweets)
d

# Replace every match of the regex "hong|kong" in the tweet text
# (case-sensitive, since ignore.case is left at its default).
tweets$text2 <- gsub("hong|kong", "@@@@@@", tweets$text)

# Keep only the tweets whose text contains "hong", ignoring case.
hktweets <- tweets[grepl("hong", tweets$text, ignore.case = TRUE), ]


d
# Rename columns by position ...
colnames(d)[1:3] <- c("Jaar", "US", "UK")
d
colnames(d)[4] <- "Deutschland"

# ... by looking up the old name (a no-op when that name is not present) ...
colnames(d)[which(colnames(d) == "Germany")] <- "Deutschland"
d
# ... or with plyr::rename(), which takes an old = new mapping.
d <- plyr::rename(d, c("Europe" = "EU"))
d

# Sort by year, descending: two equivalent spellings.
d[order(d$Jaar, decreasing = TRUE), ]
d[order(-d$Jaar), ]

# plyr is only used via `plyr::` here (never attached with library()), so
# arrange() is qualified the same way instead of relying on the search path.
plyr::arrange(d, EU, Jaar)
	library(twitteR)
	# Load the saved API credentials (presumably the tw_* objects used below)
	# and authenticate this session against the Twitter API.
	load("~/learningr/api_auth.rda")
	twitteR::setup_twitter_oauth(
		tw_consumer_key, tw_consumer_secret, tw_token, tw_token_secret
	)

	# Search for up to 10 recent tweets matching "hong", then convert each result
	# with as.data.frame and bind everything into a single data frame.
	tweets <- searchTwitteR("hong", n = 10, resultType = "recent")
	tweets <- plyr::ldply(tweets, as.data.frame)

	library(RTextTools)
	library(corpustools)
	# Build a document-term matrix from the tweet texts and draw a word cloud,
	# passing sqrt as the frequency transformation.
	dtm <- create_matrix(tweets$text)
	dtm.wordcloud(dtm, freq.fun = sqrt)


	# Vectors and classes ----
	x <- 1:5
	class(x) # "integer"

	x <- "data" # the same name can be rebound to a value of another type

	d <- as.Date("2001-01-01")
	class(d) # "Date"

	# c() combines values into a vector and can also extend an existing one.
	x <- c(1, 2, 3)
	x

	x2 <- c(x, 4)


	# Valid and invalid variable names ----
	x123 <- 1
	# Invalid: a name cannot start with a digit. Kept commented out because the
	# parse error would otherwise prevent the whole script from being sourced.
	# 123x = 1

	x_y <- 1
	x.y <- 1
	# Invalid: `$` is the extraction operator and cannot be part of a name.
	# x$ = 2

	# Data frames: construction and column access ----
	df <- data.frame(
		id = 1:3,
		name = c("john", "mary", "pete"),
		stringsAsFactors = FALSE
	)

	df$id
	class(df$name)

	?data.frame


	df
	df$name
	df[["name"]]

	# `[[` evaluates its argument, so a column name stored in a variable works;
	# `$` takes the name literally and looks for a column called "col" (NULL here).
	col <- "name"

	df[[col]]
	df$col

	# Strings as factors: stringsAsFactors is spelled out because its default
	# became FALSE in R 4.0.0. Without it, the class() calls below would report
	# "character" and the as.character() conversion would be a no-op.
	df <- data.frame(
		id = 1:3,
		name = c("john", "mary", "pete"),
		group = c("a", "a", "b"),
		stringsAsFactors = TRUE
	)

	df$name2 <- as.character(df$name)

	class(df$name)
	class(df$group)

	# Alternative: keep strings as character and convert only selected columns.
	df <- data.frame(
		id = 1:3,
		name = c("john", "mary", "pete"),
		group = c("a", "a", "b"),
		stringsAsFactors = FALSE
	)
	df$group <- as.factor(df$group)

	df
	# Inspect the tweets data frame: first rows and column names.
	head(tweets)
	names(tweets)

	# Coerce the small example data frame to a matrix.
	as.matrix(df)

	summary(tweets)

	mean(tweets[["retweetCount"]])


	# View the same data frame as a plain list of its columns.
	as.list(df)


	# Load the income data. The raw table is kept as `income.decile` because later
	# steps subset it again; `d` is the working copy. (Previously the table was
	# only stored in `d`, leaving `income.decile` and `d2` undefined.)
	income.decile <- read.csv("data/income_topdecile.csv")

	# Drop every row that has an NA in any column.
	d <- na.omit(income.decile)
	head(d)

	# Rows 1-10 with all columns, then all rows of the first two columns.
	d[1:10, ]
	d[, 1:2]

	# Keep rows where France or Germany (or both) is not missing.
	# The element-wise OR is a plain `|`; a backslash before it is a syntax error.
	income.decile <- income.decile[
		(!is.na(income.decile$France)) | (!is.na(income.decile$Germany)),
	]
	d

	# subset() evaluates the condition inside the data frame.
	d <- subset(income.decile, !is.na(France))

	d <- d[d$Year > 1945, ]

	d
	# Derive a column: the mean of the U.S. and U.K. values.
	d$anglo <- d$U.S. + d$U.K.
	d$anglo <- d$anglo / 2
	# Assign to a subset of a column: halve France for years after 1945.
	d$France[d$Year > 1945] <- d$France[d$Year > 1945] / 2

	# Where anglo is below Europe, replace it with the Europe value.
	d$anglo[d$anglo < d$Europe] <- d$Europe[d$anglo < d$Europe]

	d$anglo[d$anglo <= d$Europe] <- 1

	# Assigning NULL removes a column; reading a non-existent column gives NULL.
	d$anglo <- NULL
	d
	d$sdfgsdfgfdsg

	# A comparison yields a logical column ...
	d$usinq <- d$U.S. > d$Europe

	# ... which as.numeric() converts to 0/1 ...
	d$usinq <- as.numeric(d$U.S. > d$Europe)

	# ... and which ifelse() can map onto labels.
	d$usinq <- ifelse(d$U.S. > d$Europe, "US higher", "US lower")

	# Going through a factor turns the labels into integer level codes.
	d$usinq2 <- as.numeric(as.factor(d$usinq))
	d
	class(d$usinq)

	as.numeric("three") # not parseable as a number: NA with a warning

	d$usinq2 <- ifelse(d$usinq == "US lower", 1, 0)

	# Recode Year into periods by successive subset assignment ...
	d$period <- "before"
	d$period[d$Year > 1945] <- "after"
	d$period[d$Year > 1980] <- "recent"

	# ... or in a single step with cut().
	d$period2 <- cut(
		d$Year,
		breaks = c(1900, 1945, 1980, 2020),
		labels = c("before", "after", "recent")
	)
	?cut
	d$usinq2[d$usinq == "US lower"] <- 2

	??recode

	factor()


	nrow(tweets)
	d

	# Replace every match of the regex "hong|kong" in the tweet text
	# (case-sensitive, since ignore.case is left at its default). The pattern
	# uses a plain `|`: "\|" is an unrecognized escape in an R string literal
	# and fails at parse time.
	tweets$text2 <- gsub("hong|kong", "@@@@@@", tweets$text)

	# Keep only the tweets whose text contains "hong", ignoring case.
	hktweets <- tweets[grepl("hong", tweets$text, ignore.case = TRUE), ]


	d
	# Rename columns by position ...
	colnames(d)[1:3] <- c("Jaar", "US", "UK")
	d
	colnames(d)[4] <- "Deutschland"

	# ... by looking up the old name (a no-op when that name is not present) ...
	colnames(d)[which(colnames(d) == "Germany")] <- "Deutschland"
	d
	# ... or with plyr::rename(), which takes an old = new mapping.
	d <- plyr::rename(d, c("Europe" = "EU"))
	d

	# Sort by year, descending: two equivalent spellings.
	d[order(d$Jaar, decreasing = TRUE), ]
	d[order(-d$Jaar), ]

	# plyr is only used via `plyr::` here (never attached with library()), so
	# arrange() is qualified the same way instead of relying on the search path.
	plyr::arrange(d, EU, Jaar)