# vanatteveldt/hk session 6.r

## hk session 6.r
# Load the saved workspace file; the script expects it to provide `capital`.
load("data/capital.rdata")
head(capital)

# Cast to wide format: one row per Year, one column per Country holding
# that country's `Private` value.
library(reshape2)
priv <- dcast(data = capital, formula = Year ~ Country, value.var = "Private")
head(priv)

# Plot columns 2-4 of the wide table (column 1 is Year).
plot(priv[, 2:4])

# Draw the private-capital series of every country into a PNG file.
png("/tmp/plot.png")

# NOTE(review): a 2x2 panel grid is requested but only one panel is drawn
# below, so the figure occupies a single cell of the image -- confirm intended.
par(mfrow = c(2, 2))

# Empty canvas (type = "n") whose y-range covers all country columns.
# No col/lty/lwd here: nothing is drawn by this call, so they had no effect.
ymax <- max(priv[-1], na.rm = TRUE)
plot(x = priv$Year, y = priv$Germany, frame.plot = FALSE, xlab = "Year",
     ylab = "Private capital as % of GDP", type = "n", ylim = c(2, ymax + 3))

# Grey band for 1990-2000, drawn before the lines so they are painted on top.
polygon(x = c(1990, 2000, 2000, 1990), y = c(2, 2, 10, 10), col = "grey", lty = 0)

# One colour per column of `priv`; entry 1 belongs to Year and stays unused,
# so colours[i] matches column i and colours[-1] matches the legend labels.
colours <- rainbow(ncol(priv))
# seq_len(...)[-1] is empty when there are no country columns, whereas
# 2:ncol(priv) would count backwards (2, 1).
for (i in seq_len(ncol(priv))[-1]) {
  lines(x = priv$Year, y = priv[[i]], col = colours[i])
}
abline(v = 1990, lty = 2)
abline(v = 2000, lty = 2)

# Linear trend of the France column over Year.
abline(lm(France ~ Year, data = priv))

legend("topleft", legend = colnames(priv)[-1], ncol = 2, lty = 1, col = colours[-1])
title(main = "Graph!")

# Close the device so the PNG is written to disk.
dev.off()


head(capital)

# Private capital by country.
plot(Private ~ Country, data = capital)

# Regress Private on Public and Country. Passing `data =` instead of
# capital$... terms keeps coefficient names and axis labels free of the
# "capital$" prefix and lets the model be used with predict(newdata = ...).
m <- lm(Private ~ Public + Country, data = capital)

summary(m)
# Standard lm diagnostic plots.
plot(m)


# ggplot2
library(ggplot2)
head(capital)

# Private capital over time per country, plus a ribbon between Private and Public.
# The `+` must end a line: a line that *starts* with `+` is parsed as a new
# statement, so the ribbon layer would never be added to the plot.
ggplot(capital, aes(x = Year, y = Private, color = Country)) +
  geom_line() +
  geom_ribbon(mapping = aes(ymin = Private, ymax = Public))

?geom_ribbon

# Drop incomplete rows so fitted(m) / predict(m) below have one value per row
# of `capital`.
capital <- na.omit(capital)

m <- lm(Public ~ Private, data = capital)
# Constant aesthetics belong outside aes(); inside aes() the string is treated
# as a data value and mapped through the colour scale instead of naming a colour.
regline <- geom_line(mapping = aes(y = fitted(m)), colour = "black")

ggplot(capital, aes(x = Private, y = Public, colour = Country)) + geom_point() + regline
fit <- as.data.frame(predict(m, interval = "confidence"))

# Confidence band around the regression line; alpha is a fixed setting here,
# not a mapping, so it also goes outside aes().
band <- geom_ribbon(mapping = aes(ymin = fit$lwr, ymax = fit$upr), alpha = .3)
ggplot(capital, aes(x = Private, y = Public)) + geom_point() + regline + band

table(is.na(capital$Private))

# Same picture with ggplot2 fitting the linear model itself.
ggplot(capital, aes(x = Private, y = Public)) +
  geom_point(mapping = aes(colour = Country)) +
  geom_smooth(method = "lm")


# Interactive googleVis line chart: Year on the x-axis, every other column
# of `priv` as a series.
library(googleVis)

plot(
  gvisLineChart(
    data = priv,
    xvar = "Year",
    yvar = names(priv)[-1]
  )
)


## semnet

library(semnet)

# Load the example data set (the script uses it as `dtm`) and print it as a
# plain matrix.
data(simple_dtm)
as.matrix(dtm)

# Co-occurrence network built from the document-term matrix.
g <- coOccurenceNetwork(dtm)

# Vertex size is ten times the vertex `freq` attribute.
V(g)$size <- 10 * V(g)$freq
E(g)
plot(g)

# Vertex attributes as a data frame.
as_data_frame(x = g, what = "vertices")

data(sotu)

# Keep only the tokens whose pos1 tag is "M".
sotu.token <- sotu.tokens[sotu.tokens$pos1 == "M", ]
head(sotu.token)

# Windowed co-occurrence network built from the id, lemma and aid columns.
g <- windowedCoOccurenceNetwork(sotu.token$id, sotu.token$lemma, sotu.token$aid, window.size = 20)

vcount(g)
plot(g)

# First returned component that has at least 10 vertices.
g2 <- decompose(g, max.comps = 1, min.vertices = 10)[[1]]
plot(g2)

# Reduce the graph with getBackboneNetwork (max.vertices = 100), then take a
# component again.
gb <- getBackboneNetwork(g, max.vertices = 100)

g2 <- decompose(gb, max.comps = 1, min.vertices = 10)[[1]]
plot(g2)
# cluster_edge_betweenness() is the current igraph name of the deprecated
# edge.betweenness.community().
V(g2)$cluster <- cluster_edge_betweenness(g2)$membership

g2 <- setNetworkAttributes(g2, V(g2)$freq, V(g2)$cluster)
plot(g2)

# write_graph() is the current igraph name of the deprecated write.graph().
write_graph(g, file = "/tmp/test.ml", format = "gml")

# Export the same graph as GEXF via rgexf.
library(rgexf)
gefx <- igraph.to.gexf(g)
print(gefx, file = "/tmp/test.gexf")


# Rebuild the example network with measure = "conprob" and list its edges.
data(simple_dtm)
g <- coOccurenceNetwork(dtm, measure = "conprob")
as_data_frame(x = g, what = "edges")


# Sentiment lexicon: split its words by prior polarity.
lex <- readRDS("data/lexicon.rds")
pos_words <- lex$word1[lex$priorpolarity == "positive"]
neg_words <- lex$word1[lex$priorpolarity == "negative"]

data(sotu)
# Inspect the freshly loaded tokens rather than `sotu.token`, the filtered
# subset created in an earlier section of the script.
head(sotu.tokens)

# Tag tokens with a concept. The assignments run in order, so a token that
# matches several rules keeps the last one.
sotu.tokens$concept[sotu.tokens$lemma == "Iraq"] <- "Iraq"
sotu.tokens$concept[sotu.tokens$lemma == "Afghanistan"] <- "Afghanistan"
sotu.tokens$concept[sotu.tokens$word %in% pos_words] <- "pos"
sotu.tokens$concept[sotu.tokens$word %in% neg_words] <- "neg"

table(sotu.tokens$concept)

library(semnet)

# Windowed co-occurrence network over the concept tags.
g <- windowedCoOccurenceNetwork(sotu.tokens$id, sotu.tokens$concept, sotu.tokens$aid, window.size = 20)

e <- as_data_frame(g, what = "edges")
head(e)

# Keep only edges going from a country concept to a sentiment concept.
e <- e[(e$from %in% c("Afghanistan", "Iraq")) & (e$to %in% c("neg", "pos")), ]

# One row per country with its neg/pos weights, plus the score
# (pos - neg) / (pos + neg).
d <- dcast(e, from ~ to, value.var = "weight")
d$sent <- (d$pos - d$neg) / (d$pos + d$neg)
d

# Same call with output.per.context switched on.
f <- windowedCoOccurenceNetwork(sotu.tokens$id, sotu.tokens$concept, sotu.tokens$aid, window.size = 20, output.per.context = TRUE)
head(f)
	# NOTE(review): from here to the end, the file repeats the statements above
	# (tab-indented) -- presumably a paste artifact; confirm whether it can go.
	# Load the saved workspace file; the script expects it to provide `capital`.
	load("data/capital.rdata")
	head(capital)

	# Cast to wide format: one row per Year, one column per Country holding
	# that country's `Private` value.
	library(reshape2)
	priv <- dcast(data = capital, formula = Year ~ Country, value.var = "Private")
	head(priv)

	# Plot columns 2-4 of the wide table (column 1 is Year).
	plot(priv[, 2:4])

	# Draw the private-capital series of every country into a PNG file.
	png("/tmp/plot.png")

	# NOTE(review): a 2x2 panel grid is requested but only one panel is drawn
	# below, so the figure occupies a single cell of the image -- confirm intended.
	par(mfrow = c(2, 2))

	# Empty canvas (type = "n") whose y-range covers all country columns.
	# No col/lty/lwd here: nothing is drawn by this call, so they had no effect.
	ymax <- max(priv[-1], na.rm = TRUE)
	plot(x = priv$Year, y = priv$Germany, frame.plot = FALSE, xlab = "Year",
	     ylab = "Private capital as % of GDP", type = "n", ylim = c(2, ymax + 3))

	# Grey band for 1990-2000, drawn before the lines so they are painted on top.
	polygon(x = c(1990, 2000, 2000, 1990), y = c(2, 2, 10, 10), col = "grey", lty = 0)

	# One colour per column of `priv`; entry 1 belongs to Year and stays unused,
	# so colours[i] matches column i and colours[-1] matches the legend labels.
	colours <- rainbow(ncol(priv))
	# seq_len(...)[-1] is empty when there are no country columns, whereas
	# 2:ncol(priv) would count backwards (2, 1).
	for (i in seq_len(ncol(priv))[-1]) {
	  lines(x = priv$Year, y = priv[[i]], col = colours[i])
	}
	abline(v = 1990, lty = 2)
	abline(v = 2000, lty = 2)

	# Linear trend of the France column over Year.
	abline(lm(France ~ Year, data = priv))

	legend("topleft", legend = colnames(priv)[-1], ncol = 2, lty = 1, col = colours[-1])
	title(main = "Graph!")

	# Close the device so the PNG is written to disk.
	dev.off()



	head(capital)

	# Private capital by country.
	plot(Private ~ Country, data = capital)

	# Regress Private on Public and Country. Passing `data =` instead of
	# capital$... terms keeps coefficient names and axis labels free of the
	# "capital$" prefix and lets the model be used with predict(newdata = ...).
	m <- lm(Private ~ Public + Country, data = capital)

	summary(m)
	# Standard lm diagnostic plots.
	plot(m)




	# ggplot2
	library(ggplot2)
	head(capital)

	# Private capital over time per country, plus a ribbon between Private and Public.
	# The `+` must end a line: a line that *starts* with `+` is parsed as a new
	# statement, so the ribbon layer would never be added to the plot.
	ggplot(capital, aes(x = Year, y = Private, color = Country)) +
	  geom_line() +
	  geom_ribbon(mapping = aes(ymin = Private, ymax = Public))

	?geom_ribbon

	# Drop incomplete rows so fitted(m) / predict(m) below have one value per row
	# of `capital`.
	capital <- na.omit(capital)

	m <- lm(Public ~ Private, data = capital)
	# Constant aesthetics belong outside aes(); inside aes() the string is treated
	# as a data value and mapped through the colour scale instead of naming a colour.
	regline <- geom_line(mapping = aes(y = fitted(m)), colour = "black")

	ggplot(capital, aes(x = Private, y = Public, colour = Country)) + geom_point() + regline
	fit <- as.data.frame(predict(m, interval = "confidence"))

	# Confidence band around the regression line; alpha is a fixed setting here,
	# not a mapping, so it also goes outside aes().
	band <- geom_ribbon(mapping = aes(ymin = fit$lwr, ymax = fit$upr), alpha = .3)
	ggplot(capital, aes(x = Private, y = Public)) + geom_point() + regline + band

	table(is.na(capital$Private))

	# Same picture with ggplot2 fitting the linear model itself.
	ggplot(capital, aes(x = Private, y = Public)) +
	  geom_point(mapping = aes(colour = Country)) +
	  geom_smooth(method = "lm")



	# Interactive googleVis line chart: Year on the x-axis, every other column
	# of `priv` as a series.
	library(googleVis)

	plot(
	  gvisLineChart(
	    data = priv,
	    xvar = "Year",
	    yvar = names(priv)[-1]
	  )
	)




	## semnet

	library(semnet)

	# Load the example data set (the script uses it as `dtm`) and print it as a
	# plain matrix.
	data(simple_dtm)
	as.matrix(dtm)

	# Co-occurrence network built from the document-term matrix.
	g <- coOccurenceNetwork(dtm)

	# Vertex size is ten times the vertex `freq` attribute.
	V(g)$size <- 10 * V(g)$freq
	E(g)
	plot(g)

	# Vertex attributes as a data frame.
	as_data_frame(x = g, what = "vertices")

	data(sotu)

	# Keep only the tokens whose pos1 tag is "M".
	sotu.token <- sotu.tokens[sotu.tokens$pos1 == "M", ]
	head(sotu.token)

	# Windowed co-occurrence network built from the id, lemma and aid columns.
	g <- windowedCoOccurenceNetwork(sotu.token$id, sotu.token$lemma, sotu.token$aid, window.size = 20)

	vcount(g)
	plot(g)

	# First returned component that has at least 10 vertices.
	g2 <- decompose(g, max.comps = 1, min.vertices = 10)[[1]]
	plot(g2)

	# Reduce the graph with getBackboneNetwork (max.vertices = 100), then take a
	# component again.
	gb <- getBackboneNetwork(g, max.vertices = 100)

	g2 <- decompose(gb, max.comps = 1, min.vertices = 10)[[1]]
	plot(g2)
	# cluster_edge_betweenness() is the current igraph name of the deprecated
	# edge.betweenness.community().
	V(g2)$cluster <- cluster_edge_betweenness(g2)$membership

	g2 <- setNetworkAttributes(g2, V(g2)$freq, V(g2)$cluster)
	plot(g2)

	# write_graph() is the current igraph name of the deprecated write.graph().
	write_graph(g, file = "/tmp/test.ml", format = "gml")

	# Export the same graph as GEXF via rgexf.
	library(rgexf)
	gefx <- igraph.to.gexf(g)
	print(gefx, file = "/tmp/test.gexf")


	# Rebuild the example network with measure = "conprob" and list its edges.
	data(simple_dtm)
	g <- coOccurenceNetwork(dtm, measure = "conprob")
	as_data_frame(x = g, what = "edges")


	# Sentiment lexicon: split its words by prior polarity.
	lex <- readRDS("data/lexicon.rds")
	pos_words <- lex$word1[lex$priorpolarity == "positive"]
	neg_words <- lex$word1[lex$priorpolarity == "negative"]

	data(sotu)
	# Inspect the freshly loaded tokens rather than `sotu.token`, the filtered
	# subset created in an earlier section of the script.
	head(sotu.tokens)

	# Tag tokens with a concept. The assignments run in order, so a token that
	# matches several rules keeps the last one.
	sotu.tokens$concept[sotu.tokens$lemma == "Iraq"] <- "Iraq"
	sotu.tokens$concept[sotu.tokens$lemma == "Afghanistan"] <- "Afghanistan"
	sotu.tokens$concept[sotu.tokens$word %in% pos_words] <- "pos"
	sotu.tokens$concept[sotu.tokens$word %in% neg_words] <- "neg"

	table(sotu.tokens$concept)

	library(semnet)

	# Windowed co-occurrence network over the concept tags.
	g <- windowedCoOccurenceNetwork(sotu.tokens$id, sotu.tokens$concept, sotu.tokens$aid, window.size = 20)

	e <- as_data_frame(g, what = "edges")
	head(e)

	# Keep only edges going from a country concept to a sentiment concept.
	e <- e[(e$from %in% c("Afghanistan", "Iraq")) & (e$to %in% c("neg", "pos")), ]

	# One row per country with its neg/pos weights, plus the score
	# (pos - neg) / (pos + neg).
	d <- dcast(e, from ~ to, value.var = "weight")
	d$sent <- (d$pos - d$neg) / (d$pos + d$neg)
	d

	# Same call with output.per.context switched on.
	f <- windowedCoOccurenceNetwork(sotu.tokens$id, sotu.tokens$concept, sotu.tokens$aid, window.size = 20, output.per.context = TRUE)
	head(f)