Skip to content

Instantly share code, notes, and snippets.

@whilo
Created June 17, 2015 07:54
Show Gist options
  • Save whilo/ed2db43d8bc0aa7849bb to your computer and use it in GitHub Desktop.
Save whilo/ed2db43d8bc0aa7849bb to your computer and use it in GitHub Desktop.
Some R examples
click1 <- function(){
x <- runif(1);y <- runif(1)
plot(x = x, y = y, xlim = c(0, 1), ylim = c(0, 1),
main = "Bitte auf den Punkt klicken",
xlab = '', ylab = '',
axes = FALSE, frame.plot = TRUE)
clicktime <- system.time(xyclick <- locator(1))
c(timestamp = Sys.time(),
x = x, y = y,
xclick = xyclick$x, yclick = xyclick$y,
tclick = clicktime[3])
}
# 1 click wegwerfen, 4-5 einfuehrungsclicks
# lm modell, laufende nummer des clicks muss aufgenommen werden
# besserer vergleich als qqplot ist linearisieren
# vergleich gerade
click <- function(count) {
# 5 einfuehrungsklicks
initClicks <- Map(function(i) {c(index=i, click1())}, 0:5)
#clicks <- Map(function(i) { c(i, click1())}, 1:count)
clicks <- list()
lastClick <- tail(initClicks,1)[[1]] # truely elegant ...
prevX <- lastClick["x"]
prevY <- lastClick["y"]
for (i in 1:count) {
click <- c(index=i, click1())
xclick <- click["xclick"]
yclick <- click["yclick"]
click <- c(click, dist=(click["x"] - xclick)**2 + (click["y"] - yclick)**2)
click <- c(click, way=(xclick-prevX)**2 + (yclick - prevY)**2)
clicks <- append(clicks, list(click))
prevX <<- xclick
prevY <<- yclick
}
clicks <- do.call(rbind.data.frame, clicks)
colnames(clicks) <- c("index", "timestamp", "x", "y", "xclick", "yclick", "tclick", "dist", "way")
clicks
}
# wie viele clicks sinnvoll fuer lm? ermuedung, test auf signifikanz?
# rerun experiment
write.table(click(20), "click20r")
write.table(click(20), "click20l")
write.table(click(20), "click20r2")
write.table(click(20), "click20l2")
# read stored data
right <- rbind(read.table("click20r"), read.table("click20r2"))
left <- rbind(read.table("click20l"), read.table("click20l2"))
# not what is difference between left and right?
# 0-hypothesis: are identical distributed
# alternative
# shift? in linear model
# x_i = mu + e_i i=1,...,n1=n_x
# y_i = mu + a + e_i i=1,.., n_2=n_y
# t-test detects shift alternative
# alternative
# different distribution function
# KS test between ecdfs, continuous cdfs necessary (?)
# appending of lists and dataframes
stack(list(left=left$tclick, right=right$tclick))
# idea, compare time / way, should be normal distributed? t-test for significance
?sample
sample(1:12) # permutation
wilcox.test( right$tclick, left$tclick ) # stetigkeitskorrektur
require(coin)
wilcox_test(right$tclick, left$tclick )
hist(right$tclick / right$way)
# empirical cdf, shift between left and right clearly visible
plot(ecdf(left$tclick), col="green")
lines(ecdf(right$tclick), col="red")
legend(x=1,y=1,legend=c("left", "right"), fill=c("green", "red"))
ks.test(left$tclick, right$tclick)
# p-value: 0.001 (that dists are the same)
# t-test:
# empirical estimator for mean of two groups
# simple case, both same variance, estimate variance
t.test(left$tclick, right$tclick)
t.test(left$tclick, right$tclick, var.equal=TRUE)
# more degrees of freedom
# linear model: (only if normal distributed)
# qqplot comparison of time to click
qqplot(right$tclick, left$tclick)
ppplot(right$tclick, distf=rnorm)
rlm <- lm(formula= right$tclick ~ right$way + right$index + right$dist)
llm <- lm(formula= left$tclick ~ left$way + left$index + left$dist)
require(graphics)
par(mfrow = c(2,2))
plot(rlm)
par(mfrow = c(2,2))
plot(llm)
# monte carlo confidence bounds for theoretical distribution
# qqplot 300 beobachtungen
# genauigkeit, laenge des weges, anzahl der clicks
unif50 <- runif(50)
unif100 <- runif(100)
norm50 <- rnorm(50)
norm100 <- rnorm(100)
lognorm50 <- exp(rnorm(50))
lognorm100 <- exp( rnorm(100))
oldpar <- par(mfrow = c(2, 3))
plot(ecdf(unif50), pch = "[")
plot(ecdf(norm50), pch = "[")
plot(ecdf(lognorm50), pch = "[")
plot(ecdf(unif100), pch = "[")
plot(ecdf(norm100), pch = "[")
plot(ecdf(lognorm100), pch = "[")
par(oldpar)
oldpar <- par(mfrow = c(2, 3))
qqnorm(unif50, main = "Normal Q-Q unif50")
qqnorm(norm50, main = "Normal Q-Q norm50")
qqnorm(lognorm50, main = "Normal Q-Q lognorm50")
qqnorm(unif100, main = "Normal Q-Q unif100")
qqnorm(norm100, main = "Normal Q-Q norm100")
qqnorm(lognorm100, main = "Normal Q-Q lognorm100")
par(oldpar)
tq99 <- function(n, df=1) {
t100 <- rt(n, df)
q <- quantile(t100, probs = seq(0.01, 0.99, 0.98))
st100 <- sort(t100)
ust100 <- st100[q["1%"]<st100]
cst100 <- ust100[q["99%"]>ust100]
cst100
}
# TODO why 19? Aufgabe 1.10: abstract P(T(X0) > T(Xi))
ppplot <- function(x, y=NULL, samps = 19, distf=rnorm) {
if(is.null(y)) {
y <- sort(distf(length(x))) #(1:length(x))/length(x)
} else {
y <- sort(y)
}
plot(sort(x), y, xlab = substitute(x), ylab = expression(F[n]),
main = "Verteilungsfunktion mit Monte-Carlo-Band",
type = "s")
mtext(paste(samps, "Monte-Carlo-Stichproben"), side = 3)
samples <- matrix(distf(length(x)* samps), nrow = length(x), ncol = samps)
samples <- apply(samples, 2, sort)
envelope <- t(apply(samples, 1, range))
lines(envelope[, 1], y, type = "s", col = "red");
lines(envelope[, 2], y, type = "s", col = "red")
}
ppplot(norm100, tq99(100))
ppplot(rnorm(300), rnorm(300))
ks.test(rnorm(300), runif(300))
# chisq vs. ks
# https://stackoverflow.com/questions/21655653/r-chi-square-goodness-of-fit-for-random-numbers-generated
set.seed(1)
df <- data.frame(y = rexp(1000),
z = rcauchy(1000, 100, 100))
ks.test(df$y,"pexp")
# One-sample Kolmogorov-Smirnov test
#
# data: df$y
# D = 0.0387, p-value = 0.1001
# alternative hypothesis: two-sided
ks.test(df$z,"pcauchy",100,100)
# One-sample Kolmogorov-Smirnov test
#
# data: df$z
# D = 0.0296, p-value = 0.3455
# alternative hypothesis: two-sided
breaks <- c(seq(0,10,by=1))
O <- table(cut(df$y,breaks=breaks))
p <- diff(pexp(breaks))
chisq.test(O,p=p, rescale.p=T)
# Chi-squared test for given probabilities
#
# data: O
# X-squared = 7.9911, df = 9, p-value = 0.535
par(mfrow=c(1,2))
plot(qexp(seq(0,1,0.01)),quantile(df$y,seq(0,1,0.01)),
main="Q-Q Plot",ylab="df$Y", xlab="Exponential",
xlim=c(0,5),ylim=c(0,5))
plot(qcauchy(seq(0,.99,0.01),100,100),quantile(df$z,seq(0,.99,0.01)),
main="Q-Q Plot",ylab="df$Z",xlab="Cauchy",
xlim=c(-1000,1000),ylim=c(-1000,1000))
# lm
x <- 1:100
err <- rnorm(100, mean = 0, sd = 10)
y <- 2.5*x + err
lm(y ~ x)
lmres <- lm(y ~ x)
plot(x, y)
abline(lmres)
attach(hills)
scotlm<-lm(time ~ dist + climb)
attach(scotlm)
oldpar <- par(mfrow=c(1,2))
plot(fitted.values, residuals,xlim=c(0,200), main="Scottish Hill Races", pch=19, cex.lab = 1.5)
text.id(fitted.values, residuals, c(7,11,18,31,33,35, 16,14), row.names(hills), cex.id=1.0)
sres<-stdres(scotlm)
plot(fitted.values,sres,xlim=c(0,200), main="Scottish Hill Races",
ylab="MASS::stdres", pch=19, cex.lab = 1.5)
text.id(fitted.values,sres, c(7,11,18,31,33,35, 16,14), row.names(hills), cex.id=1.0)
par(oldpar)
oldpar <- par(mfrow=c(1,2))
scotdffits <-dffits(scotlm)
plot(fitted.values, scotdffits,ylab="DFFITS",xlim=c(0,200),
main="Scottish Hill Races", pch=19, cex.lab = 1.5)
text.id(fitted.values, scotdffits, c(7,11, 18,31,33,35), row.names(hills), cex.id=1.2)
scotcooks <-cooks.distance(scotlm)
plot(fitted.values, scotcooks,ylab="Cooks",xlim=c(0,200),
main="Scottish Hill Races", pch=19, cex.lab = 1.5)
text.id(fitted.values, scotcooks, c(7,11, 18), row.names(hills), cex.id=1.2)
par(oldpar)
library(forward)
fwdhills <- fwdlm(time ~dist + climb, data=hills)
summary(hills[,1:3])
# Guete Simulation p.126
# task view
foo <- Map(function(e) { t.test(exp(rnorm(1000)), exp(rnorm(1000)))$p.value }, 1:1000)
length(foo[foo<0.05])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment