css / styling of this document is currently default for github. Additional resources listed below
Indexing starts at 1, not 0 — bring on the off-by-one (OBO) errors! :)
# print the current working directory
getwd()
# change the working directory; the example path uses forward slashes
setwd('C:/blaaa/just/use/fw_slashes')
# three spellings of the same assignment: each one stores 4 in x
4 -> x # rightward arrow: value on the left, name on the right
x <- 4 # leftward arrow
x = 4 # equals sign
Lists can contain mixed data types; vectors always contain a single type of data (when you pass mixed data types to a vector, the values are coerced to one common type).
# c() combines its arguments into a vector
x <- c(1,2,3,4)
y <- c(6,7,8)
# vectors passed to c() are concatenated into one flat vector
z <- c(x,y) # [1] 1 2 3 4 6 7 8
# logical vector
x <- c(TRUE, FALSE)
# T and F are shorthand for TRUE and FALSE
x <- c(T, F)
# mixed types inside c() are coerced to one type: TRUE becomes 1,
# so the result is the numeric vector 1 7
x <- c(TRUE, 7) # works but maybe not as you expect
# a list keeps each element's own type (logical TRUE, numeric 7)
x <- list(TRUE, 7) # you probably want
# pre populate, vector with 20 NA
v <- rep(NA, 20)
# read a CSV file (path relative to the working directory) into a data frame
csvdata_f <- read.csv("filename.csv")
# the file can also live in a sub-directory
csvdata_f <- read.csv("some_directory/filename.csv")
# `filename` is a variable holding the path; the extra arguments control parsing
csvdata_f <- read.csv(filename,
colClasses = "character", # read every column as character instead of guessing types
na.strings = c('Not Available')) # treats every occurrence of the strings in c(...) as NA
# character vector, one element per line of the file
text <- readLines(filename)
# summary(x) summarizes the object x; the argument is left out in this note
summary()
# convert a value or vector to numeric
as.numeric(variable)
# convert a matrix to a data frame
as.data.frame(some_matrix)
# use entire dataframe, or select columns
# (either index or column name string)
DM = as.matrix(DF)
# casting and subsetting
DM = as.matrix(DF[c(12,14,25)]) # columns selected by position
DM = as.matrix(DF[c("colname", "other_colname")]) # columns selected by name
# paste() has two different separators:
#   sep      is placed between the separate arguments of one call
#   collapse joins the elements of the resulting vector into ONE string
# With a single vector argument only `collapse` has an effect; with
# several single strings only `sep` has an effect.

# glue all elements of some_list together without a separator
paste(some_list, collapse = "")
# glue all elements of some_list together, separated by "/"
paste(some_list, collapse = "/")
# join two separate strings with "/" between them
paste(str1, str2, sep = "/")
> fpath = sprintf("%s/%s%s", "some_path", "some_file", ".csv")
> fpath
[1] "some_path/some_file.csv"
# id can be 2, "02" or "002": as.numeric() turns each of them into 2 and
# %03d zero-pads that to three digits, so the output is the same string
fpath = sprintf("%s/%03d%s", "some_path", as.numeric(id), ".csv")
# is a value an NA?
is.na(some_value)
sum(is.na(csvdata_f["col_label"])) # counts num NA
# mean of the column, ignoring NA values
colMeans(csvdata["col_label"], na.rm=TRUE)
ma <- median(some_data_frame[, colnum], na.rm=TRUE) # drop NA from calcs, works on most things.
# keep the rows where both conditions hold (&);
# col_label and some_value are placeholders for your own column and value
nd <- subset(csvdata, col_label > some_value & col_label > some_value,)
# keep the rows where at least one condition holds (|)
nd <- subset(csvdata, col_label > some_value | col_label > some_value,)
# number of rows that contain no NA at all
quantity <- sum(complete.cases(df))
max(nd['col_label']) # if no NA
max(nd['col_label'], na.rm=TRUE) # else
# DF is some data.frame
# range() returns c(min, max); take it per column, ignoring NA
av = range(DF[, 12], na.rm=TRUE)
bv = range(DF[, 14], na.rm=TRUE)
cv = range(DF[, 25], na.rm=TRUE)
# overall min and max across the three columns
dv = range(c(av,bv,cv))
# or, looks neater
# select the three columns at once, convert to a matrix, take one range
DM = as.matrix(DF[c(12,14,25)])
dv = range(DM, na.rm=TRUE)
# different ways to build sequences
1:4 # [1] 1 2 3 4
4:1 # [1] 4 3 2 1
c(1,2,3,4) # [1] 1 2 3 4
seq(1, 4) # [1] 1 2 3 4
seq(from=1, to=4) # [1] 1 2 3 4
seq(from=1, to=14, by=2) # 1 3 5 7 9 11 13 (stops before exceeding `to`)
seq(from=-20, by=2, length= 20) # 20 values: -20, -18, ..., 18 (`length` partially matches `length.out`)
rep(c(3,2,1), each=2) # [1] 3 3 2 2 1 1
rep(c(3,2,1), times=2) # [1] 3 2 1 3 2 1
# renaming column / rownames names
# replaces the name of one column, selected by its position
colnames(dataframe)[column_number] <- "new_name"
# display first n rows of dataframe / matrix
head(dataframe, n=20)
tail(dataframe, n=20) # last 20 rows
# row gets
# an empty column index after the comma means "all columns"
br[1:3,] # first 3 rows of br
br[3,] # row 3 of br only
# row col gets
br[1:3,]$column_name # returns that column for those rows.
# Example data frame with nine rows and four columns, built from four
# parallel vectors; the column names are taken from the vector names.
name <- c("aa", "bc", "ac", "cd", "ge", "hr", "de", "ed", "ae")
# four "tx" rows, then two "ab" rows, then three "md" rows
state <- rep(c("tx", "ab", "md"), times = c(4, 2, 3))
rank <- c(1, 2, 3, 4, 1, 2, 1, 2, 3)
# one value per state, repeated for every row of that state
maxrank <- rep(c(4, 2, 3), times = c(4, 2, 3))
df <- data.frame(name, state, rank, maxrank)
# adding to a dataframe use
df <- rbind(df, df2) # appends the rows of df2 below the rows of df
# loop over the values of a vector: prints 4, 5 and 6
for (i in c(4,5,6)){
print(i)
}
# loop over the positions 1..length(some_vector);
# seq_along() gives an empty sequence for an empty vector, so the body is skipped
for (idx in seq_along(some_vector)){
print(idx)
}
# order on condition 1 then on condition 2
# both forms can occur
# order() returns the row permutation; ties in the first key are broken by the second
# m[,1] picks column 1 with matrix-style [row, column] indexing
m[ order(m[,1],m[,2]), ]
# m[1] picks column 1 with single-index (list-style) indexing —
# presumably meant for a data.frame; TODO confirm
m[ order(m[1],m[2]), ]
if you have to..
# position of the first match of pat in each element of text, ignoring case
r <- regexpr(pat, text, ignore.case=TRUE)
# extract the matched substrings using the match positions in r
m <- regmatches(text, r)
This section might develop into a separate document, but it is included here for now. Taken from https://class.coursera.org/compdata-003/lecture/28
# xyplot() and the panel.* helpers are provided by the lattice package,
# which has to be attached before running this
# simulated data: y is x plus normal noise with sd 0.5
x <- rnorm(100)
y <- x + rnorm(100, sd = 0.5)
# factor with two levels, 50 consecutive observations per level
f <- gl(2, 50, labels = c("Group 1", "Group 2"))
# basic plot
# y against x, conditioned on f: one panel per level of f
xyplot(y ~ x | f)
# show regression line
# the panel function is called for each panel with that panel's x and y
xyplot(y ~ x | f,
panel = function(x,y, ...) {
panel.xyplot(x,y,...) # draw the points first
panel.lmline(x,y, col=2) # then add a fitted regression line in colour 2
})
# show median of y
xyplot(y ~ x | f,
panel = function(x,y, ...) {
panel.xyplot(x,y,...)
panel.abline(h = median(y), lty=2) # horizontal line at the panel's median of y, line type 2
})
taken from https://class.coursera.org/compdata-003/lecture/31
> library(lattice)
> package ? lattice
> library(help = lattice)
> data(environmental)
> ?environmental
> head(environmental)
ozone radiation temperature wind
1 41 190 67 7.4
2 36 118 72 8.0
3 12 149 74 12.6
4 18 313 62 11.5
5 23 299 65 8.6
6 19 99 59 13.8
> xyplot(ozone ~ radiation, data=environmental)
> xyplot(ozone ~ radiation, data=environmental, main = "Ozone vs. Radiation")
> xyplot(ozone ~ temperature, data=environmental)
> summary(environmental$temperature)
Min. 1st Qu. Median Mean 3rd Qu. Max.
57.00 71.00 79.00 77.79 84.50 97.00
> temp.cut <- equal.count(environmental$temperature, 4)
> temp.cut
Data:
[1] 67 72 74 62 65 59 61 69 66 68 58 64 66 57 68 62 59 73 61 61 67 81 79 76 82
[26] 90 87 82 77 72 65 73 76 84 85 81 83 83 88 92 92 89 73 81 80 81 82 84 87 85
[51] 74 86 85 82 86 88 86 83 81 81 81 82 89 90 90 86 82 80 77 79 76 78 78 77 72
[76] 79 81 86 97 94 96 94 91 92 93 93 87 84 80 78 75 73 81 76 77 71 71 78 67 76
[101] 68 82 64 71 81 69 63 70 75 76 68
Intervals:
min max count
1 56.5 76.5 46
2 67.5 81.5 51
3 75.5 86.5 51
4 80.5 97.5 51
Overlap between adjacent intervals:
[1] 27 30 31
> xyplot(ozone ~ radiation | temp.cut, data = environmental)
> xyplot(ozone ~ radiation | temp.cut, data = environmental, layout = c(1,4))
> xyplot(ozone ~ radiation | temp.cut, data = environmental, layout = c(1,4), pch = 20)
> xyplot(ozone ~ radiation | temp.cut, data = environmental, as.table = TRUE)
> # write your own panel function
> xyplot(ozone ~ radiation | temp.cut, data = environmental, as.table = TRUE,
+ panel = function(x, y, ...) {
+ panel.xyplot(x, y, ...)
+ fit <- lm(y ~ x)
+ panel.abline(fit)
+ })
more plotting, using a smoothed estimator
# temp.cut is the object created with equal.count() in the session above;
# conditioning on it gives one panel per temperature interval
xyplot(ozone ~ radiation | temp.cut, data = environmental,
as.table = TRUE,
panel = function(x, y, ...) {
panel.xyplot(x, y, ...) # the points of this panel
panel.loess(x, y) # smooth curve through the points of this panel
})
adding labels and main title
# same plot as before, with axis labels and a main title added
xyplot(ozone ~ radiation | temp.cut, data = environmental,
as.table = TRUE,
panel = function(x, y, ...) {
panel.xyplot(x, y, ...)
panel.loess(x, y)
},
xlab = "Solar Radiation", # x-axis label
ylab = "Ozone(ppb)", # y-axis label
main = "Ozone vs. Solar Radiation") # title above the plot
take wind into account
# cut wind into 4 intervals with equal.count(), like temp.cut was made from temperature
wind.cut <- equal.count(environmental$wind, 4)
# temp.cut * wind.cut conditions on both variables:
# one panel per combination of temperature interval and wind interval
xyplot(ozone ~ radiation | temp.cut * wind.cut, data = environmental,
as.table = TRUE,
panel = function(x, y, ...) {
panel.xyplot(x, y, ...)
panel.loess(x, y)
},
xlab = "Solar Radiation",
ylab = "Ozone(ppb)",
main = "Ozone vs. Solar Radiation")
dynamic substitution of labels
from https://class.coursera.org/compdata-003/lecture/29 (week 3)
# dynamic substitution
# substitute() replaces the symbol k inside the expression with a computed
# value, so the axis label shows the actual mean next to the bar(x) symbol
x <- rnorm(100)
y <- x + rnorm(100, sd = 0.5)
plot(x, y,
xlab = substitute(bar(x) == k, list(k=mean(x))), # k becomes mean(x)
ylab = substitute(bar(y) == k, list(k=mean(y))) # k becomes mean(y)
)
# or in a loop of plots
# Draw four histograms on a 2 x 2 grid; each title shows theta == <loop index>.
# par() returns the settings it replaces, so keep them and put them back at
# the end; otherwise every later plot on this device stays on the 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
for (i in 1:4) {
  x <- rnorm(100)
  # substitute() fills the loop index into the plotmath title
  hist(x, main = substitute(theta == num, list(num = i)))
}
par(old_par)
http://students.washington.edu/mclarkso/
http://faculty.washington.edu/kenrice/sisg/SISG-08-05.pdf
http://www2.warwick.ac.uk/fac/sci/statistics/staff/academic-research/reed/rexercises.pdf
plot titles ( concatenate-strings-and-expressions-in-a-plots-title )
http://stackoverflow.com/questions/4302367/
apply functions:
http://nsaunders.wordpress.com/2010/08/20/a-brief-introduction-to-apply-in-r/