Skip to content

Instantly share code, notes, and snippets.

@aaronsaunders
Created August 8, 2013 10:45
Show Gist options
  • Save aaronsaunders/6183633 to your computer and use it in GitHub Desktop.
Save aaronsaunders/6183633 to your computer and use it in GitHub Desktop.
Manipulating data
with()
within()
round(x, n) # rounds x to n decimal places
ceiling(x) # vector of the smallest integers >= x
floor(x) # vector of the largest integers <= x
as.integer(x) # truncates real x toward zero to integers (compare to round(x, 0))
as.integer(x < cutpoint) # vector of 1 where x is less than cutpoint, 0 otherwise
# vectorised logical
ifelse(test, true_value, false_value)
factor(ifelse(a < cutpoint, "Neg", "Pos")) # is another way to dichotomize and to make a factor for analysis
bymedian <- with(InsectSprays, reorder(spray, count, median)) # reorders spray factor by the median count of the observations.
transform(data.df, variable names = some operation) # can be part of a set up for a data set
replace(x, list, values) # remember to assign this to some object i.e., x <- replace(x,x==-9,NA)
# similar to the operation x[x==-9] <- NA
cut(x, breaks, labels = NULL,
include.lowest = FALSE, right = TRUE, dig.lab = 3, ...)
# Example: split 1:10 into two equal-width intervals and label them.
# 1-5 fall in "low", 6-10 in "high"; the result is a factor.
cut(1:10, breaks = 2, labels = c("low", "high"))
######################
# SUBSETTING
# Selecting Observations (Rows)
newdata <- mydata[1:5, ] # first 5 observations
# based on variable values
newdata <- mydata[ which(mydata$gender=='F' & mydata$age > 65), ]
newdata <- mydata[c(1,5:10), ]
dat.csv[1:10, "female"]
dat.csv$female[1:10]
# Selecting Variables (columns)
myvars <- c("v1", "v2", "v3")
newdata <- mydata[myvars]
myvars <- names(mydata) %in% c("v1", "v2", "v3")
newdata <- mydata[!myvars]
newdata <- mydata[c(-3,-5)] # exclude 3rd and 5th variable
mydata$v3 <- mydata$v5 <- NULL # delete variables v3 and v5
# Subset Function
subset(dataset, logical) # those objects meeting a logical criterion
subset(data.df, select=variables, logical) # get those objects from a data frame that meet a criterion
newdata <- subset(mydata, age >= 20 | age < 10, select=c(ID, Weight))
newdata <- subset(mydata, sex=="m" & age > 25, select=weight:income)
# Random Samples
# take a random sample of size 50 from a dataset mydata
# sample without replacement
mysample <- mydata[sample(1:nrow(mydata), 50,
replace=FALSE),]
###########################################
# Sorting
rev(x) # reverses the elements of x
sort(x) # sorts the elements of x in increasing order
rev(sort(x)) # sorts x in decreasing order; equivalent to sort(x, decreasing = TRUE)
order(..., na.last = TRUE, decreasing = FALSE)
order(vector) # returns the indices that would sort the vector in increasing order
order(vector, decreasing = T) # for decreasing
x[order(x$B), ] # sort a dataframe by the order of the elements in B
x[rev(order(x$B)), ] # sort the dataframe in reverse order
complete <- subset(data.df, complete.cases(data.df)) # find those cases with no missing values
rank(vector) # returns the rank of each element (not the sorting permutation that order() gives); ties are resolved by ties.method
rank(x, na.last = TRUE,
ties.method = c("average", "first", "random", "max", "min"))
# sorting examples using the mtcars dataset
# Columns are looked up with with(mtcars, ...) rather than attach(mtcars):
# attach() alters the search path for the rest of the session and can
# silently mask, or be masked by, other objects with the same names.
# Each statement overwrites newdata with a differently sorted copy of mtcars.
newdata <- mtcars[with(mtcars, order(mpg)), ] # sort by mpg
newdata <- mtcars[with(mtcars, order(mpg, cyl)), ] # sort by mpg and cyl
# Negating a numeric key reverses its direction within order().
newdata <- mtcars[with(mtcars, order(mpg, -cyl)), ] # sort by mpg (ascending) and cyl (descending)
substr(myString, start, stop)
# writing a vector of strings,
write.table(vector_of_interest, file="vector.txt", sep="\n",
col.names = FALSE, row.names=FALSE, quote=FALSE)
write.table(myData, file = "table.txt", sep = "\t", quote = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment