aaronsaunders/Manipulating_data

## Manipulating_data
with()

within()


round(x, n)                               # rounds x to n decimal places
ceiling(x)                                # vector of the smallest integers >= x
floor(x)                                  # vector of the largest integers <= x
as.integer(x)                             # truncates real x towards zero (compare to round(x, 0))
as.integer(x < cutpoint)                  # vector of 1 where x is less than cutpoint, 0 otherwise

# vectorised logical
ifelse(test, true_value, false_value)

factor(ifelse(a < cutpoint, "Neg", "Pos"))  # is another way to dichotomize and to make a factor for analysis
bymedian <- with(InsectSprays, reorder(spray, count, median))  # reorders spray factor by the median count of the observations.


transform(data.df, variable names = some operation) # can be part of a set up for a data set

replace(x, list, values)                 # remember to assign this to some object i.e., x <- replace(x,x==-9,NA)
                                         # similar to the operation x[x==-9] <- NA
cut(x, breaks, labels = NULL,
    include.lowest = FALSE, right = TRUE, dig.lab = 3, ...)
cut(1:10, breaks=2, labels=c("low", "high"))

######################
# SUBSETTING

# Selecting Observations (Rows)
newdata <- mydata[1:5, ]    # first 5 observations
# based on variable values
newdata <- mydata[ which(mydata$gender=='F' & mydata$age > 65), ]
newdata <- mydata[c(1,5:10), ]
dat.csv[1:10, "female"]
dat.csv$female[1:10]

# Selecting Variables (columns)
myvars <- c("v1", "v2", "v3")
newdata <- mydata[myvars]
myvars <- names(mydata) %in% c("v1", "v2", "v3")
newdata <- mydata[!myvars]
newdata <- mydata[c(-3,-5)]     # exclude 3rd and 5th variable
mydata$v3 <- mydata$v5 <- NULL  # delete variables v3 and v5

# Subset Function

subset(dataset, logical)                     # those objects meeting a logical criterion
subset(data.df, select=variables, logical)   # get those objects from a data frame that meet a criterion
newdata <- subset(mydata, age >= 20 | age < 10, select=c(ID, Weight))
newdata <- subset(mydata, sex=="m"  & age > 25, select=weight:income)

# Random Samples
# take a random sample of size 50 from a dataset mydata
# sample without replacement
mysample <- mydata[sample(1:nrow(mydata), 50,
       replace=FALSE),]

###########################################
# Sorting

rev(x)           # reverses the elements of x
sort(x)          # sorts the elements of x in increasing order
rev(sort(x))     # sorts the elements of x in decreasing order

order(..., na.last = TRUE, decreasing = FALSE)
order(vector) # returns the indices that would sort the vector
order(vector, decreasing = T) # for decreasing
x[order(x$B), ]                              # sort a dataframe by the order of the elements in B
x[rev(order(x$B)), ]                         # sort the dataframe in reverse order
complete <- subset(data.df, complete.cases(data.df)) # find those cases with no missing values

rank(vector)  # returns the rank of each element (ties are averaged by default), whereas order() returns the sorting indices
rank(x, na.last = TRUE,
     ties.method = c("average", "first", "random", "max", "min"))

# sorting examples using the mtcars dataset
attach(mtcars)
newdata <- mtcars[order(mpg),] # sort by mpg
newdata <- mtcars[order(mpg, cyl),]  # sort by mpg and cyl
newdata <- mtcars[order(mpg, -cyl),] #sort by mpg (ascending) and cyl (descending)


## R-strings.R

substr(myString, start, stop)

# writing a vector of strings,
write.table(vector_of_interest, file="vector.txt", sep="\n",
            col.names = FALSE, row.names=FALSE, quote=FALSE)

write.table(myData, file = "table.txt", sep = "\t", quote = FALSE)
	with()

	within()


	round(x, n) # rounds x to n decimal places
	ceiling(x) # vector of the smallest integers >= x
	floor(x) # vector of the largest integers <= x
	as.integer(x) # truncates real x towards zero (compare to round(x, 0))
	as.integer(x < cutpoint) # vector of 1 where x is less than cutpoint, 0 otherwise

	# vectorised logical
	ifelse(test, true_value, false_value)

	factor(ifelse(a < cutpoint, "Neg", "Pos")) # is another way to dichotomize and to make a factor for analysis
	bymedian <- with(InsectSprays, reorder(spray, count, median)) # reorders spray factor by the median count of the observations.


	transform(data.df, variable names = some operation) # can be part of a set up for a data set

	replace(x, list, values) # remember to assign this to some object i.e., x <- replace(x,x==-9,NA)
	# similar to the operation x[x==-9] <- NA
	cut(x, breaks, labels = NULL,
	include.lowest = FALSE, right = TRUE, dig.lab = 3, ...)
	cut(1:10, breaks=2, labels=c("low", "high"))

	######################
	# SUBSETTING

	# Selecting Observations (Rows)
	newdata <- mydata[1:5, ] # first 5 observations
	# based on variable values
	newdata <- mydata[ which(mydata$gender=='F' & mydata$age > 65), ]
	newdata <- mydata[c(1,5:10), ]
	dat.csv[1:10, "female"]
	dat.csv$female[1:10]

	# Selecting Variables (columns)
	myvars <- c("v1", "v2", "v3")
	newdata <- mydata[myvars]
	myvars <- names(mydata) %in% c("v1", "v2", "v3")
	newdata <- mydata[!myvars]
	newdata <- mydata[c(-3,-5)] # exclude 3rd and 5th variable
	mydata$v3 <- mydata$v5 <- NULL # delete variables v3 and v5

	# Subset Function

	subset(dataset, logical) # those objects meeting a logical criterion
	subset(data.df, select=variables, logical) # get those objects from a data frame that meet a criterion
	newdata <- subset(mydata, age >= 20 | age < 10, select=c(ID, Weight))
	newdata <- subset(mydata, sex=="m" & age > 25, select=weight:income)

	# Random Samples
	# take a random sample of size 50 from a dataset mydata
	# sample without replacement
	mysample <- mydata[sample(1:nrow(mydata), 50,
	replace=FALSE),]

	###########################################
	# Sorting

	rev(x) # reverses the elements of x
	sort(x) # sorts the elements of x in increasing order
	rev(sort(x)) # sorts the elements of x in decreasing order

	order(..., na.last = TRUE, decreasing = FALSE)
	order(vector) # returns the indices that would sort the vector
	order(vector, decreasing = T) # for decreasing
	x[order(x$B), ] # sort a dataframe by the order of the elements in B
	x[rev(order(x$B)), ] # sort the dataframe in reverse order
	complete <- subset(data.df, complete.cases(data.df)) # find those cases with no missing values

	rank(vector) # returns the rank of each element (ties are averaged by default), whereas order() returns the sorting indices
	rank(x, na.last = TRUE,
	ties.method = c("average", "first", "random", "max", "min"))

	# sorting examples using the mtcars dataset
	attach(mtcars)
	newdata <- mtcars[order(mpg),] # sort by mpg
	newdata <- mtcars[order(mpg, cyl),] # sort by mpg and cyl
	newdata <- mtcars[order(mpg, -cyl),] #sort by mpg (ascending) and cyl (descending)

	substr(myString, start, stop)

	# writing a vector of strings,
	write.table(vector_of_interest, file="vector.txt", sep="\n",
	col.names = FALSE, row.names=FALSE, quote=FALSE)

	write.table(myData, file = "table.txt", sep = "\t", quote = FALSE)