angelovangel/olefins-analysis.R

## olefins-analysis.R
library(ggplot2)
library(dplyr)
library(reshape2)

#df
n <- as.integer(grep("Header", readLines("ASCIIData.txt")) %>% length()) # number of runs, without the system call above

ole <- readline(prompt="Enter number of olefins detected for these runs(including C30): ")
ole <- as.integer(ole)

df <- data.frame() # make an empty data.frame
skip <- seq(8, length.out = n, by = 20) # tell on which lines is the data, define what to skip in the next step
for (i in skip) {
  df <- rbind(df,read.table("ASCIIData.txt", header = F, skip = i, sep="\t", nrows = ole))
}
df <- df %>% select(olefin = V2, area=V10) # take columns 2 and 10 (peak area)

#names
names <- system("grep \"Data File Name\" ASCIIData.txt | cut -f 11 -d \"-\"", intern = TRUE)
tt <- list() # make an empty list
for (i in names) {tt <- cbind(tt,print(rep(i,ole)))} # repeat each sample name `ole` number of times
df$names <- paste(tt) # add sample names to df
#df$names <- substr(df$names,1,3) # in this case, take only sample name, removing replicate info
#df$names <- as.factor(df$names)
rm(tt)
df <- dcast(df,names~olefin, value.var = "area")
head(df)

# calc and plotting
C30conc <- readline(prompt="Enter the concentration of internal standard (µg/ml): ")
C30conc <- as.integer(C30conc)
df.norm <- df[,2:ncol(df)]/df$C30*C30conc # normalize peak areas to C30 peak
df.norm[,ncol(df.norm)] <- NULL # delete standard (last column)
df.norm$total <- rowSums(df.norm) # get total olefins per sample
df.norm$names <- df$names
df.norm$samples <- substr(df.norm$names, 6,8) # in this case the sample names are located in positions 6 to 8 in the names string

# first melt data, then get statistics and plot
df.norm.melt <- df.norm %>% select(-names) %>% melt(id.vars = "samples") # get rid of the "names" column here
df.norm.melt %>% group_by(samples, variable) %>% summarise(mean=mean(value),SD= sd(value),N= n()) %>% write.csv("out-quant-olefins.csv") # mean for each olefin and for total, grouped by sample
p1 <- ggplot(df.norm.melt, aes(x= variable, y= value)) + geom_boxplot(aes(color=samples)) # and so on...
print(p1)
ggsave("plot1.pdf")
ggsave("plot1.svg")
ggsave("plot1.png")
	library(ggplot2)
	library(dplyr)
	library(reshape2)

	#df
	n <- as.integer(grep("Header", readLines("ASCIIData.txt")) %>% length()) # number of runs, without the system call above

	ole <- readline(prompt="Enter number of olefins detected for these runs(including C30): ")
	ole <- as.integer(ole)

	df <- data.frame() # make an empty data.frame
	skip <- seq(8, length.out = n, by = 20) # tell on which lines is the data, define what to skip in the next step
	for (i in skip) {
	df <- rbind(df,read.table("ASCIIData.txt", header = F, skip = i, sep="\t", nrows = ole))
	}
	df <- df %>% select(olefin = V2, area=V10) # take columns 2 and 10 (peak area)

	#names
	names <- system("grep \"Data File Name\" ASCIIData.txt \| cut -f 11 -d \"-\"", intern = TRUE)
	tt <- list() # make an empty list
	for (i in names) {tt <- cbind(tt,print(rep(i,ole)))} # repeat each sample name `ole` number of times
	df$names <- paste(tt) # add sample names to df
	#df$names <- substr(df$names,1,3) # in this case, take only sample name, removing replicate info
	#df$names <- as.factor(df$names)
	rm(tt)
	df <- dcast(df,names~olefin, value.var = "area")
	head(df)

	# calc and plotting
	C30conc <- readline(prompt="Enter the concentration of internal standard (µg/ml): ")
	C30conc <- as.integer(C30conc)
	df.norm <- df[,2:ncol(df)]/df$C30*C30conc # normalize peak areas to C30 peak
	df.norm[,ncol(df.norm)] <- NULL # delete standard (last column)
	df.norm$total <- rowSums(df.norm) # get total olefins per sample
	df.norm$names <- df$names
	df.norm$samples <- substr(df.norm$names, 6,8) # in this case the sample names are located in positions 6 to 8 in the names string

	# first melt data, then get statistics and plot
	df.norm.melt <- df.norm %>% select(-names) %>% melt(id.vars = "samples") # get rid of the "names" column here
	df.norm.melt %>% group_by(samples, variable) %>% summarise(mean=mean(value),SD= sd(value),N= n()) %>% write.csv("out-quant-olefins.csv") # mean for each olefin and for total, grouped by sample
	p1 <- ggplot(df.norm.melt, aes(x= variable, y= value)) + geom_boxplot(aes(color=samples)) # and so on...
	print(p1)
	ggsave("plot1.pdf")
	ggsave("plot1.svg")
	ggsave("plot1.png")