Created
February 2, 2017 14:33
-
-
Save angelovangel/867211ac3386ce85ddacdeb09ff91af4 to your computer and use it in GitHub Desktop.
GCMS analysis of olefins in R (specific for Shimadzu GCMS Postrun output)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(dplyr) | |
library(reshape2) | |
#df | |
n <- as.integer(grep("Header", readLines("ASCIIData.txt")) %>% length()) # number of runs, without the system call above | |
ole <- readline(prompt="Enter number of olefins detected for these runs(including C30): ") | |
ole <- as.integer(ole) | |
df <- data.frame() # make an empty data.frame | |
skip <- seq(8, length.out = n, by = 20) # tell on which lines is the data, define what to skip in the next step | |
for (i in skip) { | |
df <- rbind(df,read.table("ASCIIData.txt", header = F, skip = i, sep="\t", nrows = ole)) | |
} | |
df <- df %>% select(olefin = V2, area=V10) # take columns 2 and 10 (peak area) | |
#names | |
names <- system("grep \"Data File Name\" ASCIIData.txt | cut -f 11 -d \"-\"", intern = TRUE) | |
tt <- list() # make an empty list | |
for (i in names) {tt <- cbind(tt,print(rep(i,ole)))} # repeat each sample name `ole` number of times | |
df$names <- paste(tt) # add sample names to df | |
#df$names <- substr(df$names,1,3) # in this case, take only sample name, removing replicate info | |
#df$names <- as.factor(df$names) | |
rm(tt) | |
df <- dcast(df,names~olefin, value.var = "area") | |
head(df) | |
# calc and plotting | |
C30conc <- readline(prompt="Enter the concentration of internal standard (µg/ml): ") | |
C30conc <- as.integer(C30conc) | |
df.norm <- df[,2:ncol(df)]/df$C30*C30conc # normalize peak areas to C30 peak | |
df.norm[,ncol(df.norm)] <- NULL # delete standard (last column) | |
df.norm$total <- rowSums(df.norm) # get total olefins per sample | |
df.norm$names <- df$names | |
df.norm$samples <- substr(df.norm$names, 6,8) # in this case the sample names are located in positions 6 to 8 in the names string | |
# first melt data, then get statistics and plot | |
df.norm.melt <- df.norm %>% select(-names) %>% melt(id.vars = "samples") # get rid of the "names" column here | |
df.norm.melt %>% group_by(samples, variable) %>% summarise(mean=mean(value),SD= sd(value),N= n()) %>% write.csv("out-quant-olefins.csv") # mean for each olefin and for total, grouped by sample | |
p1 <- ggplot(df.norm.melt, aes(x= variable, y= value)) + geom_boxplot(aes(color=samples)) # and so on... | |
print(p1) | |
ggsave("plot1.pdf") | |
ggsave("plot1.svg") | |
ggsave("plot1.png") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment