Skip to content

Instantly share code, notes, and snippets.

@angelovangel
Created February 2, 2017 14:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save angelovangel/867211ac3386ce85ddacdeb09ff91af4 to your computer and use it in GitHub Desktop.
Save angelovangel/867211ac3386ce85ddacdeb09ff91af4 to your computer and use it in GitHub Desktop.
GCMS analysis of olefins in R (specific for Shimadzu GCMS Postrun output)
library(ggplot2)
library(dplyr)
library(reshape2)
#df
n <- as.integer(grep("Header", readLines("ASCIIData.txt")) %>% length()) # number of runs, without the system call above
ole <- readline(prompt="Enter number of olefins detected for these runs(including C30): ")
ole <- as.integer(ole)
df <- data.frame() # make an empty data.frame
skip <- seq(8, length.out = n, by = 20) # tell on which lines is the data, define what to skip in the next step
for (i in skip) {
df <- rbind(df,read.table("ASCIIData.txt", header = F, skip = i, sep="\t", nrows = ole))
}
df <- df %>% select(olefin = V2, area=V10) # take columns 2 and 10 (peak area)
#names
names <- system("grep \"Data File Name\" ASCIIData.txt | cut -f 11 -d \"-\"", intern = TRUE)
tt <- list() # make an empty list
for (i in names) {tt <- cbind(tt,print(rep(i,ole)))} # repeat each sample name `ole` number of times
df$names <- paste(tt) # add sample names to df
#df$names <- substr(df$names,1,3) # in this case, take only sample name, removing replicate info
#df$names <- as.factor(df$names)
rm(tt)
df <- dcast(df,names~olefin, value.var = "area")
head(df)
# calc and plotting
C30conc <- readline(prompt="Enter the concentration of internal standard (µg/ml): ")
C30conc <- as.integer(C30conc)
df.norm <- df[,2:ncol(df)]/df$C30*C30conc # normalize peak areas to C30 peak
df.norm[,ncol(df.norm)] <- NULL # delete standard (last column)
df.norm$total <- rowSums(df.norm) # get total olefins per sample
df.norm$names <- df$names
df.norm$samples <- substr(df.norm$names, 6,8) # in this case the sample names are located in positions 6 to 8 in the names string
# first melt data, then get statistics and plot
df.norm.melt <- df.norm %>% select(-names) %>% melt(id.vars = "samples") # get rid of the "names" column here
df.norm.melt %>% group_by(samples, variable) %>% summarise(mean=mean(value),SD= sd(value),N= n()) %>% write.csv("out-quant-olefins.csv") # mean for each olefin and for total, grouped by sample
p1 <- ggplot(df.norm.melt, aes(x= variable, y= value)) + geom_boxplot(aes(color=samples)) # and so on...
print(p1)
ggsave("plot1.pdf")
ggsave("plot1.svg")
ggsave("plot1.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment