Skip to content

Instantly share code, notes, and snippets.

@chernan
Last active December 21, 2015 10:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save chernan/6291488 to your computer and use it in GitHub Desktop.
Save chernan/6291488 to your computer and use it in GitHub Desktop.
Source code of article : "Quantitative Proteomics Identifies the Membrane-Associated Peroxidase GPx8 as a Cellular Substrate of the Hepatitis C Virus NS3-4A Protease" Morikawa K, Gouttenoire J, Hernandez C, Thi VL, Tran HT, Lange CM, Dill MT, Heim MH, Donzé O, Penin F, Quadroni M, Moradpour D. Hepatology, 2013 Aug 8 doi: http://dx.doi.org/10.100…
#'##############################################################################
#'
#' SliceSILAC pipeline : Step 2
#'
#' This script is part of a pipeline to analyze SliceSILAC data. In particular,
#' it is to be run on sthe output file generated by the script gel_mobility_2-0.pl .
#' It creates visual representation of the SILAC H/L ratios and ratio counts,
#' from a MaxQuant ProteinGroup.txt output generated with a SliceSILAC experimental
#' design.
#' These output images show the presence of a list of proteins in different gel
#' slices. Each presence is represented as a circle where the fill color indicates
#' the H/L SILAC ratio of this protein and the radius indicates the ratio count.
#'
#'
#' Usage:
#' Rscript create_slicesilac_images.R <input_file_name> <output_file_name> <slices_number> <list:of:indexes|all>
#'
#' Rscript tool to run R scripts in the command prompt.
#' This tool is part of the default R installation.
#' create_slicesilac_images.R path to this script
#' input_file_name path to the list of protein groups
#' (MaxQuant output generated with SliceSILAC experimental design).
#' output_file_name any name with the png extension.
#' Selected proteins will be splited in subsets of 20 elements
#' and a different image will be generated for each subset.
#' slices_number number of gel slices
#' all / list:of:indexes list of the protein indexes to be used, separated by ':'
#' 1 corresponds to the index of the first protein in input_file_name
#' This list can be replaced by 'all' if the user wants to visualize all proteins
#'
#'
#' Examples of command line:
#'
#' * Use 'all' proteins listed in a file called 'ProteinGroups_filtered.txt' (located in
#' the same folder as the script) to create an image called 'output.png'. The gel was originally
#' cut into 18 slices.
#'
#' Rscript ./create_slicesilac_images.R ./ProteinGroups_filtered.txt output.png 18 all
#'
#' * Same as above but use only proteins whose index is 1:2:6:3 in ProteinGroup.txt.
#' Note that indexes do not need to be listed in ascending order
#'
#' Rscript ./create_slicesilac_images.R ./ProteinGroups_filtered.txt output.png 18 1:2:6:3
#'
#' Script version : 1.0
#' Date: 22-Aug-2012
#' Author: Celine Hernandez
#' Contact: wwwpaf@unil.ch
#'
#'##############################################################################
#' Create a png image from SliceSILAC data.
#'
#' This function was freely adapted from Taiyun Wei's circle.correlation function
#' drawing correlation matrix.
#' http://addictedtor.free.fr/graphiques/graphcode.php?graph=152
#'
#' @param ratios matrix of
#' @param counts
#' @param col scale of colors to fill circles, from 1 to -1, vector
#'
#' @param bg background color of graph
#' @param cex numeric, for the variable names
#' @param title title of the graph
#' @param legend_str text of the legend
#'
#' @param filename name of the output image to be created, png format
#'
#' @param ... extra parameters, currenlty ignored
#'
draw.slicesilac <- function(ratios, counts, col=c("black","white"),
bg = "white", cex = 1, title = "", legend_str = "Proteins",
filename = "mygraph.png", ...){
#' check parameters
if (is.null(ratios))
return(invisible())
if ((!is.matrix(ratios)) )
stop("Need a matrix!")
#' set up variable names
nb_row_values <- nrow(ratios)
nb_col_values <- ncol(ratios)
rows_names <- rownames(ratios)
cols_names <- colnames(ratios)
if (is.null(rows_names))
rows_names <- 1:nb_row_values
if (is.null(cols_names))
cols_names <- 1:nb_col_values
rows_names <- as.character(rows_names)
cols_names <- as.character(cols_names)
#' setup output image
#default display parameters
par(mai=c(0, 0, 0, 0), mar = c(1, 1, 1, 1), bg = bg, oma=c(0, 0, 0, 0))
#select output format (comment out if pdf desired) ; needs par() to be called before
png(filename, bg = bg, res=100, width=960, height=60+31*nb_row_values+300)
#set again display parameters for the newly selected output format
par(mai=c(0, 0, 0, 0), mar = c(1, 1, 1, 1), bg = bg, oma=c(0, 0, 0, 0))
#' draw labels and grid
## calculate label-text width approximately
plot.new()
plot.window(c(0, nb_col_values), c(0, nb_row_values), asp = 1)
xlabwidth <- max(strwidth(rows_names, cex = cex))
ylabwidth <- max(strwidth(cols_names, cex = cex))
## set up an empty plot with the appropriate dimensions
plot.window(c(-xlabwidth + 0.5, nb_col_values + 0.5),
c(-(2+nb_col_values)/2, nb_row_values + 1 + ylabwidth),
asp = 1)
rect(0.5, 0.5, nb_col_values + 0.5, nb_row_values + 0.5, col = bg) ##background color
## add variable names and title
text(rep(-xlabwidth/2 -0.5, nb_row_values), nb_row_values:1, rows_names, col = "black", cex = cex)
text(1:nb_col_values, rep(nb_row_values + 1 + ylabwidth/2, nb_col_values), cols_names, srt = 90, col = "red",
cex = cex)
title(title)
## add grid
segments(rep(0.5, nb_row_values + 1), 0.5 + 0:nb_row_values, rep(nb_col_values + 0.5, nb_row_values + 1),
0.5 + 0:nb_row_values, col = "gray")
segments(0.5 + 0:nb_col_values, rep(0.5, nb_col_values + 1), 0.5 + 0:nb_col_values, rep(nb_row_values + 0.5,
nb_col_values), col = "gray")
#' assign circles' fill color, depend on argument col
# calculate nb of colors given in col
nc <- length(col)
#if only one color, use it for all circles
if(nc==1)
bg <- rep(col, nb_row_values*nb_col_values)
#if multiple colors
else{
#create a scale of thresholds corresponding to each color
ff <- seq(-1,1, length=nc+1)
#assign a default color index for each ratio in bg2
bg2 = rep(0, nb_row_values * nb_col_values)
#for each ratio
for (i in 1:(nb_row_values * nb_col_values)){
#rank each ratio into the scale (-1;1) in ff
# [nc] is the position of the current ratio into the scale of ff
bg2[i] <- rank( c(ff[2:nc],as.vector(ratios)[i]), ties.method = "random")[nc]
}
#put in bg the effective colors to be used, depending on the index in bg2
bg <- (col[nc:1])[bg2]
}
#' draw nb_row_values*nb_col_values circles using vector language, suggested by Yihui Xie
#' the area of circles denotes the absolute value of ratio, but is limited in size
#compute all circle radius
all_circles_elmts <- sqrt(abs(counts/20))/2
#doesn't display a circle if count is 0
all_circles_elmts[counts==0] <- NA
#doesn't display a circle if ratio is NA (yes... sometimes 1 count and no ratio)
all_circles_elmts[is.na(ratios)] <- NA
#limit upper size of circles for 30 counts
max_radius <- rep(sqrt(abs(30/20))/2, nb_row_values*nb_col_values)
dim(max_radius) <- dim(counts)
all_circles_elmts <- pmin(all_circles_elmts, max_radius)
#draw circles
symbols(rep(1:nb_col_values, each = nb_row_values), rep(nb_row_values:1, nb_col_values), add = TRUE, inches = F,
circles = as.vector(all_circles_elmts), bg = as.vector(bg))
#add text legend
legend(x=-xlabwidth, y=0, legend=legend_str, cex=0.8, bty="n")
#add color legend (plotrix function)
text(nb_col_values+0.5, 6, "H/L ratios (log2)", pos=4)
col.labels <- ""
color.legend(nb_col_values+1.3, 2, nb_col_values+2, 5, col.labels, col, align="rb", gradient="y")
col.labels <- c( expression(''<=-1), "0", expression(1<='') )
text(c( nb_col_values+2.5, nb_col_values+2.5, nb_col_values+2.5 ),
c( 2, 3.5, 5 ),
col.labels, pos=4)
#add size legend
text(nb_col_values+0.5, 12, "H/L ratios count", pos=4)
symbols(nb_col_values+1.5, 11, add = TRUE, inches = F, circles = sqrt(abs(1/20))/2, bg = "white")
text(nb_col_values+2.5, 11, "1", pos=4)
symbols(nb_col_values+1.5, 10, add = TRUE, inches = F, circles = sqrt(abs(10/20))/2, bg = "white")
text(nb_col_values+2.5, 10, "10", pos=4)
symbols(nb_col_values+1.5, 9, add = TRUE, inches = F, circles = sqrt(abs(20/20))/2, bg = "white")
text(nb_col_values+2.5, 9, "20", pos=4)
symbols(nb_col_values+1.5, 8, add = TRUE, inches = F, circles = sqrt(abs(30/20))/2, bg = "white")
text(nb_col_values+2.5, 8, expression(''>=30), pos=4)
dev.off() #comment if only pdf
}
#'####################################################################################################
#' Analysis of SliceSILAC data
library("plotrix")
#' read command line arguments
options <- commandArgs(trailingOnly = T)
if(length(options) != 4)
stop('\n\nUsage:\n Rscript create_slicesilac_images.R <input_file_name> <output_file_name> <slices_number> <list:of:indexes|all>\n\nThis script needs four arguments.\nSee internal documentation for more details and examples of command lines.\n\n')
# location of the filtered ProteinGroup.txt
input_file <- options[1]
# name of the image to be created
output_file <- options[2]
# number of slices in this experiment
slices_number <- as.numeric(options[3])
# index of the proteins to include in the graph (':' separated), or 'all'
prot_indexes_str <- options[4]
#' read input file and store the whole protein table in a variable
ProteinGroups <- read.delim(input_file, header=TRUE, sep="\t", fill=TRUE, row.names=1, stringsAsFactors=FALSE, na.strings='')
#' select indexes of proteins to be used depending on third argument
#if argument is a list of indexes separated by the character ':'
if(prot_indexes_str != "all")
prot_indexes <- lapply( strsplit( prot_indexes_str, split = ":" ), as.numeric)
#if argument is 'all', get all indexes of the proteins
if(prot_indexes_str == "all")
prot_indexes <- list(A=1:nrow(as.matrix(ProteinGroups[,1])))
#' list of slice numbers to be used for the search of columns of interest
slices_list <- c()
if(as.numeric(slices_number) < 10) {
slices_list <- c(paste('0',1:slices_number, sep=""))
}
if(as.numeric(slices_number) > 9) {
slices_list <- c(paste('0',1:9, sep=""),10:slices_number)
}
#' select important informations from the protein table
# Protein names (limited until first ';' character) for the legend
interesting_colnames_names <- c("Protein.Names")
ProteinGroups_names <-
subset(ProteinGroups,
TRUE,
select = interesting_colnames_names
)
temp_names <- as.matrix(ProteinGroups_names[prot_indexes[[1]],])
temp_names <- apply(temp_names, 1,
function (x) {
if(is.na(x)) { as.character(x)}
else {
matches <- as.matrix(regexpr(";", x))
if(matches[1,1]!=-1) {
paste(substr(x, 0, matches[1]), "...")
}
else {
as.character(x)
}
}
}
)
temp_names <- paste(prot_indexes[[1]], temp_names)
subset_ProteinGroups_names <- as.matrix(t(temp_names ) )
# Ratios for the color of circles
interesting_colnames_ratios <- paste("Ratio.H.L.Normalized.band",slices_list, sep="")
ProteinGroups_ratios <-
subset(ProteinGroups,
TRUE,
select = interesting_colnames_ratios
)
subset_ProteinGroups_ratios <- as.matrix(t( ProteinGroups_ratios[prot_indexes[[1]],]) )
subset_ProteinGroups_ratios <- log2(round(subset_ProteinGroups_ratios,3))
## Counts for the radius of circles
interesting_colnames_counts <- paste("Ratio.H.L.Count.band",slices_list, sep="")
ProteinGroups_counts <-
subset(ProteinGroups,
TRUE,
select = interesting_colnames_counts
)
subset_ProteinGroups_counts <- as.matrix(t( ProteinGroups_counts[prot_indexes[[1]],]) )
## shape table by mofifying headers, etc.
colnames(subset_ProteinGroups_names) <- prot_indexes[[1]]
colnames(subset_ProteinGroups_ratios) <- prot_indexes[[1]]
rownames(subset_ProteinGroups_ratios) <- paste("Slice ",1:slices_number)
colnames(subset_ProteinGroups_counts) <- prot_indexes[[1]]
rownames(subset_ProteinGroups_counts) <- paste("Slice ",1:slices_number)
## uncomment to display selected information
#subset_ProteinGroups_names
#subset_ProteinGroups_ratios
#subset_ProteinGroups_counts
#' generate image for each subset of 20 proteins
nb_col_tot <- ncol(subset_ProteinGroups_names)
nb_images <- ceiling(nb_col_tot/20)
for (i in 1:nb_images) {
# calculate min and max indexes
min <- (i-1)*20+1
max <- min(nb_col_tot, i*20)
# extract subset
subset_ratios <- as.matrix(subset_ProteinGroups_ratios[,min:max])
subset_counts <- as.matrix(subset_ProteinGroups_counts[,min:max])
subset_names <- as.matrix(subset_ProteinGroups_names[,min:max])
# generate images
draw.slicesilac(subset_ratios, subset_counts, col = colorRampPalette(c("red","white","green"))(100) ,
bg = "white", filename = paste( i, '_', output_file, sep=""),
legend_str=subset_names)
}
#End of script
#############################################################################################
#
# SliceSILAC pipeline : Step 1
#
# Parse tab files from MaxQuant to screen ratios band by band.
# Look for entries for which at least 2 ratios are following these criteria:
# - at least one ratio below 'less_value'
# - at least one ratio above 'more_value'
#
#
# Usage:
# perl ./gel_mobility_2-0.pl <input_file_name> <less_value> <more_value> <slice_number> output_file_name>
#
# perl Perl interpreter
# input_file_name path to the list of protein groups
# (MaxQuant output as generated with SliceSILAC experimental design).
# less_value threshold for SILAC H/L ratios
# more_value threshold for SILAC H/L ratios
# output_file_name path to the output file
#
#
# Example of command line:
#
# Select protein groups with at least one ratio below 0.8 and one above 1.0 among 18 slices,
# from file 'proteinGroups.txt' located in the same folder. The filtered list will be saved
# in a file named 'filtered_output.txt'.
#
# perl gel_mobility_2-0.pl ./proteinGroups.txt 0.8 1.0 18 filtered_output.txt
#
#
# Script version : 2.0
# Date: 27-May-2011
# Author: Manfredo Quadroni
# Contact: wwwpaf@unil.ch
#
#############################################################################################
#!/usr/bin/perl
my($tabfile, $less_value, $more_value, $slice_nb, $output_filename) = @ARGV;
#open input file
open( INPUTF, '<', $tabfile ) || die("Cannot open $tabfile in r mode: $! \n");
#open output list file (selected protein groups)
open( OUTPUTF, '>', $output_filename ) || die("Cannot open $output_filename in w mode: $! \n");
#open file for miscellaneous debug information
open( OUTPUTFC, '> debug.txt' ) || die("Cannot open debug.txt in w mode: $! \n");
$index = 0;
#read all input file
@roba = <INPUTF>;
print OUTPUTFC "Input file : \t","$tabfile\n";
print OUTPUTFC "Less value : \t","$less_value\n";
print OUTPUTFC "More value : \t","$more_value\n";
print OUTPUTFC "Number of slices : \t","$slice_nb\n";
print OUTPUTFC "Output file : \t","$output_filename\n";
foreach $line (@roba) {
chomp($line);
if ( $index eq 0 ) {
print OUTPUTF "$line\n";
@headercut = split( /\t/, $line );
for ( $i = 0 ; $i < 900 ; $i += 1 ) #scan header
{
if ( $headercut[$i] =~ m/Normalized band/ ) {
push( @colsratio, $i );
}
; # @colsratio is list of column numbers containing ratios
if ( $headercut[$i] =~ m/Count band/ ) { push( @colscount, $i ); }
; # @colscount is list of column numbers containing counts
}
print OUTPUTFC "\ncolumns w. ratios\n";
print OUTPUTFC "@colsratio\n", "\ncolumns with counts\n";
print OUTPUTFC "@colscount\n", "\nmmmmmmmmm\n";
print OUTPUTFC "\ncolumns w. ratios\t", "@colsratio\n",
"\ncolumns with counts\n", "@colscount\n", "\nmmmmmmmmm\n";
}
else {
$loratio = 0;
$hiratio = 0;
@cut = split( /\t/, $line );
for ( $k = 0 ; $k < $slice_nb ; $k += 1 ) {
$coluindex = $colsratio[$k];
$rr[$k] = $cut[$coluindex];
}; # $rr[$k] is ratio for band $k
for ( $k = 0 ; $k < $slice_nb ; $k += 1 ) {
$countindex = $colscount[$k];
$cc[$k] = $cut[$countindex];
}; # $cc[$k] is count for band $k
print OUTPUTFC "$cut[0]\t", "ratios\t", "@rr\n";
print OUTPUTFC "$cut[0]\t", "counts\t", "@cc\n";
for ( $k = 0 ; $k < $slice_nb ; $k += 1 ) { # $k contains band number index -1
if ( ( $rr[$k] <= $less_value ) and ( $cc[$k] >= 1 ) and ( $rr[$k] ne "" ) )
{
$loratio += 1;
print OUTPUTFC "$cut[0]\t", "$colsratio[$k]\t", "$rr[$k]\t", "lora\t",
"$loratio\n";
}
; #"$rr[$k]\t","$cc[$k]\t","oooo\t";};
if ( ( $rr[$k] > $more_value ) and ( $cc[$k] >= 1 ) ) {
$hiratio += 1;
print OUTPUTFC "$cut[0]\t", "$colsratio[$k]\t", "$rr[$k]\t", "hira\t",
"$hiratio\n";
}
; #"$rr[$k]\t","$cc[$k]\n";};
}
# BEWARE : do not set if (($rr[$k] <= 0.8) and ($cc[$k] > 0)) : it will take all void cases "" as good for loratio
}
if ( ( $loratio > 0 ) and ( $hiratio > 0 ) ) {
print OUTPUTF "$line\n";
print OUTPUTFC "ID ", "$cut[0]\t", "taken\n";
#print "ID ", "$cut[0]\t", "taken\n";
}
$loratio = 0;
$hiratio = 0;
$index++;
}
#close all files
close(OUTPUTF);
close(INPUTF);
close(OUTPUTFC);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment