-
-
Save PAFGit/6291977 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#'############################################################################## | |
#' | |
#' SliceSILAC pipeline : Step 2 | |
#' | |
#' This script is part of a pipeline to analyze SliceSILAC data. In particular, | |
#' it is to be run on sthe output file generated by the script gel_mobility_2-0.pl . | |
#' It creates visual representation of the SILAC H/L ratios and ratio counts, | |
#' from a MaxQuant ProteinGroup.txt output generated with a SliceSILAC experimental | |
#' design. | |
#' These output images show the presence of a list of proteins in different gel | |
#' slices. Each presence is represented as a circle where the fill color indicates | |
#' the H/L SILAC ratio of this protein and the radius indicates the ratio count. | |
#' | |
#' | |
#' Usage: | |
#' Rscript create_slicesilac_images.R <input_file_name> <output_file_name> <slices_number> <list:of:indexes|all> | |
#' | |
#' Rscript tool to run R scripts in the command prompt. | |
#' This tool is part of the default R installation. | |
#' create_slicesilac_images.R path to this script | |
#' input_file_name path to the list of protein groups | |
#' (MaxQuant output generated with SliceSILAC experimental design). | |
#' output_file_name any name with the png extension. | |
#' Selected proteins will be splited in subsets of 20 elements | |
#' and a different image will be generated for each subset. | |
#' slices_number number of gel slices | |
#' all / list:of:indexes list of the protein indexes to be used, separated by ':' | |
#' 1 corresponds to the index of the first protein in input_file_name | |
#' This list can be replaced by 'all' if the user wants to visualize all proteins | |
#' | |
#' | |
#' Examples of command line: | |
#' | |
#' * Use 'all' proteins listed in a file called 'ProteinGroups_filtered.txt' (located in | |
#' the same folder as the script) to create an image called 'output.png'. The gel was originally | |
#' cut into 18 slices. | |
#' | |
#' Rscript ./create_slicesilac_images.R ./ProteinGroups_filtered.txt output.png 18 all | |
#' | |
#' * Same as above but use only proteins whose index is 1:2:6:3 in ProteinGroup.txt. | |
#' Note that indexes do not need to be listed in ascending order | |
#' | |
#' Rscript ./create_slicesilac_images.R ./ProteinGroups_filtered.txt output.png 18 1:2:6:3 | |
#' | |
#' Script version : 1.0 | |
#' Date: 22-Aug-2012 | |
#' Author: Celine Hernandez | |
#' Contact: wwwpaf@unil.ch | |
#' | |
#'############################################################################## | |
#' Create a png image from SliceSILAC data. | |
#' | |
#' This function was freely adapted from Taiyun Wei's circle.correlation function | |
#' drawing correlation matrix. | |
#' http://addictedtor.free.fr/graphiques/graphcode.php?graph=152 | |
#' | |
#' @param ratios matrix of | |
#' @param counts | |
#' @param col scale of colors to fill circles, from 1 to -1, vector | |
#' | |
#' @param bg background color of graph | |
#' @param cex numeric, for the variable names | |
#' @param title title of the graph | |
#' @param legend_str text of the legend | |
#' | |
#' @param filename name of the output image to be created, png format | |
#' | |
#' @param ... extra parameters, currenlty ignored | |
#' | |
draw.slicesilac <- function(ratios, counts, col=c("black","white"), | |
bg = "white", cex = 1, title = "", legend_str = "Proteins", | |
filename = "mygraph.png", ...){ | |
#' check parameters | |
if (is.null(ratios)) | |
return(invisible()) | |
if ((!is.matrix(ratios)) ) | |
stop("Need a matrix!") | |
#' set up variable names | |
nb_row_values <- nrow(ratios) | |
nb_col_values <- ncol(ratios) | |
rows_names <- rownames(ratios) | |
cols_names <- colnames(ratios) | |
if (is.null(rows_names)) | |
rows_names <- 1:nb_row_values | |
if (is.null(cols_names)) | |
cols_names <- 1:nb_col_values | |
rows_names <- as.character(rows_names) | |
cols_names <- as.character(cols_names) | |
#' setup output image | |
#default display parameters | |
par(mai=c(0, 0, 0, 0), mar = c(1, 1, 1, 1), bg = bg, oma=c(0, 0, 0, 0)) | |
#select output format (comment out if pdf desired) ; needs par() to be called before | |
png(filename, bg = bg, res=100, width=960, height=60+31*nb_row_values+300) | |
#set again display parameters for the newly selected output format | |
par(mai=c(0, 0, 0, 0), mar = c(1, 1, 1, 1), bg = bg, oma=c(0, 0, 0, 0)) | |
#' draw labels and grid | |
## calculate label-text width approximately | |
plot.new() | |
plot.window(c(0, nb_col_values), c(0, nb_row_values), asp = 1) | |
xlabwidth <- max(strwidth(rows_names, cex = cex)) | |
ylabwidth <- max(strwidth(cols_names, cex = cex)) | |
## set up an empty plot with the appropriate dimensions | |
plot.window(c(-xlabwidth + 0.5, nb_col_values + 0.5), | |
c(-(2+nb_col_values)/2, nb_row_values + 1 + ylabwidth), | |
asp = 1) | |
rect(0.5, 0.5, nb_col_values + 0.5, nb_row_values + 0.5, col = bg) ##background color | |
## add variable names and title | |
text(rep(-xlabwidth/2 -0.5, nb_row_values), nb_row_values:1, rows_names, col = "black", cex = cex) | |
text(1:nb_col_values, rep(nb_row_values + 1 + ylabwidth/2, nb_col_values), cols_names, srt = 90, col = "red", | |
cex = cex) | |
title(title) | |
## add grid | |
segments(rep(0.5, nb_row_values + 1), 0.5 + 0:nb_row_values, rep(nb_col_values + 0.5, nb_row_values + 1), | |
0.5 + 0:nb_row_values, col = "gray") | |
segments(0.5 + 0:nb_col_values, rep(0.5, nb_col_values + 1), 0.5 + 0:nb_col_values, rep(nb_row_values + 0.5, | |
nb_col_values), col = "gray") | |
#' assign circles' fill color, depend on argument col | |
# calculate nb of colors given in col | |
nc <- length(col) | |
#if only one color, use it for all circles | |
if(nc==1) | |
bg <- rep(col, nb_row_values*nb_col_values) | |
#if multiple colors | |
else{ | |
#create a scale of thresholds corresponding to each color | |
ff <- seq(-1,1, length=nc+1) | |
#assign a default color index for each ratio in bg2 | |
bg2 = rep(0, nb_row_values * nb_col_values) | |
#for each ratio | |
for (i in 1:(nb_row_values * nb_col_values)){ | |
#rank each ratio into the scale (-1;1) in ff | |
# [nc] is the position of the current ratio into the scale of ff | |
bg2[i] <- rank( c(ff[2:nc],as.vector(ratios)[i]), ties.method = "random")[nc] | |
} | |
#put in bg the effective colors to be used, depending on the index in bg2 | |
bg <- (col[nc:1])[bg2] | |
} | |
#' draw nb_row_values*nb_col_values circles using vector language, suggested by Yihui Xie | |
#' the area of circles denotes the absolute value of ratio, but is limited in size | |
#compute all circle radius | |
all_circles_elmts <- sqrt(abs(counts/20))/2 | |
#doesn't display a circle if count is 0 | |
all_circles_elmts[counts==0] <- NA | |
#doesn't display a circle if ratio is NA (yes... sometimes 1 count and no ratio) | |
all_circles_elmts[is.na(ratios)] <- NA | |
#limit upper size of circles for 30 counts | |
max_radius <- rep(sqrt(abs(30/20))/2, nb_row_values*nb_col_values) | |
dim(max_radius) <- dim(counts) | |
all_circles_elmts <- pmin(all_circles_elmts, max_radius) | |
#draw circles | |
symbols(rep(1:nb_col_values, each = nb_row_values), rep(nb_row_values:1, nb_col_values), add = TRUE, inches = F, | |
circles = as.vector(all_circles_elmts), bg = as.vector(bg)) | |
#add text legend | |
legend(x=-xlabwidth, y=0, legend=legend_str, cex=0.8, bty="n") | |
#add color legend (plotrix function) | |
text(nb_col_values+0.5, 6, "H/L ratios (log2)", pos=4) | |
col.labels <- "" | |
color.legend(nb_col_values+1.3, 2, nb_col_values+2, 5, col.labels, col, align="rb", gradient="y") | |
col.labels <- c( expression(''<=-1), "0", expression(1<='') ) | |
text(c( nb_col_values+2.5, nb_col_values+2.5, nb_col_values+2.5 ), | |
c( 2, 3.5, 5 ), | |
col.labels, pos=4) | |
#add size legend | |
text(nb_col_values+0.5, 12, "H/L ratios count", pos=4) | |
symbols(nb_col_values+1.5, 11, add = TRUE, inches = F, circles = sqrt(abs(1/20))/2, bg = "white") | |
text(nb_col_values+2.5, 11, "1", pos=4) | |
symbols(nb_col_values+1.5, 10, add = TRUE, inches = F, circles = sqrt(abs(10/20))/2, bg = "white") | |
text(nb_col_values+2.5, 10, "10", pos=4) | |
symbols(nb_col_values+1.5, 9, add = TRUE, inches = F, circles = sqrt(abs(20/20))/2, bg = "white") | |
text(nb_col_values+2.5, 9, "20", pos=4) | |
symbols(nb_col_values+1.5, 8, add = TRUE, inches = F, circles = sqrt(abs(30/20))/2, bg = "white") | |
text(nb_col_values+2.5, 8, expression(''>=30), pos=4) | |
dev.off() #comment if only pdf | |
} | |
#'#################################################################################################### | |
#' Analysis of SliceSILAC data | |
library("plotrix") | |
#' read command line arguments | |
options <- commandArgs(trailingOnly = T) | |
if(length(options) != 4) | |
stop('\n\nUsage:\n Rscript create_slicesilac_images.R <input_file_name> <output_file_name> <slices_number> <list:of:indexes|all>\n\nThis script needs four arguments.\nSee internal documentation for more details and examples of command lines.\n\n') | |
# location of the filtered ProteinGroup.txt | |
input_file <- options[1] | |
# name of the image to be created | |
output_file <- options[2] | |
# number of slices in this experiment | |
slices_number <- as.numeric(options[3]) | |
# index of the proteins to include in the graph (':' separated), or 'all' | |
prot_indexes_str <- options[4] | |
#' read input file and store the whole protein table in a variable | |
ProteinGroups <- read.delim(input_file, header=TRUE, sep="\t", fill=TRUE, row.names=1, stringsAsFactors=FALSE, na.strings='') | |
#' select indexes of proteins to be used depending on third argument | |
#if argument is a list of indexes separated by the character ':' | |
if(prot_indexes_str != "all") | |
prot_indexes <- lapply( strsplit( prot_indexes_str, split = ":" ), as.numeric) | |
#if argument is 'all', get all indexes of the proteins | |
if(prot_indexes_str == "all") | |
prot_indexes <- list(A=1:nrow(as.matrix(ProteinGroups[,1]))) | |
#' list of slice numbers to be used for the search of columns of interest | |
slices_list <- c() | |
if(as.numeric(slices_number) < 10) { | |
slices_list <- c(paste('0',1:slices_number, sep="")) | |
} | |
if(as.numeric(slices_number) > 9) { | |
slices_list <- c(paste('0',1:9, sep=""),10:slices_number) | |
} | |
#' select important informations from the protein table | |
# Protein names (limited until first ';' character) for the legend | |
interesting_colnames_names <- c("Protein.Names") | |
ProteinGroups_names <- | |
subset(ProteinGroups, | |
TRUE, | |
select = interesting_colnames_names | |
) | |
temp_names <- as.matrix(ProteinGroups_names[prot_indexes[[1]],]) | |
temp_names <- apply(temp_names, 1, | |
function (x) { | |
if(is.na(x)) { as.character(x)} | |
else { | |
matches <- as.matrix(regexpr(";", x)) | |
if(matches[1,1]!=-1) { | |
paste(substr(x, 0, matches[1]), "...") | |
} | |
else { | |
as.character(x) | |
} | |
} | |
} | |
) | |
temp_names <- paste(prot_indexes[[1]], temp_names) | |
subset_ProteinGroups_names <- as.matrix(t(temp_names ) ) | |
# Ratios for the color of circles | |
interesting_colnames_ratios <- paste("Ratio.H.L.Normalized.band",slices_list, sep="") | |
ProteinGroups_ratios <- | |
subset(ProteinGroups, | |
TRUE, | |
select = interesting_colnames_ratios | |
) | |
subset_ProteinGroups_ratios <- as.matrix(t( ProteinGroups_ratios[prot_indexes[[1]],]) ) | |
subset_ProteinGroups_ratios <- log2(round(subset_ProteinGroups_ratios,3)) | |
## Counts for the radius of circles | |
interesting_colnames_counts <- paste("Ratio.H.L.Count.band",slices_list, sep="") | |
ProteinGroups_counts <- | |
subset(ProteinGroups, | |
TRUE, | |
select = interesting_colnames_counts | |
) | |
subset_ProteinGroups_counts <- as.matrix(t( ProteinGroups_counts[prot_indexes[[1]],]) ) | |
## shape table by mofifying headers, etc. | |
colnames(subset_ProteinGroups_names) <- prot_indexes[[1]] | |
colnames(subset_ProteinGroups_ratios) <- prot_indexes[[1]] | |
rownames(subset_ProteinGroups_ratios) <- paste("Slice ",1:slices_number) | |
colnames(subset_ProteinGroups_counts) <- prot_indexes[[1]] | |
rownames(subset_ProteinGroups_counts) <- paste("Slice ",1:slices_number) | |
## uncomment to display selected information | |
#subset_ProteinGroups_names | |
#subset_ProteinGroups_ratios | |
#subset_ProteinGroups_counts | |
#' generate image for each subset of 20 proteins | |
nb_col_tot <- ncol(subset_ProteinGroups_names) | |
nb_images <- ceiling(nb_col_tot/20) | |
for (i in 1:nb_images) { | |
# calculate min and max indexes | |
min <- (i-1)*20+1 | |
max <- min(nb_col_tot, i*20) | |
# extract subset | |
subset_ratios <- as.matrix(subset_ProteinGroups_ratios[,min:max]) | |
subset_counts <- as.matrix(subset_ProteinGroups_counts[,min:max]) | |
subset_names <- as.matrix(subset_ProteinGroups_names[,min:max]) | |
# generate images | |
draw.slicesilac(subset_ratios, subset_counts, col = colorRampPalette(c("red","white","green"))(100) , | |
bg = "white", filename = paste( i, '_', output_file, sep=""), | |
legend_str=subset_names) | |
} | |
#End of script |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################################# | |
# | |
# SliceSILAC pipeline : Step 1 | |
# | |
# Parse tab files from MaxQuant to screen ratios band by band. | |
# Look for entries for which at least 2 ratios are following these criteria: | |
# - at least one ratio below 'less_value' | |
# - at least one ratio above 'more_value' | |
# | |
# | |
# Usage: | |
# perl ./gel_mobility_2-0.pl <input_file_name> <less_value> <more_value> <slice_number> output_file_name> | |
# | |
# perl Perl interpreter | |
# input_file_name path to the list of protein groups | |
# (MaxQuant output as generated with SliceSILAC experimental design). | |
# less_value threshold for SILAC H/L ratios | |
# more_value threshold for SILAC H/L ratios | |
# output_file_name path to the output file | |
# | |
# | |
# Example of command line: | |
# | |
# Select protein groups with at least one ratio below 0.8 and one above 1.0 among 18 slices, | |
# from file 'proteinGroups.txt' located in the same folder. The filtered list will be saved | |
# in a file named 'filtered_output.txt'. | |
# | |
# perl gel_mobility_2-0.pl ./proteinGroups.txt 0.8 1.0 18 filtered_output.txt | |
# | |
# | |
# Script version : 2.0 | |
# Date: 27-May-2011 | |
# Author: Manfredo Quadroni | |
# Contact: wwwpaf@unil.ch | |
# | |
############################################################################################# | |
#!/usr/bin/perl | |
my($tabfile, $less_value, $more_value, $slice_nb, $output_filename) = @ARGV; | |
#open input file | |
open( INPUTF, '<', $tabfile ) || die("Cannot open $tabfile in r mode: $! \n"); | |
#open output list file (selected protein groups) | |
open( OUTPUTF, '>', $output_filename ) || die("Cannot open $output_filename in w mode: $! \n"); | |
#open file for miscellaneous debug information | |
open( OUTPUTFC, '> debug.txt' ) || die("Cannot open debug.txt in w mode: $! \n"); | |
$index = 0; | |
#read all input file | |
@roba = <INPUTF>; | |
print OUTPUTFC "Input file : \t","$tabfile\n"; | |
print OUTPUTFC "Less value : \t","$less_value\n"; | |
print OUTPUTFC "More value : \t","$more_value\n"; | |
print OUTPUTFC "Number of slices : \t","$slice_nb\n"; | |
print OUTPUTFC "Output file : \t","$output_filename\n"; | |
foreach $line (@roba) { | |
chomp($line); | |
if ( $index eq 0 ) { | |
print OUTPUTF "$line\n"; | |
@headercut = split( /\t/, $line ); | |
for ( $i = 0 ; $i < 900 ; $i += 1 ) #scan header | |
{ | |
if ( $headercut[$i] =~ m/Normalized band/ ) { | |
push( @colsratio, $i ); | |
} | |
; # @colsratio is list of column numbers containing ratios | |
if ( $headercut[$i] =~ m/Count band/ ) { push( @colscount, $i ); } | |
; # @colscount is list of column numbers containing counts | |
} | |
print OUTPUTFC "\ncolumns w. ratios\n"; | |
print OUTPUTFC "@colsratio\n", "\ncolumns with counts\n"; | |
print OUTPUTFC "@colscount\n", "\nmmmmmmmmm\n"; | |
print OUTPUTFC "\ncolumns w. ratios\t", "@colsratio\n", | |
"\ncolumns with counts\n", "@colscount\n", "\nmmmmmmmmm\n"; | |
} | |
else { | |
$loratio = 0; | |
$hiratio = 0; | |
@cut = split( /\t/, $line ); | |
for ( $k = 0 ; $k < $slice_nb ; $k += 1 ) { | |
$coluindex = $colsratio[$k]; | |
$rr[$k] = $cut[$coluindex]; | |
}; # $rr[$k] is ratio for band $k | |
for ( $k = 0 ; $k < $slice_nb ; $k += 1 ) { | |
$countindex = $colscount[$k]; | |
$cc[$k] = $cut[$countindex]; | |
}; # $cc[$k] is count for band $k | |
print OUTPUTFC "$cut[0]\t", "ratios\t", "@rr\n"; | |
print OUTPUTFC "$cut[0]\t", "counts\t", "@cc\n"; | |
for ( $k = 0 ; $k < $slice_nb ; $k += 1 ) { # $k contains band number index -1 | |
if ( ( $rr[$k] <= $less_value ) and ( $cc[$k] >= 1 ) and ( $rr[$k] ne "" ) ) | |
{ | |
$loratio += 1; | |
print OUTPUTFC "$cut[0]\t", "$colsratio[$k]\t", "$rr[$k]\t", "lora\t", | |
"$loratio\n"; | |
} | |
; #"$rr[$k]\t","$cc[$k]\t","oooo\t";}; | |
if ( ( $rr[$k] > $more_value ) and ( $cc[$k] >= 1 ) ) { | |
$hiratio += 1; | |
print OUTPUTFC "$cut[0]\t", "$colsratio[$k]\t", "$rr[$k]\t", "hira\t", | |
"$hiratio\n"; | |
} | |
; #"$rr[$k]\t","$cc[$k]\n";}; | |
} | |
# BEWARE : do not set if (($rr[$k] <= 0.8) and ($cc[$k] > 0)) : it will take all void cases "" as good for loratio | |
} | |
if ( ( $loratio > 0 ) and ( $hiratio > 0 ) ) { | |
print OUTPUTF "$line\n"; | |
print OUTPUTFC "ID ", "$cut[0]\t", "taken\n"; | |
#print "ID ", "$cut[0]\t", "taken\n"; | |
} | |
$loratio = 0; | |
$hiratio = 0; | |
$index++; | |
} | |
#close all files | |
close(OUTPUTF); | |
close(INPUTF); | |
close(OUTPUTFC); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Source code of article :
"Quantitative Proteomics Identifies the Membrane-Associated Peroxidase GPx8 as a Cellular Substrate of the Hepatitis C Virus NS3-4A Protease"
Morikawa K, Gouttenoire J, Hernandez C, Thi VL, Tran HT, Lange CM, Dill MT, Heim MH, Donzé O, Penin F, Quadroni M, Moradpour D.
Hepatology, 2013 Aug 8
doi: http://dx.doi.org/10.1002/hep.26671 [Epub ahead of print]
PubMed PMID: 23929719