Created
November 16, 2010 21:50
-
-
Save stephenturner/702587 to your computer and use it in GitHub Desktop.
parallelize_plink_ibdibd.pl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
#
# Parses a .fam file and generates sample list files of <chunksize> samples
# each (tmp.list000, tmp.list001, ...) in the current directory.
# Generates one PBS cluster submission file (submitme_<i>_<j>.pbs) for every
# pair of list files with i <= j; each job runs "plink --genome" restricted
# to that pair of lists via --genome-lists.
# Justin Giles, Vanderbilt University, 2010
#
# Arguments:
#   <bedfile_base>    PLINK binary fileset prefix; <bedfile_base>.fam is read
#   <chunksize>       number of samples per list file (positive integer)
#   <PLINK_frq_file>  frequency file handed to plink --read-freq
#
use strict;
use warnings;

if (scalar(@ARGV) != 3) {
    print "\nPLEASE NOTE: EDIT THIS SCRIPT IN THE PBS AREA TO MODIFY IT TO YOUR NEEDS\n";
    print "parallelize_plink_ibd.pl <bedfile_base> <chunksize> <PLINK_frq_file>\n";
    print "\nExample:\n";
    print "parallelize_plink_ibd.pl /scratch/turnersd/dataprefix 100 /scratch/turnersd/freq.frq\n";
    exit;
}

my ($bfile_base, $chunk_size, $freq_file) = @ARGV;

# A zero or non-numeric chunk size would never close a list file.
$chunk_size =~ /\A[1-9][0-9]*\z/
    or die "chunksize must be a positive integer, got '$chunk_size'\n";

# Absolute location of the list files; the generated jobs cd elsewhere
# before running plink, so the lists must be referenced by full path.
my $path = `pwd`;
die "Cannot determine the current directory\n" unless defined $path;
chomp($path);

# Split the first two columns (family ID, individual ID) of the .fam file
# into list files of $chunk_size lines each.  Done in Perl rather than via
# a shell pipeline so the file name is never interpreted by a shell and the
# number of list files is known exactly (stale tmp* files are not counted).
my $fam_file = "$bfile_base.fam";
open my $fam_fh, '<', $fam_file or die "Cannot open $fam_file: $!\n";

my $num_chunks     = 0;
my $lines_in_chunk = 0;
my $list_fh;
while (my $line = <$fam_fh>) {
    next if $line !~ /\S/;    # ignore blank lines
    my ($fid, $iid) = split ' ', $line;
    die "Malformed line $. in $fam_file\n" unless defined $iid;

    if (!defined $list_fh) {
        my $list_file = sprintf 'tmp.list%03d', $num_chunks;
        open $list_fh, '>', $list_file or die "Cannot open $list_file: $!\n";
        $num_chunks++;
    }
    print {$list_fh} "$fid $iid\n";

    if (++$lines_in_chunk == $chunk_size) {
        close $list_fh or die "Cannot close sample list: $!\n";
        undef $list_fh;
        $lines_in_chunk = 0;
    }
}
if (defined $list_fh) {
    close $list_fh or die "Cannot close sample list: $!\n";
}
close $fam_fh;

# One job per unordered pair of list files (including each list with itself),
# which together cover every pair of samples exactly once.
my $last_chunk = $num_chunks - 1;
for my $i (0 .. $last_chunk) {
    for my $j ($i .. $last_chunk) {
        my $file = "submitme_" . $i . "_" . $j . ".pbs";
        open my $out, '>', $file or die "Cannot open $file: $!\n";

        # PBS header: the embedded \n escapes plus the literal line breaks
        # produce blank lines in the job file, exactly as before.
        print {$out} "#!/bin/ksh\n
##EDIT THIS TO YOUR EMAIL##
#PBS -M username\@domain.com
#PBS -m bae
##CHANGE THIS IF YOU NEED MORE NODES FOR THIS PROCESS##
#PBS -l nodes=1
##CHANGE WALLTIME AND CPUT TO MATCH THE LENGTH OF THIS PROCESS##
#PBS -l walltime=10:00:00\n
#PBS -l cput=10:00:00\n
##CHANGE THE PMEM AND MEM TO MATCH THE MEMORY REQUIREMENTS OF THIS PROCESS##
#PBS -l pmem=7000mb\n
#PBS -l mem=7000mb\n
#PBS -j oe\n
## CHANGE THE GROUP TO MATCH YOUR GROUP##
#PBS -W group_list=YOUR_GROUP_NAME
dir=/scratch/\$USER/ibd/\n
[ -d \$dir ] || mkdir \$dir
cd \$dir/\n\n";

        # Zero-padded suffixes matching the list file names written above.
        my $i_file = sprintf '%03d', $i;
        my $j_file = sprintf '%03d', $j;

        print {$out} "plink --bfile $bfile_base \\
--read-freq $freq_file \\
--genome --min 0.05 \\
--genome-lists $path/tmp.list$i_file \\
$path/tmp.list$j_file \\
--out data.sub.$i.$j\n";

        close $out or die "Cannot close $file: $!\n";
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment