Last active
April 4, 2017 20:10
-
-
Save nievergeltlab/55b4eb483b3903b1c6ddd4c41e66950a to your computer and use it in GitHub Desktop.
This makes imputed probabilities from ricopili into a dosage file format Working with job system (TORQUE). Have a list of files.
Want to run same operation on all of them.
Can run multiple at the same time on a single node The files to be processed are listed in a single file. The number of processes to be run on a single node are noted. The job…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#PBS -lnodes=1 | |
#PBS -lwalltime=0:05:00 | |
#!/bin/bash | |
while getopts n:o:p:d:s: option | |
do | |
case "${option}" | |
in | |
o) outputfile=${OPTARG};; | |
n) nodeuse=${OPTARG};; | |
p) probdir=${OPTARG};; | |
d) outdir=${OPTARG};; | |
s) nsub=${OPTARG};; | |
esac | |
done | |
#This version of the script is made for GEMMA format files | |
#Write the start and stop points of the file | |
jstart=$((($PBS_ARRAYID-1)*$nodeuse +1)) | |
jstop=$(($PBS_ARRAYID*$nodeuse)) | |
for j in $(seq -w $jstart 1 $jstop) | |
do | |
file_use=$(awk -v lineno=$j '{if(NR==lineno) print}' $outputfile) | |
zcat "$probdir"/"$file_use" | awk -v s=$nsub '{ printf $1 "," $2 "," $3; for(i=1; i<=s; i++) printf "," $(i*2+2)*2+$(i*2+3); printf "\n" }' | gzip > "$outdir"/"$file_use".doscnt.gz & | |
done | |
wait | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#PBS -lnodes=1 | |
#PBS -lwalltime=0:05:00 | |
#!/bin/bash | |
while getopts n:o:p:d:s:v: option | |
do | |
case "${option}" | |
in | |
o) outputfile=${OPTARG};; | |
n) nodeuse=${OPTARG};; | |
p) probdir=${OPTARG};; | |
d) outdir=${OPTARG};; | |
s) nsub=${OPTARG};; | |
v) valid_snplist=${OPTARG};; | |
esac | |
done | |
#This version of the script is made for GMMAT format files. GMMAT only works with non-monomorphic SNPS. requires a SNPLIST to be provided | |
#Write the start and stop points of the file | |
jstart=$((($PBS_ARRAYID-1)*$nodeuse +1)) | |
jstop=$(($PBS_ARRAYID*$nodeuse)) | |
for j in $(seq -w $jstart 1 $jstop) | |
do | |
file_use=$(awk -v lineno=$j '{if(NR==lineno) print}' $outputfile) | |
LC_ALL=C join $valid_snplist <(zcat "$probdir"/"$file_use" | awk -v s=$nsub '{ printf $1 " " $2 " " $3; for(i=1; i<=s; i++) printf " " $(i*2+2)*2+$(i*2+3); printf "\n" }' | LC_ALL=C sort -k1b,1) | gzip > "$outdir"/"$file_use".doscnt.gmmat.gz & | |
done | |
wait | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Assumes that data is in 2 column probability format (probability of homozygote monor, heterozygote), with 3 preceding values (SNP, A1, A2) | |
nsub=$(wc -l probfile.fam | awk '{print $1}') | |
zcat probfile.gz | awk -v s=$nsub '{ printf $1 "," $2 "," $3; for(i=1; i<=s; i++) printf "," $(i*2+2)*2+$(i*2+3); printf "\n" }' | gzip > dosagefile.gz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Specify where probability format genotypes are stored | |
probdir=/home/maihofer/vets/qc/imputation/dasuqc1_pts_vets_mix_am-qc.hg19.ch.fl/qc1 | |
#Specify where output will be stored | |
outdir=/home/maihofer/vets/qc/imputation/dasuqc1_pts_vets_mix_am-qc.hg19.ch.fl/qc1_dose | |
#Specify working directory (error logs will go here) | |
workingdir=/home/maihofer/vets/qc/imputation/ | |
#Get the number of subjects (assuming that it is the same across all files) | |
nsub=$(wc -l "$probdir"/dos_pts_vets_mix_am-qc.hg19.ch.fl.chr1_234_237.out.dosage.fam | awk '{print $1}') | |
ls $probdir | grep .gz$ > files_To_make_dose.txt | |
#Number of commands to run is a function of the number of files | |
ncommands=$(wc -l files_To_make_dose.txt | awk '{print $1}' ) | |
#Make a job code, where 'nodesize' processes will run on each node simultaneously | |
nodesize=16 | |
nodeuse=$(($nodesize - 1)) | |
#Total number of jobs = Number of commands / number of commands used per job, rounded up | |
totjobs=$(( ($ncommands + $nodeuse - 1 ) / $nodeuse )) | |
qsub make_dose.pbs -t 1-$totjobs -d $workingdir -F "-s $nsub -d $outdir -p $probdir -n $nodeuse -o files_To_make_dose.txt" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Specify where probability format genotypes are stored | |
probdir=/home/maihofer/vets/qc/imputation/dasuqc1_pts_vets_mix_am-qc.hg19.ch.fl/qc1 | |
#Specify where output will be stored | |
outdir=/home/maihofer/vets/qc/imputation/dasuqc1_pts_vets_mix_am-qc.hg19.ch.fl/qc1_dose | |
#Specify working directory (error logs will go here) | |
workingdir=/home/maihofer/vets/qc/imputation/ | |
#Get the number of subjects (assuming that it is the same across all files) | |
nsub=$(wc -l "$probdir"/dos_pts_vets_mix_am-qc.hg19.ch.fl.chr1_234_237.out.dosage.fam | awk '{print $1}') | |
#Specify list of valid SNPs | |
valid_snps=/home/maihofer/vets/qc/european_valid_snps.sorted.snplist | |
ls $probdir | grep .gz$ > files_To_make_dose.txt | |
#Number of commands to run is a function of the number of files | |
ncommands=$(wc -l files_To_make_dose.txt | awk '{print $1}' ) | |
#Make a job code, where 'nodesize' processes will run on each node simultaneously | |
nodesize=16 | |
nodeuse=$(($nodesize - 1)) | |
#Total number of jobs = Number of commands / number of commands used per job, rounded up | |
totjobs=$(( ($ncommands + $nodeuse - 1 ) / $nodeuse )) | |
qsub make_dose_gmmat.pbs -t 1-$totjobs -d $workingdir -F "-s $nsub -d $outdir -p $probdir -n $nodeuse -v $valid_snps -o files_To_make_dose.txt" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment