Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Created November 12, 2010 20:03
Show Gist options
  • Save stephenturner/674597 to your computer and use it in GitHub Desktop.
Save stephenturner/674597 to your computer and use it in GitHub Desktop.
impute_step1.sh
#!/bin/bash
#
# Jacki Buros
# Step 1 impute
# check params
#
# Every failure path below uses a top-level `return`, which only stops the run
# when this file is sourced. ERR_DELETE_FILES is not defined in this file;
# presumably the sourcing script provides it -- confirm against the caller.

#######################################
# Report whether a required parameter holds a non-empty value.
# The value is read through indirect expansion and is always quoted, so values
# containing whitespace or test operators cannot confuse the check.
# Arguments: $1 - NAME of the variable to check (the name, not its value)
# Outputs:   "<date> | param '<name>' not defined. Exiting." on stdout when
#            the variable is unset or empty
# Returns:   0 if the variable is non-empty, 1 otherwise
#######################################
_impute_require_param() {
  if [[ -z "${!1}" ]]; then
    echo "$(date) | param '${1}' not defined. Exiting."
    return 1
  fi
  return 0
}

# The status code is left unquoted on purpose: if it is unset, `return` falls
# back to the status of the last command instead of failing on an empty
# argument.
_impute_require_param prefix || return $ERR_DELETE_FILES
_impute_require_param chr || return $ERR_DELETE_FILES
_impute_require_param s1out || return $ERR_DELETE_FILES
_impute_require_param s0file || return $ERR_DELETE_FILES
_impute_require_param REFHAPS || return $ERR_DELETE_FILES
_impute_require_param REFSNPS || return $ERR_DELETE_FILES

# The reference files and the step 0 archive must already exist.
if [[ ! -e "${REFHAPS}" ]]; then
  echo "$(date) | reference haplotypes file (${REFHAPS}) does not exist. Exiting."
  return $ERR_DELETE_FILES
fi
if [[ ! -e "${REFSNPS}" ]]; then
  echo "$(date) | reference haplotypes snps file (${REFSNPS}) does not exist. Exiting."
  return $ERR_DELETE_FILES
fi
if [[ ! -e "${s0file}" ]]; then
  echo "$(date) | step 0 tar file (${s0file}) does not exist in local directory. Exiting."
  return $ERR_DELETE_FILES
fi

# check executables
# Only verifies that the variables are non-empty; it does not test that the
# named programs exist or are executable.
_impute_require_param MACHBIN || return $ERR_DELETE_FILES
_impute_require_param PLINKBIN || return $ERR_DELETE_FILES
_impute_require_param GAWKBIN || return $ERR_DELETE_FILES
_impute_require_param TARBIN || return $ERR_DELETE_FILES
# Job banner: start time, host, working directory, the JOB_ID / JOB_NAME
# values from the environment, and the chromosome being processed, framed by
# two rule lines. One printf call emits every line in order.
printf '%s\n' \
  "==========================================================" \
  "Starting on : $(date)" \
  "Running on node : $(hostname)" \
  "Current directory : $(pwd)" \
  "Current job ID : $JOB_ID" \
  "Current job name : $JOB_NAME" \
  "Param value (chr) : $chr" \
  "=========================================================="
# ---- settings ------
# Option string passed (word-split) to the MACH command line further down.
mach_settings=' --compact --greedy --rounds 100 '
# SUBSET and COMPLETE prefixes
# plink binary files containing genotype data for complete sample
printf -v COMPLETE '_%s_chr%s_complete' "$prefix" "$chr"
# plink binary files containing genotype data for step1 impute individuals only
printf -v SUBSET '_%s_chr%s_subset' "$prefix" "$chr"
# prefix for per-group id lists
printf -v GROUP '_%s_chr%s_group' "$prefix" "$chr"
# temp files prefix
printf -v file '%s_chr%s' "$SUBSET" "$chr"
# extract files required for this analysis
# Only the "${SUBSET}.*" members are requested from the step 0 archive. The
# variable part is quoted so paths with spaces survive; the trailing ".*" stays
# unquoted exactly as before. $TARBIN and $PLINKBIN are left unquoted in case
# they carry extra options -- confirm against the caller.
$TARBIN xfz "${s0file}" "${SUBSET}".*
# The archive is removed right after extraction, before the result is checked.
rm -- "${s0file}"
if [[ -e "${SUBSET}.bed" ]]; then
  # filter by chrom
  # Log the command line first, then run it.
  echo "${PLINKBIN} --bfile ${SUBSET} --noweb --recode --set-hh-missing --chr ${chr} --out ${file}"
  $PLINKBIN --bfile "${SUBSET}" --noweb --recode --set-hh-missing --chr "${chr}" --out "${file}"
  # The extracted binary fileset is no longer needed once recoded to ${file}.*
  rm -- "${SUBSET}".*
else
  echo "$(date) | Plink file ${SUBSET}.bed does not exist (TAR extract failed or file not in tar archive ${s0file}). Exiting"
  # Top-level return: only effective when this file is sourced. Unquoted so an
  # unset code degrades to a bare `return`.
  return $ERR_COPY_FILES
fi
# Derive the two MACH input files from the recoded plink map.
echo "$(date) | Preparing maps in MACH format"
if [[ -e "${file}.map" ]]; then
  # .pmap: columns 1, 2 and 4 of each map row (presumably chromosome, SNP id
  # and position -- confirm against the plink .map layout in use).
  $GAWKBIN '{print $1,$2,$4}' "${file}.map" > "${file}.pmap"
  # .dat: a "T pheno" header line, then one "M <column 2>" line per map row.
  $GAWKBIN 'BEGIN {print "T","pheno";}{print "M",$2;}' "${file}.map" > "${file}.dat"
else
  echo "$(date) | Plink map file ${file}.map does not exist. Exiting"
  # Top-level return: only effective when this file is sourced. Unquoted so an
  # unset code degrades to a bare `return`.
  return $ERR_COPY_FILES
fi
# Run MACH step 1 on the recoded genotypes; its stdout goes to ${s1out}.log
# and its exit status is kept in machreturn for the clean-up step.
if [[ -e "${file}.ped" && -e "${file}.dat" ]]; then
  echo "$(date) | Begin step 1 impute"
  # $mach_settings is intentionally unquoted so it splits into separate
  # options. $MACHBIN is left unquoted in case it carries extra options.
  $MACHBIN --dat "${file}.dat" --ped "${file}.ped" \
    --snps "${REFSNPS}" --haps "${REFHAPS}" \
    $mach_settings \
    --prefix "${s1out}" --autoFlip > "${s1out}.log"
  machreturn=$?
  # Report the saved status: after the assignment above, $? is the status of
  # the assignment itself (always 0), not of MACH.
  echo "$(date) | Step 1 complete, exit code = ${machreturn}"
else
  echo "$(date) | Source genotype files ${file}.dat and ${file}.ped do not exist. Exiting"
  # Top-level return: only effective when this file is sourced. Unquoted so an
  # unset code degrades to a bare `return`.
  return $ERR_COPY_FILES
fi
# Clean up
# On MACH success the temporary ${file}.* files are removed and $OK is
# returned; on failure they are kept and $ERR_COPY_FILES is returned. Both are
# top-level returns, so they only take effect when this file is sourced, and
# both codes are left unquoted so an unset code degrades to a bare `return`.
if [[ "${machreturn}" -eq 0 ]]; then
  # Guard against an empty prefix, which would turn the glob into ".*".
  if [[ -n "${file}" ]]; then
    rm -- "${file}".*
  fi
  return $OK
else
  return $ERR_COPY_FILES
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment