Last active
July 21, 2017 01:36
-
-
Save wenjie1991/49b58471edd751901067af6d8339e9f3 to your computer and use it in GitHub Desktop.
Variation calling pipeline
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Alignemnt | |
``` | |
## input file | |
inputFastq1=../data/fastq/input_R1.fastq.gz ## input file name | |
inputFastq2=../data/fastq/input_R2.fastq.gz ## input file name | |
## Output | |
mkdir -p ../tmp/bam ## Output fold | |
mappedBam=../tmp/bam/output.mapped.bam | |
## temporary file, delet manually | |
uBam=../tmp/bam/${sampleMark}.bam | |
adaptersMarkedBam=../tmp/bam/${sampleMark}.adaptersMarked.bam | |
metricsFile=../tmp/bam/${sampleMark}.metrics.txt | |
## program dir | |
javaCmd=java | |
memory=10 ## Gb | |
picardJar=~/bin/picard.jar | |
gatkJar=~/bin/GenomeAnalysisTK.jar | |
nCore=5 ## Number of threads | |
## public data dir | |
ref_fasta=ucsc.hg19.fasta | |
tmpDir=/tmp | |
################################################# | |
## run | |
## fastq to sam | |
${javaCmd} -Xmx${memory}G -jar ${picardJar} FastqToSam \ | |
FASTQ=${inputFastq1} \ | |
FASTQ2=${inputFastq2} \ | |
OUTPUT=${uBam} \ | |
READ_GROUP_NAME=${sample_name} \ | |
SAMPLE_NAME=${sample_name} \ | |
LIBRARY_NAME=${sample_name} \ | |
PLATFORM_UNIT=illumina \ | |
PLATFORM=illumina \ | |
TMP_DIR=/tmp | |
## mark adapter | |
${javaCmd} -Xmx${memory}G -jar ${picardJar} MarkIlluminaAdapters \ | |
I=${uBam} \ | |
O=${adaptersMarkedBam} \ | |
M=${metricsFile} \ | |
TMP_DIR=/tmp | |
## mapping | |
threadBwa=$nCore | |
${javaCmd} -Xmx${memory}G -jar ${picardJar} SamToFastq \ | |
I=${adaptersMarkedBam} \ | |
FASTQ=/dev/stdout \ | |
CLIPPING_ATTRIBUTE=XT CLIPPING_ACTION=2 INTERLEAVE=true NON_PF=true \ | |
TMP_DIR=/tmp | \ | |
bwa mem -M -t ${threadBwa} -p ${ref_fasta} /dev/stdin | \ | |
${javaCmd} -Xmx${memory}G -jar ${picardJar} MergeBamAlignment \ | |
ALIGNED_BAM=/dev/stdin \ | |
UNMAPPED_BAM=${uBam} \ | |
OUTPUT=${mappedBam} \ | |
R=${ref_fasta} CREATE_INDEX=true ADD_MATE_CIGAR=true \ | |
CLIP_ADAPTERS=false CLIP_OVERLAPPING_READS=true \ | |
INCLUDE_SECONDARY_ALIGNMENTS=true MAX_INSERTIONS_OR_DELETIONS=-1 \ | |
PRIMARY_ALIGNMENT_STRATEGY=MostDistant ATTRIBUTES_TO_RETAIN=XS \ | |
TMP_DIR=/tmp | |
``` | |
# Remove Duplication | |
``` | |
$java -jar ~/bin/picard.jar MarkDuplicates \ | |
I=input_bam_file \ | |
O=output_bam_file \ | |
M=mark_dup_matrix.txt \ | |
REMOVE_DUPLICATES=true | |
``` | |
# Variation calling | |
## **Need index first** by `samtools index bamfile.bam` | |
``` | |
$java -Xmx14G -jar ~/bin/GenomeAnalysisTK.jar -T UnifiedGenotyper \ | |
-nt 7 \ ## threads number | |
-R refSeq.fa \ | |
-I input_bam_file.bam \ | |
-o output_vcf.vcf \ | |
-glm BOTH \ | |
--min_base_quality_score 17 \ | |
--min_indel_count_for_genotyping 5 \ | |
--min_indel_fraction_per_sample 0.05 \ | |
--output_mode EMIT_VARIANTS_ONLY \ | |
--sample_ploidy 2 \ | |
--standard_min_confidence_threshold_for_calling 20.0 \ | |
--standard_min_confidence_threshold_for_emitting 20.0 | |
``` | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment