Skip to content

Instantly share code, notes, and snippets.

@lindenb
Last active February 12, 2016 11:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lindenb/832a8da14217238a019d to your computer and use it in GitHub Desktop.
Save lindenb/832a8da14217238a019d to your computer and use it in GitHub Desktop.
"Petit dej de la Bioinfo" November 12, 2015. NGS data formats & workflows: XML model and XSLT stylesheet converting model.xml to a Makefile

usage

xsltproc model2make.xsl model.xml | make -j 3 -f -
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Input model for the XSLT stylesheet that generates the Makefile.

  model/@ref   : file name of the reference FASTA (becomes the REF make variable).
  sample/@name : sample identifier; used for the per-sample BAM name and for
                 the read-group ID/SM fields passed to bwa.
  fastq        : one pair of gzipped FASTQ files for that sample;
                 "for" is the read-1 file and "rev" the read-2 file
                 (presumably forward / reverse reads).

  The listed FASTQ files do not have to exist beforehand: the generated
  Makefile contains rules that simulate them from the reference with wgsim.
-->
<model ref="ref.fa">
<sample name="S1">
<fastq>
<for>S1_01_R1.fq.gz</for>
<rev>S1_01_R2.fq.gz</rev>
</fastq>
<fastq>
<for>S1_02_R1.fq.gz</for>
<rev>S1_02_R2.fq.gz</rev>
</fastq>
<fastq>
<for>S1_03_R1.fq.gz</for>
<rev>S1_03_R2.fq.gz</rev>
</fastq>
</sample>
<sample name="S2">
<fastq>
<for>S2_01_R1.fq.gz</for>
<rev>S2_01_R2.fq.gz</rev>
</fastq>
<fastq>
<for>S2_02_R1.fq.gz</for>
<rev>S2_02_R2.fq.gz</rev>
</fastq>
<fastq>
<for>S2_03_R1.fq.gz</for>
<rev>S2_03_R2.fq.gz</rev>
</fastq>
</sample>
<sample name="S3">
<fastq>
<for>S3_01_R1.fq.gz</for>
<rev>S3_01_R2.fq.gz</rev>
</fastq>
<fastq>
<for>S3_02_R1.fq.gz</for>
<rev>S3_02_R2.fq.gz</rev>
</fastq>
<fastq>
<for>S3_03_R1.fq.gz</for>
<rev>S3_03_R2.fq.gz</rev>
</fastq>
<fastq>
<for>S3_04_R1.fq.gz</for>
<rev>S3_04_R2.fq.gz</rev>
</fastq>
</sample>
<sample name="S4">
<fastq>
<for>S4_01_R1.fq.gz</for>
<rev>S4_01_R2.fq.gz</rev>
</fastq>
</sample>
</model>
<?xml version="1.0"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="text" encoding="US-ASCII"/>
<xsl:template match="/">
<!--
  Entry point. Emits the Makefile prologue (safety settings and the paths of
  the external tools) and then delegates to the "model" template, which
  emits every rule.

  The tool paths use recursive "=" assignment on purpose: samtools.exe and
  wgsim.exe are derived from samtools.dir, so overriding samtools.dir on the
  make command line relocates both.
-->
# Delete a target whose recipe failed, so that a truncated BAM/VCF is never
# mistaken for an up-to-date file on the next run.
.DELETE_ON_ERROR:
# The recipes are shell pipelines (bwa | samtools, mpileup | bcftools).
# pipefail makes a failure in any stage fail the whole recipe.
# (.SHELLFLAGS is honoured by GNU make 3.82 and later; older versions ignore it.)
SHELL=/bin/bash
.SHELLFLAGS=-o pipefail -c
samtools.dir=../samtools/
samtools.exe=${samtools.dir}samtools
wgsim.exe=${samtools.dir}misc/wgsim
bwa.exe=../bwa/bwa
picard.jar=../picard-tools-1.138/picard.jar
bcftools.exe=../bcftools/bcftools
<xsl:apply-templates select="model"/>
</xsl:template>
<xsl:template match="model">
<!--
  Emits the top-level rules of the generated Makefile:

    * REF            : the reference FASTA name, taken from model/@ref.
    * mutations.vcf.gz : the first rule in the output, hence the default goal.
                       It depends on REF and on the .bam.bai index of every
                       sample; the recipe recovers the BAM names by stripping
                       the .bai suffix from the prerequisites and keeping
                       the words that end in .bam.
    * per-sample rules : produced by the "sample" template.
    * REF.bwt          : the bwa index of the reference.
    * REF              : the reference itself, written line by line with echo
                       from the sequence embedded below.
    * graph.png        : dependency graph; its recipe reads a file literally
                       named "Makefile", so it assumes the generated output
                       was also saved under that name.

  Every recipe line starts with the character reference for TAB (number 9)
  instead of a literal tab, so the mandatory make recipe prefix cannot be
  lost when the stylesheet is re-indented or copied through a tool that
  normalises whitespace. For the same reason the FASTA recipe uses escaped
  text rather than a CDATA section (references are not expanded in CDATA).
-->
REF=<xsl:value-of select="@ref"/>
mutations.vcf.gz : ${REF} <xsl:for-each select="sample">\
$(addsuffix .bam.bai,<xsl:value-of select="@name"/>) </xsl:for-each>
&#9;${samtools.exe} mpileup -u -f $&lt; $(filter %.bam,$(basename $^)) |\
&#9;${bcftools.exe} call -c -v -O z -o $@ -
<xsl:apply-templates select="sample"/>
$(addsuffix .bwt,${REF}) : ${REF}
&#9;${bwa.exe} index $&lt;
${REF}:
&#9;echo "&gt;rotavirus" &gt; $@
&#9;echo "GGCTTTTAATGCTTTTCAGTGGTTGCTGCTCAAGATGGAGTCTACTCAGCAGATGGTAAGCTCTATTATT" &gt;&gt; $@
&#9;echo "AATACTTCTTTTGAAGCTGCAGTTGTTGCTGCTACTTCAACATTAGAATTAATGGGTATTCAATATGATT" &gt;&gt; $@
&#9;echo "ACAATGAAGTATTTACCAGAGTTAAAAGTAAATTTGATTATGTGATGGATGACTCTGGTGTTAAAAACAA" &gt;&gt; $@
&#9;echo "TCTTTTGGGTAAAGCTATAACTATTGATCAGGCGTTAAATGGAAAGTTTAGCTCAGCTATTAGAAATAGA" &gt;&gt; $@
&#9;echo "AATTGGATGACTGATTCTAAAACGGTTGCTAAATTAGATGAAGACGTGAATAAACTTAGAATGACTTTAT" &gt;&gt; $@
&#9;echo "CTTCTAAAGGGATCGACCAAAAGATGAGAGTACTTAATGCTTGTTTTAGTGTAAAAAGAATACCAGGAAA" &gt;&gt; $@
&#9;echo "ATCATCATCAATAATTAAATGCACTAGACTTATGAAGGATAAAATAGAACGTGGAGAAGTTGAGGTTGAT" &gt;&gt; $@
&#9;echo "GATTCATATGTTGATGAGAAAATGGAAATTGATACTATTGATTGGAAATCTCGTTATGATCAGTTAGAAA" &gt;&gt; $@
&#9;echo "AAAGATTTGAATCACTAAAACAGAGGGTTAATGAGAAATACAATACTTGGGTACAAAAAGCGAAGAAAGT" &gt;&gt; $@
&#9;echo "AAATGAAAATATGTACTCTCTTCAGAATGTTATCTCACAACAGCAAAACCAAATAGCAGATCTTCAACAA" &gt;&gt; $@
&#9;echo "TATTGTAGTAAATTGGAAGCTGATTTGCAAGGTAAATTTAGTTCATTAGTGTCATCAGTTGAGTGGTATC" &gt;&gt; $@
&#9;echo "TAAGGTCTATGGAATTACCAGATGATGTAAAGAATGACATTGAACAGCAGTTAAATTCAATTGATTTAAT" &gt;&gt; $@
&#9;echo "TAATCCCATTAATGCTATAGATGATATCGAATCGCTGATTAGAAATTTAATTCAAGATTATGACAGAACA" &gt;&gt; $@
&#9;echo "TTTTTAATGTTAAAAGGACTGTTGAAGCAATGCAACTATGAATATGCATATGAGTAGTCATATAATTAAA" &gt;&gt; $@
&#9;echo "AATATTAACCATCTACACATGACCCTCTATGAGCACAATAGTTAAAAGCTAACACTGTCAAAAACCTAAA" &gt;&gt; $@
&#9;echo "TGGCTATAGGGGCGGTTTGTGACC" &gt;&gt; $@
&#9;echo "" &gt;&gt; $@

graph.png:
&#9;make -ndrB -f Makefile | ../makefile2graph/make2graph | dot -Tpng -o$@
</xsl:template>
<xsl:template match="sample">
<!--
  Emits the rules for one sample (context node: a "sample" element):

    * NAME.bam.bai : index of the merged BAM, built with "samtools index".
    * NAME.bam     : merge of the per-pair BAMs; one prerequisite
                   (FOR.bam, where FOR is the text of fastq/for) is listed
                   per "fastq" child, and every prerequisite is passed to
                   Picard MergeSamFiles as an I= argument.
    * the rules of every FASTQ pair, produced by the "fastq" template.

  Recipe lines start with the character reference for TAB (number 9) so the
  mandatory make recipe prefix survives whitespace normalisation of this
  stylesheet.
-->
$(addsuffix .bam.bai,<xsl:value-of select="@name"/>): $(addsuffix .bam,<xsl:value-of select="@name"/>)
&#9;${samtools.exe} index $&lt;
$(addsuffix .bam,<xsl:value-of select="@name"/>): <xsl:for-each select="fastq"> \
$(addsuffix .bam,<xsl:value-of select="for"/>) </xsl:for-each>
&#9;java -jar ${picard.jar} MergeSamFiles AS=true O=$@ $(foreach B,$^, I=${B} )
<xsl:apply-templates select="fastq"/>
</xsl:template>
<xsl:template match="fastq">
<!--
  Emits the three rules for one FASTQ pair (context node: a "fastq" element;
  FOR and REV below stand for the text of its "for" and "rev" children):

    * FOR.bam : maps the pair with "bwa mem" against REF, tagging the reads
              with a read group whose ID and SM are the parent sample name,
              then converts and coordinate-sorts with samtools. The sort
              output format is BAM so that the content matches the .bam
              target name.
    * REV     : depends on FOR and only gzips the uncompressed read-2 file;
              that file is written by the FOR recipe, so the dependency
              keeps the two steps ordered under parallel make.
    * FOR     : simulates both read files from REF with wgsim, then gzips
              the read-1 file. The wgsim seed (-S) is the 1-based position
              of the parent sample, so every pair of a sample shares the
              same seed.
              NOTE(review): the reason for the one second sleep is not
              visible here; presumably it keeps file timestamps distinct.
              Confirm before removing.

  Recipe lines start with the character reference for TAB (number 9) so the
  mandatory make recipe prefix survives whitespace normalisation of this
  stylesheet.
-->
$(addsuffix .bam,<xsl:value-of select="for"/>): $(addsuffix .bwt,${REF}) <xsl:value-of select="for"/><xsl:text> </xsl:text><xsl:value-of select="rev"/>
&#9;${bwa.exe} mem -R '@RG\tID:<xsl:value-of select="../@name"/>\tSM:<xsl:value-of select="../@name"/>' ${REF} $(filter %.gz,$^) |\
&#9;${samtools.exe} view -Sbu - |\
&#9;${samtools.exe} sort -O bam -o $@ -T $(basename $@) -
<xsl:value-of select="rev"/> : <xsl:value-of select="for"/>
&#9;gzip --best -f $(basename $@)
<xsl:value-of select="for"/> : ${REF}
&#9;@sleep 1
&#9;${wgsim.exe} -r 0.01 -S <xsl:value-of select="1 + count(../preceding-sibling::sample)"/> -N 1000 $&lt; $(basename <xsl:value-of select="for"/><xsl:text> </xsl:text><xsl:value-of select="rev"/>)
&#9;gzip --best -f $(basename $@)
</xsl:template>
</xsl:stylesheet>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment