tkrahn

## extract_fastq_from_bam.sh
#!/bin/bash

# Pull paired FASTQ files straight out of a BAM file.
# Based on https://gist.github.com/darencard/72ddd9e6c08aaff5ff64ca512a04a6dd
# Disk space warning: plan for roughly 7x the size of the BAM file.
threads=80
YSEQID='99999'
original_bam="${YSEQID}.bam"

printf '%s\n' 'Splitting BAM file...'

## gist:41201fe5b1e4e3d6f8a923c48320ab88
#!/bin/bash
# Unpack 999999.bam.tgz, merge the BAM files found in the current directory
# into one BAM file, then sort and index the result.
#
# Every file name is derived from kit_id so the merge output and the sort
# input cannot drift apart (the sort step previously read 99999_merged.bam,
# a file the merge step never creates).
kit_id="999999"
merged_bam="${kit_id}_merged.bam"
sorted_bam="${kit_id}.sorted.bam"

tar xf "${kit_id}.bam.tgz"
# NOTE: *.bam matches every BAM file in the current directory, so run this in
# a directory that contains only the files that should be merged.
samtools merge "${merged_bam}" *.bam

# For extracting fastq files, the following steps are not required.
# But if you want to use the BAM file directly you'll need to sort and index
# it to the correct reference sequence.

samtools sort -o "${sorted_bam}" "${merged_bam}"
samtools index "${sorted_bam}"

## bigY_hg38_pipeline.sh
#!/bin/bash
# Remember when the run started, as seconds.nanoseconds.
START="$(date +%s.%N)"
clear

# --- parameters ---

# YSEQID is the last path segment of the current working directory.
# To hard-code it instead, use e.g.: YSEQID="1234"
YSEQID="${PWD##*/}"
NUM_THREADS='80'

REF='/genomes/0/refseq/hg38/hg38.fa'

## gist:484cb64430d5c4cea8a2b86c105318b3
# mtDNA allele calling & FASTA file generation
# Expects REF, BAMFILE_SORTED, VCF_FILE and YSEQID to be set beforehand.
#
# bcftools mpileup is used in place of "samtools mpileup -u -v": VCF/BCF
# output from samtools mpileup was deprecated in samtools 1.9 and dropped in
# later releases. -Ou streams uncompressed BCF straight into bcftools call.
# All expansions are quoted so paths containing spaces keep working.
bcftools mpileup -r chrM -Ou -C 50 -f "${REF}" "${BAMFILE_SORTED}" \
  | bcftools call -O z -v -m -P 0 > "chrM_${VCF_FILE}.gz"
tabix "chrM_${VCF_FILE}.gz"
# Apply the called variants to the chrM reference to build the consensus FASTA.
samtools faidx "${REF}" chrM \
  | bcftools consensus "chrM_${VCF_FILE}.gz" -o "${YSEQID}_mtDNA.fasta"

## bwa_hg19_23andMe_pipeline.sh
#!/bin/bash
START="$(date +%s.%N)"  # start timestamp, seconds.nanoseconds
clear

# --- parameters ---

# YSEQID is the last path segment of the current working directory.
# To hard-code it instead, use e.g.: YSEQID="1234"
YSEQID="${PWD##*/}"

# Use as many threads as getconf reports processors online.
NUM_THREADS="$(getconf _NPROCESSORS_ONLN)"
printf 'We can use %s threads.\n' "${NUM_THREADS}"

## microbiome_pipeline.sh
#!/bin/bash
clear
YSEQ_ID=${PWD##*/}
# Extract unmapped reads from the BAM file.
# The '*' region argument selects the unmapped reads stored at the end of a
# coordinate-sorted, indexed BAM. It is placed before the redirection so it is
# visibly an argument of samtools view; all expansions are quoted so directory
# names containing spaces do not split into several arguments.
samtools view -b "${YSEQ_ID}_bwa-mem_hg19_sorted.bam" '*' > "${YSEQ_ID}_unmapped.bam"

# READS_1 and READS_2 (the output FASTQ paths) must be set before this point.
bedtools bamtofastq -i "${YSEQ_ID}_unmapped.bam" -fq "${READS_1}" -fq2 "${READS_2}"


# Check if there are any bacteria?

## wbtg2_pipeline.sh
#!/bin/bash
START=$(date '+%s.%N')
clear

# Parameter setup.

# Take YSEQID from the name of the current directory (its last path segment).
# Alternative with a fixed value: YSEQID="1234"
YSEQID="${PWD##*/}"

# Thread count = number of processors online, as reported by getconf.
NUM_THREADS=$(getconf '_NPROCESSORS_ONLN')
printf 'We can use %s threads.\n' "${NUM_THREADS}"

## minimap2_hg38_pipeline.sh
#!/bin/bash
# Start-of-run timestamp (seconds.nanoseconds).
START="$(date +%s.%N)"
clear

# ---- setup parameters ----

# The working directory's final path component doubles as the YSEQ ID.
# A fixed ID can be used instead, e.g.: YSEQID="1234"
YSEQID="${PWD##*/}"

# One thread per processor that getconf reports as online.
NUM_THREADS="$(getconf _NPROCESSORS_ONLN)"
printf '%s\n' "We can use ${NUM_THREADS} threads."

## parallelMpileup.sh
#!/bin/bash
NUM_THREADS=$(getconf _NPROCESSORS_ONLN)
REF="MyReference.fa"
BAMFILE_SORTED="MySortedAndIndexed.bam"
VCF_FILE="My.vcf"

# Parallel SNP calling by chromosome: one background pipeline per chromosome,
# each writing <chromosome>_${VCF_FILE}.gz. Add names to the list to cover
# more chromosomes. Expansions are quoted so paths with spaces keep working.
for chrom in chr1 chr2 chr3; do
  bcftools mpileup -r "${chrom}" -Ou -C 50 -f "${REF}" "${BAMFILE_SORTED}" \
    | bcftools call -O z --threads "${NUM_THREADS}" -v -V indels -m -P 0 \
      > "${chrom}_${VCF_FILE}.gz" &
done

# Block until every background pipeline has finished, so nothing that runs
# afterwards sees a partially written VCF file.
wait

## link2cladefinder.html
<html>
  <body>
    Here is a Link to <a href="https://cladefinder.yseq.net/interactive_tree.php?snps=L21%2B">L21</a><br>
    The "%2B" at the end of the URL is the percent-encoding of the plus sign; a literal "+" in a query string is commonly decoded as a space.
  </body>
</html>
	#!/bin/bash

	# Pull paired FASTQ files straight out of a BAM file.
	# Based on https://gist.github.com/darencard/72ddd9e6c08aaff5ff64ca512a04a6dd
	# Disk space warning: plan for roughly 7x the size of the BAM file.
	threads=80
	YSEQID='99999'
	original_bam="${YSEQID}.bam"

	printf '%s\n' 'Splitting BAM file...'
	#!/bin/bash
	# Unpack 999999.bam.tgz, merge the BAM files found in the current directory
	# into one BAM file, then sort and index the result.
	#
	# Every file name is derived from kit_id so the merge output and the sort
	# input cannot drift apart (the sort step previously read 99999_merged.bam,
	# a file the merge step never creates).
	kit_id="999999"
	merged_bam="${kit_id}_merged.bam"
	sorted_bam="${kit_id}.sorted.bam"

	tar xf "${kit_id}.bam.tgz"
	# NOTE: *.bam matches every BAM file in the current directory, so run this in
	# a directory that contains only the files that should be merged.
	samtools merge "${merged_bam}" *.bam

	# For extracting fastq files, the following steps are not required.
	# But if you want to use the BAM file directly you'll need to sort and index
	# it to the correct reference sequence.

	samtools sort -o "${sorted_bam}" "${merged_bam}"
	samtools index "${sorted_bam}"
	#!/bin/bash
	# Remember when the run started, as seconds.nanoseconds.
	START="$(date +%s.%N)"
	clear

	# --- parameters ---

	# YSEQID is the last path segment of the current working directory.
	# To hard-code it instead, use e.g.: YSEQID="1234"
	YSEQID="${PWD##*/}"
	NUM_THREADS='80'

	REF='/genomes/0/refseq/hg38/hg38.fa'
	# mtDNA allele calling & FASTA file generation
	# Expects REF, BAMFILE_SORTED, VCF_FILE and YSEQID to be set beforehand.
	#
	# The pipes must be real, unescaped "|" characters: a backslash-escaped pipe
	# is handed to the command as a literal argument and no pipeline is built.
	# bcftools mpileup is used in place of "samtools mpileup -u -v": VCF/BCF
	# output from samtools mpileup was deprecated in samtools 1.9 and dropped in
	# later releases. -Ou streams uncompressed BCF straight into bcftools call.
	bcftools mpileup -r chrM -Ou -C 50 -f "${REF}" "${BAMFILE_SORTED}" \
	  | bcftools call -O z -v -m -P 0 > "chrM_${VCF_FILE}.gz"
	tabix "chrM_${VCF_FILE}.gz"
	# Apply the called variants to the chrM reference to build the consensus FASTA.
	samtools faidx "${REF}" chrM \
	  | bcftools consensus "chrM_${VCF_FILE}.gz" -o "${YSEQID}_mtDNA.fasta"
	#!/bin/bash
	clear
	YSEQ_ID=${PWD##*/}
	# Extract unmapped reads from the BAM file.
	# The '*' region argument selects the unmapped reads stored at the end of a
	# coordinate-sorted, indexed BAM. It is placed before the redirection so it is
	# visibly an argument of samtools view; all expansions are quoted so directory
	# names containing spaces do not split into several arguments.
	samtools view -b "${YSEQ_ID}_bwa-mem_hg19_sorted.bam" '*' > "${YSEQ_ID}_unmapped.bam"

	# READS_1 and READS_2 (the output FASTQ paths) must be set before this point.
	bedtools bamtofastq -i "${YSEQ_ID}_unmapped.bam" -fq "${READS_1}" -fq2 "${READS_2}"


	# Check if there are any bacteria?
	#!/bin/bash
	NUM_THREADS=$(getconf _NPROCESSORS_ONLN)
	REF="MyReference.fa"
	BAMFILE_SORTED="MySortedAndIndexed.bam"
	VCF_FILE="My.vcf"

	# Parallel SNP calling by chromosome: one background pipeline per chromosome,
	# each writing <chromosome>_${VCF_FILE}.gz. Add names to the list to cover
	# more chromosomes. The pipe must be a real, unescaped "|": a backslash-escaped
	# pipe is handed to bcftools mpileup as a literal argument and no pipeline is
	# built.
	for chrom in chr1 chr2 chr3; do
	  bcftools mpileup -r "${chrom}" -Ou -C 50 -f "${REF}" "${BAMFILE_SORTED}" \
	    | bcftools call -O z --threads "${NUM_THREADS}" -v -V indels -m -P 0 \
	      > "${chrom}_${VCF_FILE}.gz" &
	done

	# Block until every background pipeline has finished, so nothing that runs
	# afterwards sees a partially written VCF file.
	wait
	<html>
	<body>
	Here is a Link to <a href="https://cladefinder.yseq.net/interactive_tree.php?snps=L21%2B">L21</a><br>
	The "%2B" at the end of the URL is the percent-encoding of the plus sign; a literal "+" in a query string is commonly decoded as a space.
	</body>
	</html>