Created
August 31, 2023 12:58
-
-
Save cmdcolin/d60b44d56f4a14cec3f1ff0d6edf7187 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
export JB2DIR=~/src/jbrowse-components/test_data/hg002 | |
export NCBIROOT=https://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/HG002_NA24385_son | |
## download chr22 from the Illumina 2x250bp reads and 300x reads | |
samtools view $NCBI_ROOT/NIST_Illumina_2x250bps/novoalign_bams/HG002.hs37d5.2x250.bam 22 -o HG002.hs37d5.2x250.chr22.bam | |
samtools view $NCBI_ROOT/NIST_HiSeq_HG002_Homogeneity-10953946/NHGRI_Illumina300X_AJtrio_novoalign_bams/HG002.hs37d5.300x.bam 22 -o HG002.hs37d5.300x.chr22.bam | |
## select chr22 out of the HG002 T2T assembly, contains a full chromosome for PATERNAL and MATERNAL | |
samtools faidx 'https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/scratch/HG002/assemblies/drafts/assembly.v0.7.fasta' chr22_PATERNAL > chr22_PATERNAL.fasta | |
samtools faidx 'https://s3-us-west-2.amazonaws.com/human-pangenomics/T2T/scratch/HG002/assemblies/drafts/assembly.v0.7.fasta' chr22_MATERNAL > chr22_MATERNAL.fasta | |
wget ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz | |
gunzip hs37d5.fa.gz | |
samtools faidx hs37d5.fa | |
samtools faidx hs37d5.fa 22 > hg19_chr22.fa | |
## combine | |
cat chr22_*.fasta > chr22.fa | |
## convert GIAB bam to FASTQ and then re-align to new combined mat+pat chr22.fa | |
for COV in 2x250 300x; do | |
samtools collate -u -O HG002.hs37d5.$COV.chr22.bam | samtools fastq -1 $COV.paired1.fq -2 $COV.paired2.fq -0 /dev/null -s /dev/null -n | |
quickalign_paired.sh chr22.fa $COV.paired1.fq $COV.paired2.fq $COV.cram | |
done; | |
## add assemblies to jbrowse 2 | |
for VAR in MATERNAL PATERNAL; do | |
## add assembly to JBrowse 2 (maternal and paternal added as 'separate genome assemblies') | |
jbrowse add-assembly chr22_$VAR.fasta --out $JB2DIR --load copy | |
done; | |
# add cram tracks and bigwigs | |
for VAR in MATERNAL PATERNAL; do | |
for COV in 2x250 300x; do | |
samtools view $COV.cram chr22_$VAR -o "${COV}_${VAR}".cram | |
samtools index "${COV}_${VAR}".cram | |
jbrowse add-track "${COV}_${VAR}".cram --out $JB2DIR --load copy -a chr22_$VAR --force | |
mosdepth "${COV}_${VAR}".cram -b 100 -f chr22.fa "${COV}_${VAR}".cram | |
gunzip "${COV}_${VAR}".cram.per-base.bed.gz | |
bedGraphToBigWig "${COV}_${VAR}".cram.per-base.bed chr22.fa.fai "${COV}_${VAR}".cram.per-base.bw | |
jbrowse add-track "${COV}_${VAR}".cram.per-base.bw --out $JB2DIR --load copy -a chr22_$VAR --force | |
done; | |
done; | |
## whole-genome alignment of chr22_MATERNAL vs chr22_PATERNAL with and without CIGAR (without a bit faster) | |
minimap2 -x asm5 chr22_MATERNAL.fasta chr22_PATERNAL.fasta > chr22_MATERNAL_vs_PATERNAL.paf | |
minimap2 -c -x asm5 chr22_MATERNAL.fasta chr22_PATERNAL.fasta > chr22_MATERNAL_vs_PATERNAL_cigar.paf | |
## load whole-genome alignment as a 'synteny track' into jbrowse 2 | |
jbrowse add-track chr22_MATERNAL_vs_PATERNAL_cigar.paf -a chr22_PATERNAL,chr22_MATERNAL --out $JB2DIR --load copy | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment