williamrowell/dotplot workflow

## dotplot workflow
# Recipe for drawing one self-vs-self dotplot per CCS read.
# Run the steps in order; the gepard and flexidot sections are alternatives,
# so pick one of them rather than executing the file top to bottom.
module load smrtanalysis/mainline jre parallel

# extract fasta from ccs readset
# (the later steps read the result as consensusreadset.fasta)
bam2fasta -u -o consensusreadset consensusreadset.bam

# make dotplots with gepard
## split the readset into one zmw_<id>.fasta file per read
python split_ccs_fasta.py consensusreadset.fasta
## download gepard and prepare it
wget https://github.com/univieCUBE/gepard/archive/v1.40.0.zip
unzip v1.40.0.zip
# GitHub source archives unpack into a top-level <repo>-<tag> directory, so the
# jar and matrix are not directly under ./dist and ./resources.
# NOTE(review): directory name assumed to be gepard-1.40.0 -- confirm after unzip.
mv gepard-1.40.0/dist/Gepard-1.40.jar gepard-1.40.0/resources/matrices/edna.mat .
# exported so that the shells spawned by parallel can expand $GEPARD
export GEPARD="java -cp Gepard-1.40.jar org.gepard.client.cmdline.CommandLine"
## make dotplots, one per zmw
# single quotes are deliberate: $GEPARD must be word-split by the job's shell,
# and {} / {.} are parallel's placeholders for the input file and its basename
parallel '$GEPARD -seq1 {} -seq2 {} -matrix edna.mat -outfile {.}.png' ::: zmw_*.fasta

# OR, make dotplots with flexidot
wget https://raw.githubusercontent.com/molbio-dresden/flexidot/master/code/flexidot_v1.02.py
dos2unix flexidot_v1.02.py
chmod +x flexidot_v1.02.py
# change the shebang to something better; -i edits the downloaded script in
# place (without a file operand sed would just wait on stdin)
sed -i '1 s:#!/usr/bin/python2.7:#!/usr/bin/env python:' flexidot_v1.02.py
## if desired, make a virtual environment in python and activate
# conda pins versions as package=version
conda create -n flexidot-env python=2.7
source activate flexidot-env
## make dotplot collages
# run the script under the name it was downloaded as
python flexidot_v1.02.py -i consensusreadset.fasta

## split_ccs_fasta.py
#!/usr/bin/env python
import sys
from Bio import SeqIO


# Split a multi-record FASTA file (path given as the first command-line
# argument) into one FASTA file per record, written to the current directory.
# Each output is named zmw_<field>.fasta, where <field> is the second
# '/'-separated field of the record id.
if len(sys.argv) < 2:
    sys.exit("usage: split_ccs_fasta.py <reads.fasta>")

# Plain "r" mode: the "U" flag was deprecated in Python 3 and removed in
# Python 3.11, where open(..., "rU") raises ValueError.
with open(sys.argv[1], "r") as handle:
    for record in SeqIO.parse(handle, "fasta"):
        # An id without any '/' raises IndexError here.
        filename = 'zmw_' + record.id.split('/')[1] + '.fasta'
        with open(filename, "w") as output_handle:
            SeqIO.write(record, output_handle, "fasta")
	# Recipe for drawing one self-vs-self dotplot per CCS read.
	# Run the steps in order; the gepard and flexidot sections are alternatives,
	# so pick one of them rather than executing the file top to bottom.
	module load smrtanalysis/mainline jre parallel

	# extract fasta from ccs readset
	# (the later steps read the result as consensusreadset.fasta)
	bam2fasta -u -o consensusreadset consensusreadset.bam

	# make dotplots with gepard
	## split the readset into one zmw_<id>.fasta file per read
	python split_ccs_fasta.py consensusreadset.fasta
	## download gepard and prepare it
	wget https://github.com/univieCUBE/gepard/archive/v1.40.0.zip
	unzip v1.40.0.zip
	# GitHub source archives unpack into a top-level <repo>-<tag> directory, so the
	# jar and matrix are not directly under ./dist and ./resources.
	# NOTE(review): directory name assumed to be gepard-1.40.0 -- confirm after unzip.
	mv gepard-1.40.0/dist/Gepard-1.40.jar gepard-1.40.0/resources/matrices/edna.mat .
	# exported so that the shells spawned by parallel can expand $GEPARD
	export GEPARD="java -cp Gepard-1.40.jar org.gepard.client.cmdline.CommandLine"
	## make dotplots, one per zmw
	# single quotes are deliberate: $GEPARD must be word-split by the job's shell,
	# and {} / {.} are parallel's placeholders for the input file and its basename
	parallel '$GEPARD -seq1 {} -seq2 {} -matrix edna.mat -outfile {.}.png' ::: zmw_*.fasta

	# OR, make dotplots with flexidot
	wget https://raw.githubusercontent.com/molbio-dresden/flexidot/master/code/flexidot_v1.02.py
	dos2unix flexidot_v1.02.py
	chmod +x flexidot_v1.02.py
	# change the shebang to something better; -i edits the downloaded script in
	# place (without a file operand sed would just wait on stdin)
	sed -i '1 s:#!/usr/bin/python2.7:#!/usr/bin/env python:' flexidot_v1.02.py
	## if desired, make a virtual environment in python and activate
	# conda pins versions as package=version
	conda create -n flexidot-env python=2.7
	source activate flexidot-env
	## make dotplot collages
	# run the script under the name it was downloaded as
	python flexidot_v1.02.py -i consensusreadset.fasta
	#!/usr/bin/env python
	import sys
	from Bio import SeqIO


	# Split a multi-record FASTA file (path given as the first command-line
	# argument) into one FASTA file per record, written to the current directory.
	# Each output is named zmw_<field>.fasta, where <field> is the second
	# '/'-separated field of the record id.
	if len(sys.argv) < 2:
	    sys.exit("usage: split_ccs_fasta.py <reads.fasta>")

	# Plain "r" mode: the "U" flag was deprecated in Python 3 and removed in
	# Python 3.11, where open(..., "rU") raises ValueError.
	with open(sys.argv[1], "r") as handle:
	    for record in SeqIO.parse(handle, "fasta"):
	        # An id without any '/' raises IndexError here.
	        filename = 'zmw_' + record.id.split('/')[1] + '.fasta'
	        with open(filename, "w") as output_handle:
	            SeqIO.write(record, output_handle, "fasta")