Skip to content

Instantly share code, notes, and snippets.

@mdshw5
Created October 10, 2016 17:47
Show Gist options
  • Save mdshw5/9a6a401cdd917b596ac5443b3f39317f to your computer and use it in GitHub Desktop.
Save mdshw5/9a6a401cdd917b596ac5443b3f39317f to your computer and use it in GitHub Desktop.
canvas manifest creation
## This is a rule for use in Snakemake
rule create_canvas_xml:
input: fasta=config["mouse_fasta"]
output: xml="GenomeSize.xml", genome="genome.fa"
params: runtime="7200", memory="2G"
run:
from pyfaidx import Fasta
from collections import Counter
import hashlib
import os
with open(input.fasta) as fasta, open(output.genome, 'w') as genome:
for line in fasta:
genome.write(line)
with Fasta(input.fasta) as genome, open(output.xml, 'w') as genomesize:
genomesize.write('<sequenceSizes genomeName="{0}">\n'.format(os.path.basename(config['mouse_fasta'])))
for chrom in genome:
# <chromosome fileName="genome.fa" contigName="chrM" totalBases="16571" isCircular="false" md5="d2ed829b8a1628d16cbeee88e88e39eb" ploidy="2" knownBases="16571" type="Mitochondria" />
fileName = os.path.basename(input.fasta)
contigName = chrom.name
totalBases = len(chrom)
isCircular = "false"
md5 = hashlib.md5(str(chrom).encode('ascii')).hexdigest()
ploidy = "2"
counts = Counter(str(chrom).upper())
knownBases = str(counts['A'] + counts['T'] + counts['C'] + counts['G'])
if "M" in chrom.name:
type = "Mitochondria"
elif "X" in chrom.name:
type = "Allosome"
elif "Y" in chrom.name:
type = "Allosome"
else:
type = "Autosome"
genomesize.write('<chromosome fileName="{fileName}" contigName="{contigName}" totalBases="{totalBases}" isCircular="false" md5="{md5}" ploidy="2" knownBases="{knownBases}" type="{type}" />\n'.format(**locals()))
genomesize.write('</sequenceSizes>')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment