Skip to content

Instantly share code, notes, and snippets.

@afrendeiro
Last active August 29, 2015 14:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afrendeiro/56fdbdca6982e4411b7c to your computer and use it in GitHub Desktop.
Save afrendeiro/56fdbdca6982e4411b7c to your computer and use it in GitHub Desktop.
Parses MAST XML output, extracts the relevant information, and writes it to a tab-delimited file.
#!/usr/bin/env python
import csv
import sys
from argparse import ArgumentParser

from BeautifulSoup import BeautifulSoup
# Convert a MAST XML report into a tab-delimited table.
#
# Usage: python mast2tsv.py mast.output.xml mast.output.tsv
#
# One row is written per <sequence> element, with the columns
# peakName, motifPvalue, motifEvalue, motifPosition and motif. Sequences
# without a <hit> element get 'NA' in the last two columns.
# On an unreadable input or unwritable output file, a message is printed to
# stderr and the script exits with status 1.

# Command-line interface: two positional arguments (input XML, output TSV).
parser = ArgumentParser(
    description='Parses MEME-MAST xml output.',
    usage='python mast2tsv.py mast.output.xml mast.output.tsv')
parser.add_argument('infile', help='XML file from MAST.')
parser.add_argument('outfile', help='Output filename.')
args = parser.parse_args()

# Only reading the file is guarded; a malformed document should surface as
# a normal traceback rather than be reported as an unreadable file.
try:
    with open(args.infile, 'r') as f:
        xml = BeautifulSoup(f)
except IOError:
    sys.stderr.write(" '%s' file not openable.\n" % args.infile)
    # Non-zero status so calling shells and pipelines can detect the failure.
    sys.exit(1)

# Lookup table: motif id -> motif name, used to label each reported hit.
motifs = {
    str(motif['id']): str(motif['name'])
    for motif in xml.mast.motifs.findAll("motif")
}

# The header row comes first, followed by one row per sequence.
output = [['peakName', 'motifPvalue', 'motifEvalue', 'motifPosition', 'motif']]
for seq in xml.mast.sequences.findAll("sequence"):
    # NOTE(review): `seq.hit` yields a single <hit> element, so only one hit
    # per sequence is reported -- confirm this is the intended behavior.
    hit = seq.hit
    if hit:
        position = str(hit['pos'])
        motif_name = motifs[hit['motif']]
    else:
        position = 'NA'
        motif_name = 'NA'
    output.append([
        str(seq['name']),
        float(seq.score['combined_pvalue']),
        float(seq.score['evalue']),
        position,
        motif_name,
    ])

# Binary mode is what the Python 2 csv module expects; the BeautifulSoup 3
# import already ties this script to Python 2.
try:
    with open(args.outfile, 'wb') as f:
        wr = csv.writer(f, delimiter='\t', lineterminator='\n')
        wr.writerows(output)
except IOError:
    sys.stderr.write(" '%s' file not writable.\n" % args.outfile)
    sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment