Last active
August 29, 2015 14:04
-
-
Save afrendeiro/56fdbdca6982e4411b7c to your computer and use it in GitHub Desktop.
Parses MAST XML output, extracts the relevant information, and writes it to a tab-delimited file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
from argparse import ArgumentParser
import csv
import sys

from BeautifulSoup import BeautifulSoup
# Column names written as the first row of the output table.
HEADER = ['peakName', 'motifPvalue', 'motifEvalue', 'motifPosition', 'motif']


def build_parser():
    """Return the command-line parser: two positionals, infile and outfile."""
    parser = ArgumentParser(
        description='Parses MEME-MAST xml output.',
        usage='python mast2tsv.py mast.output.xml mast.output.tsv')
    parser.add_argument('infile', help='XML file from MAST.')
    parser.add_argument('outfile', help='Output filename.')
    return parser


def parse_mast(handle):
    """Build the output table from an open MAST XML file.

    Args:
        handle: readable file object containing the MAST XML document.

    Returns:
        A list of rows. The first row is a copy of HEADER; each following
        row describes one <sequence> element as
        [name, combined p-value, E-value, hit position, motif name].
        Only the first <hit> found inside a sequence is reported; sequences
        without any hit get 'NA' for both position and motif.

    Raises:
        KeyError: if a hit references a motif id that is not listed under
            <motifs>, or an expected attribute is missing.
    """
    xml = BeautifulSoup(handle)

    # Hits refer to motifs by id; translate those ids to readable names.
    motif_names = {
        str(motif['id']): str(motif['name'])
        for motif in xml.mast.motifs.findAll("motif")
    }

    rows = [list(HEADER)]
    for seq in xml.mast.sequences.findAll("sequence"):
        row = [
            str(seq['name']),
            float(seq.score['combined_pvalue']),
            float(seq.score['evalue']),
        ]
        hit = seq.hit  # first <hit> descendant, or None when there is none
        if hit:
            row.extend([str(hit['pos']), motif_names[hit['motif']]])
        else:
            row.extend(['NA', 'NA'])
        rows.append(row)
    return rows


def write_tsv(rows, path):
    """Write rows to path as tab-delimited text, one row per line.

    Args:
        rows: iterable of sequences of cell values.
        path: destination filename; an existing file is overwritten.

    Raises:
        IOError: if the file cannot be opened or written.
    """
    # The csv module expects a binary file on Python 2 but a text file opened
    # with newline='' on Python 3; pick whichever fits the interpreter.
    if sys.version_info[0] < 3:
        handle = open(path, 'wb')
    else:
        handle = open(path, 'w', newline='')
    with handle:
        writer = csv.writer(handle, delimiter='\t', lineterminator='\n')
        writer.writerows(rows)


def main(argv=None):
    """Convert a MAST XML report into a tab-delimited table.

    Args:
        argv: argument list to parse; None means use sys.argv[1:].

    Exits with status 1, after printing a message, when the input file
    cannot be opened or the output file cannot be written.
    """
    args = build_parser().parse_args(argv)

    try:
        with open(args.infile, 'r') as handle:
            rows = parse_mast(handle)
    except IOError:
        print(" '%s' file not openable." % args.infile)
        # Non-zero status so calling shells and pipelines see the failure.
        sys.exit(1)

    try:
        write_tsv(rows, args.outfile)
    except IOError:
        print(" '%s' file not writable." % args.outfile)
        sys.exit(1)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment