Skip to content

Instantly share code, notes, and snippets.

@twaddlac
Created June 26, 2018 15:09
Show Gist options
  • Save twaddlac/fb96f71c580689d0bcd2c9c16b8a4101 to your computer and use it in GitHub Desktop.
Save twaddlac/fb96f71c580689d0bcd2c9c16b8a4101 to your computer and use it in GitHub Desktop.
XML to TSV parser for Michelle.
import xml.etree.ElementTree
import re
# Parse the RNAi XML dump once at start-up and keep its root element.
root = xml.etree.ElementTree.parse('RNAi.fixed.xml').getroot()
# Raw string: "\w" in a normal literal is an invalid escape sequence and
# triggers a warning on current Python versions.  With .match() this pattern
# accepts text whose first character is a word character.
pattern = re.compile(r"\w")
def get_text_list(child,path):
    """Join the text of the elements under *child* that match *path*.

    An element is kept only when its text begins with a word character
    (letter, digit or underscore).  Elements with no text at all, or whose
    text starts with anything else (for example the whitespace that precedes
    nested children), are skipped.  Trailing whitespace is stripped from
    every kept value.

    Args:
        child: element to search; must provide ``findall``.
        path: path expression handed to ``child.findall``.

    Returns:
        The kept values joined with ';', or '' when nothing qualifies.
    """
    return ';'.join(
        node.text.rstrip()
        for node in child.findall(path)
        # node.text is None for elements without text; matching a regex
        # against None would raise TypeError, so rule it out first.
        if node.text is not None and re.match(r"\w", node.text)
    )
def get_inhibit_children(child,path):
    """Flatten the entries nested under every *path* match into one string.

    For each element matched by *path*, every direct child contributes its
    own text (only when that text begins with a word character), followed by
    the text of the elements that ``findall('**')`` selects beneath that
    child.  Trailing whitespace is stripped from every value.

    Args:
        child: element to search; must provide ``findall``.
        path: path expression handed to ``child.findall``.

    Returns:
        The collected values joined with ',', or '' when nothing qualifies.
    """
    values = []
    for matched in child.findall(path):
        for entry in matched.findall('*'):
            # entry.text is None for elements without text; matching a regex
            # against None would raise TypeError, so rule it out first.
            if entry.text is not None and re.match(r"\w", entry.text):
                values.append(entry.text.rstrip())
            # NOTE(review): '**' is not documented ElementPath syntax.
            # ElementTree appears to treat it like '*/*' (the grandchildren
            # of `entry`) - confirm that this is the intended selection.
            for nested in entry.findall('**'):
                # str() keeps the existing output: a text-less element
                # contributes the literal 'None'.
                values.append(str(nested.text).rstrip())
    return ','.join(values)
def _nested_text(element, *steps):
    """Walk *steps* with successive ``find`` calls and return the final text.

    Returns "NA" when any step finds nothing or when the element reached has
    no text, so the caller always gets a string it can put in a TSV row.
    """
    for step in steps:
        if element is None:
            return "NA"
        element = element.find(step)
    if element is None or element.text is None:
        return "NA"
    return element.text


# Header row; every record below emits its columns in this same order.
print('\t'.join(['RNAi','History_name','Homol','PCR_product','Laboratory','Date','Predicted_gene','Gene','Transcript','Species','Paper','Phenotype']))

# One tab-separated line per direct child of the XML root.
for child in root:
    rnai_entry = [
        # Record identifier: the element's own leading text.
        str(child.text).rstrip(),
        _nested_text(child, 'History_name', 'Text'),
        get_text_list(child,'.//Homol_data'),
        get_text_list(child,'.//PCR_product'),
        # Both resolve to "NA" when the record has no Experiment element.
        _nested_text(child, 'Experiment', 'Laboratory', 'Laboratory'),
        _nested_text(child, 'Experiment', 'Date', 'Date'),
        get_inhibit_children(child,'.//Predicted_gene'),
        get_inhibit_children(child,'.//Gene'),
        get_inhibit_children(child,'.//Transcript'),
        _nested_text(child, './Species/Species'),
        _nested_text(child, './Reference/Paper'),
    ]

    # Each matched Phenotype element becomes one comma-joined group made of
    # its own text plus the text found two and three levels beneath it;
    # the groups are then joined with ';'.
    phen_list = []
    for phenotype in child.findall('./Phenotype//Phenotype'):
        phen = [str(phenotype.text).rstrip()]
        for tag in phenotype.findall('*'):
            for value in tag.findall('*'):
                phen.append(str(value.text).rstrip())
                for detail in value.findall('*'):
                    phen.append(str(detail.text).rstrip())
        phen_list.append(','.join(phen))
    rnai_entry.append(';'.join(phen_list))

    print('\t'.join(rnai_entry))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment