Created
October 27, 2020 09:37
-
-
Save nickhowston/52445dcea9992efd2e01df486c5e72e0 to your computer and use it in GitHub Desktop.
this file contains methods built for processing GeneProductAssociations from SBML model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
this code is an example of a parsing method to extract GeneProductAssociations from SBML file | |
support version 2 of SBML level 3 Flux Balance Constraints(“fbc”) package | |
with the aid of Libsbml library | |
""" | |
from itertools import chain, combinations | |
import libsbml | |
import os | |
class GeneProteinReactionAssociationParsingSBML: | |
def __init__(self, sbml_file): | |
self.logics = ['or', 'and'] | |
self.sbml_file = sbml_file | |
self.sbml_level = None | |
self.sbml_version = None | |
self.fbc_version = None | |
self.document = None | |
self.model = None | |
self.reactions = {} | |
self.gpr = {} | |
def execute(self): | |
print(f"Reading {os.path.basename(self.sbml_file)}") | |
self.read_sbml_file() | |
print(f"This is a SBML Level {self.sbml_level}, Version {self.sbml_version} Document") | |
if self.fbc_version: | |
print(f"Document supports FBC Plugin version {self.fbc_version}") | |
if self.model is None: | |
print("No model present.") | |
self.get_reactions() | |
self.process_gpr_in_reactions() | |
print(f"\nThe Gene-Protein-Reaction Associations in this file: \n{self.gpr}\n") | |
return self.gpr | |
def read_sbml_file(self): | |
# first read the document (i.e. parsed XML) | |
self.document = self.read_document(self.sbml_file) | |
# read SBML Level | |
self.sbml_level = self.read_sbml_level() | |
# read SBML Version | |
self.sbml_version = self.read_sbml_version() | |
# read FBC Plugin Version | |
self.fbc_version = self.read_fbc_version() | |
# read model | |
self.model = self.read_model() | |
def process_gpr_in_reactions(self): | |
for reaction in self.reactions.items(): | |
reaction_name = reaction[0] | |
fbc = reaction[1].getPlugin("fbc") | |
gpa = fbc.getGeneProductAssociation() | |
if gpa is None: | |
continue | |
association = gpa.getAssociation() | |
self.gpr[reaction_name] = self.process_gpr(association, current_list=[]) | |
def process_gpr(self, node, current_list, output=None): | |
""" | |
This is a recursive function that specifically process all elements under 'geneProductAssociation' in a sbml | |
format file | |
:param node: object created from libsbml.py | |
:param current_list: keep track of what genes are in the current branch the node is in | |
:param output: the final output, type as list | |
:return: type as list [['g1,g2,g3'], ['g1,g3,g4'], ['g2,g3,g5'], ...] | |
""" | |
if output is None: | |
output = [] | |
try: | |
children = node.getListOfAssociations() | |
except AttributeError: | |
return [node.gene_product] | |
temp_condition_all_gene = True | |
temp_condition_all_logi = True | |
for child in children: | |
if child.element_name in self.logics: | |
temp_condition_all_gene = False | |
if child.element_name not in self.logics: | |
temp_condition_all_logi = False | |
if node.element_name == 'and': | |
temp_list = [] | |
for child in children: | |
if child.element_name == 'geneProductRef': | |
temp_list.append(child.gene_product) | |
if temp_condition_all_gene: | |
output.append(temp_list) | |
elif temp_condition_all_logi: | |
temp_output = [] | |
for child in children: | |
temp_output.append(self.process_gpr(child, current_list=current_list, output=[])) | |
processed = self.combination_lists(temp_output) | |
for ele in processed: | |
ele.extend(current_list) | |
output.extend(processed) | |
elif not children: | |
output.append(current_list) | |
else: | |
part_of_gpr = [] | |
for child in children: | |
if child.element_name in self.logics: | |
part_of_gpr.append(self.process_gpr(node=child, current_list=[], output=[])) | |
processed_gpr = self.combination_lists(part_of_gpr) | |
for ele in processed_gpr: | |
ele.extend(temp_list) | |
output.extend(processed_gpr) | |
elif node.element_name == 'or': | |
if not children: | |
output.append(current_list) | |
for child in children: | |
temp_list = list(current_list) | |
if child.element_name == 'geneProductRef': | |
temp_list.append(child.gene_product) | |
output.append(temp_list) | |
else: | |
self.process_gpr(node=child, current_list=temp_list, output=output) | |
else: | |
pass | |
return output | |
@staticmethod | |
def read_document(location): | |
# create libsbml reader | |
reader = libsbml.SBMLReader() | |
if reader is None: | |
raise ValueError("Could not initiate SBMLReader object.") | |
# read out the sbml file content | |
document = reader.readSBMLFromFile(location) | |
# check for errors | |
if isinstance(document, libsbml.SBMLDocument): | |
if document.getNumErrors() > 0: | |
print("Read Error(s):\n") | |
document.printErrors() | |
if document.getError(0).getErrorId() == libsbml.XMLFileUnreadable: | |
raise ValueError("Unreadable file error. File is missing and/or no access privileges to read.") | |
print("Correct the above and re-run") | |
return document | |
def read_sbml_level(self): | |
"""Read SBML Level""" | |
return self.document.getLevel() | |
def read_sbml_version(self): | |
"""Read SBML Version""" | |
return self.document.getVersion() | |
def read_fbc_version(self) -> str or None: | |
"""Read fbc version (if available)""" | |
fbc_plugin = self.document.getPlugin("fbc") | |
if fbc_plugin: | |
return fbc_plugin.getPackageVersion() | |
return None | |
def read_model(self): | |
"""Obtain Model definition in document""" | |
model = self.document.getModel() | |
if not isinstance(model, libsbml.Model): | |
raise ValueError("No model is present! Unable to work with this file.") | |
return model | |
def get_reactions(self): | |
"""Obtain All Reactions in the models""" | |
reactions = self.model.getListOfReactions() | |
if reactions is None: | |
print("No Reactions found! Check the file") | |
for element in reactions: | |
self.reactions[element.id] = element | |
@staticmethod | |
def combination_lists(lists): | |
""" | |
:param lists: [["g1","g2","g3"],["g4","g5, g6"],["g7, g8"]] | |
:return: [["g1,g4,g7,g8"], ["g1,g5,g6,g7,g8"], ["g2, g4, g7, g8"], ["g2, g5, g6, g7, g8"], ["g3, g4, g7, g8"], | |
["g3, g5, g6, g7, g8"]] | |
""" | |
combination = [] | |
for i in combinations(chain.from_iterable(lists), len(lists)): | |
all_in_state = True | |
for j in range(0, len(lists)): | |
if i[j] not in lists[j]: | |
all_in_state = False | |
if all_in_state: | |
temp = [] | |
for gene in chain.from_iterable(i): | |
temp.append(gene) | |
combination.append(temp) | |
return combination | |
if __name__ == '__main__': | |
GPRA = GeneProteinReactionAssociationParsingSBML(sbml_file="") | |
GPRA.execute() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment