Skip to content

Instantly share code, notes, and snippets.

@n-kb
Last active December 19, 2015 18:28
Show Gist options
  • Save n-kb/5998545 to your computer and use it in GitHub Desktop.
Save n-kb/5998545 to your computer and use it in GitHub Desktop.
This simple script roughly converts an OWL file created by Protégé into a models.py file to be used with Neo4Django. Comments/improvements very welcome!
from lxml import etree
# Converts an OWL file (as produced by Protege) into the text of a neo4django
# models.py file and prints it on standard output.

# Accumulates the text of the generated models.py file
modelsContents = "from neo4django.db import models\n\n"
# Name of the OWL file to parse.
# The relationships in the file should always start with has...
owlFile = "ontology.owl"
# The ontology URI. Only needed for documentation purposes
ontologyURI = "http://www.semanticweb.org/nkb/ontologies/2013/6/impact-investment#"
# Adds a comment in the models.py file
modelsContents += "# The ontology can be found in its entirety at " + ontologyURI + "\n"
# Defines the owl, rdf and rdfs namespaces
namespaces = {
    'owl': 'http://www.w3.org/2002/07/owl#',
    'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfs': 'http://www.w3.org/2000/01/rdf-schema#'
}
# Fully qualified names of the rdf:about and rdf:resource attributes
_RDF_ABOUT = "{" + namespaces['rdf'] + "}about"
_RDF_RESOURCE = "{" + namespaces['rdf'] + "}resource"
# Correspondence between XSD data type names and neo4django property classes.
# "string" used to be listed twice (StringProperty, then StringArrayProperty);
# the second entry silently overrode the first. Only the scalar mapping is kept.
correspondanceTypes = {
    "string": "StringProperty",
    "anyURI": "URLProperty",
    "int": "IntegerProperty",
    "nonNegativeInteger": "IntegerProperty",
    "nonPositiveInteger": "IntegerProperty",
    # XSD spells these two with a lower-case initial
    "positiveInteger": "IntegerProperty",
    "negativeInteger": "IntegerProperty",
    # Historical spellings, kept so that existing lookups keep working
    "PositiveInteger": "IntegerProperty",
    "NegativeInteger": "IntegerProperty",
    "integer": "IntegerProperty",
    "dateTimeStamp": "DateTimeProperty",
    "dateTime": "DateTimeProperty",
    "boolean": "BooleanProperty"
}


def _local_name(uri):
    """Return the part of *uri* that follows its first '#'.

    Raises IndexError when *uri* contains no '#'.
    """
    return uri.split("#")[1]


# Parses the file with etree
tree = etree.parse(owlFile)
root = tree.getroot()
# Walks through all the top-level Classes
for ontologyClassElement in root.findall("owl:Class", namespaces):
    classURI = ontologyClassElement.get(_RDF_ABOUT)
    # A class without rdf:about has no name to generate a model from
    if classURI is None:
        continue
    className = _local_name(classURI)
    # By default, the class has no parent other than the neo4django base model
    parentClass = "models.NodeModel"
    # Relationships and properties collected for this class
    relations = []
    properties = []
    for subClassElement in ontologyClassElement.findall("rdfs:subClassOf", namespaces):
        parentClassURI = subClassElement.get(_RDF_RESOURCE)
        # rdfs:subClassOf pointing at a resource: the Class extends another Class
        if parentClassURI is not None:
            parentClass = _local_name(parentClassURI)
            continue
        # Otherwise the element may carry restrictions describing
        # relationships or properties
        for restriction in subClassElement.findall("owl:Restriction", namespaces):
            onProperty = restriction.find("owl:onProperty", namespaces)
            relationClass = restriction.find("owl:onClass", namespaces)
            if relationClass is not None:
                # Relationship: destination Class and relationship type
                relation = {}
                relation["URI"] = relationClass.attrib[_RDF_RESOURCE]
                relation["name"] = _local_name(relation["URI"])
                # A relationship towards the class itself must be written as
                # the quoted string 'self'; the class name is not bound yet
                # while its own body is being executed
                if relation["name"] == className:
                    relation["name"] = "'self'"
                relation["type"] = _local_name(onProperty.attrib[_RDF_RESOURCE])
                # Guesses the attribute name from the relationship type, which
                # should be "has<Something>". Types that do not follow the
                # convention (or are exactly "has") are left out because no
                # attribute name can be derived for them
                if relation["type"].startswith("has") and len(relation["type"]) > 3:
                    relation["destination"] = relation["type"][3:].lower()
                    relations.append(relation)
                continue
            # Property: the data type is given by owl:onDataRange or,
            # failing that, by owl:someValuesFrom
            dataTypeElement = restriction.find("owl:onDataRange", namespaces)
            if dataTypeElement is None:
                dataTypeElement = restriction.find("owl:someValuesFrom", namespaces)
            if dataTypeElement is None:
                continue
            propertyType = _local_name(onProperty.attrib[_RDF_RESOURCE])
            dataTypeURI = dataTypeElement.attrib[_RDF_RESOURCE]
            # NOTE: raises KeyError for a data type missing from
            # correspondanceTypes
            dataType = correspondanceTypes[_local_name(dataTypeURI)]
            properties.append({
                "name": propertyType,
                "type": dataType
            })
    # Writes the class in models.py ("class" must start at column 0)
    classLines = ["\nclass " + className + "(" + parentClass + "):\n"]
    # Writes the properties
    for prop in properties:
        classLines.append("\t" + prop["name"] + " = models." + prop["type"] + "()\n")
    # Writes the relationships
    for relation in relations:
        classLines.append(
            "\t" + relation["destination"] + " = models.Relationship("
            + relation["name"] + ",rel_type='" + relation["type"] + "')\n"
        )
    # A class statement needs at least one statement in its body
    if not properties and not relations:
        classLines.append("\tpass\n")
    modelsContents += "".join(classLines)
# Parenthesised form prints the same text under Python 2 and Python 3
print(modelsContents)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment