-
-
Save rweverwijk/8bff17568f791303b439 to your computer and use it in GitHub Desktop.
import os
import re
import sys
import xml.etree.ElementTree as ET

from py2neo import authenticate, Graph, Node, Relationship

# Register the credentials for the local Neo4j server, then open the graph
# handle that the functions in this script use to create and look up nodes.
# NOTE(review): host, user and password are hard-coded for a local test
# database; adjust them before running against anything else.
authenticate("localhost:7474", "neo4j", "test")
graph = Graph("http://localhost:7474/db/data/")
# Namespace URI of XML Schema. ElementTree reports every tag in the form
# '{namespace-uri}localname', so the entries below are built in that form.
_XSD_NS = 'http://www.w3.org/2001/XMLSchema'

# Fully qualified tags of the schema constructs that never get a graph node of
# their own when the schema tree is walked.
ignored_elements = ['{%s}%s' % (_XSD_NS, local_name) for local_name in (
    'complexContent',
    'sequence',
    'schema',
    'choice',
    'selector',
    'field',
    'annotation',
    'restriction',
    'enumeration',
    'pattern',
    'minLength',
    'maxLength',
    'totalDigits',
    'fractionDigits',
    'import',
    'anyAttribute',
    'include',
    'documentation',
)]

# Prefix-to-URI map for the XSD namespace.
# NOTE(review): not referenced by the visible code; presumably meant for
# namespace-aware ElementTree lookups (find/findall) -- confirm before removing.
ns = {'xs': _XSD_NS}
def create(current):
    """Create a new graph node for an XML element and return it.

    The node gets two labels -- the element's tag with its '{namespace}'
    prefix stripped, and 'Xsd' -- and a ``name`` property taken from the
    element's ``name`` attribute.

    Args:
        current: ElementTree element to represent in the graph.

    Returns:
        The newly created node.

    Raises:
        KeyError: if ``current`` has no ``name`` attribute.
    """
    # ElementTree tags look like '{namespace}local'; keep only the local part.
    # Raw string: '\{' is not a valid escape in a normal string literal.
    label = re.sub(r'^\{.*\}', '', current.tag)
    node = Node(label, 'Xsd', name=current.attrib['name'])
    graph.create(node)
    return node
def merge(current):
    """Return the graph node for ``current``, creating one if none exists.

    Named elements are looked up by label (the tag without its '{namespace}'
    prefix) and ``name`` property; when no such node exists a new one is
    created through ``create``.

    Elements without a ``name`` attribute cannot be looked up. They are
    reported on stdout and get a fresh node that carries only the labels, so
    the caller can keep walking the schema instead of failing with KeyError.

    Args:
        current: ElementTree element to find or create a node for.

    Returns:
        The existing or newly created node.
    """
    # ElementTree tags look like '{namespace}local'; keep only the local part.
    label = re.sub(r'^\{.*\}', '', current.tag)
    name = current.attrib.get('name')
    if name is None:
        print("name not found: " + str(current))
        # create() requires a name, so build the anonymous node here.
        unnamed = Node(label, 'Xsd')
        graph.create(unnamed)
        return unnamed
    node = graph.find_one(label, "name", name)
    if node is None:
        node = create(current)
    return node
def is_anonymous_complex_type(current):
    """Return True if ``current`` is a type definition without a name.

    Despite the function name, both ``xs:complexType`` and ``xs:simpleType``
    elements count; any other tag, or a type that has a ``name`` attribute,
    yields False.

    Args:
        current: ElementTree element to inspect.
    """
    type_tags = (
        '{http://www.w3.org/2001/XMLSchema}complexType',
        '{http://www.w3.org/2001/XMLSchema}simpleType',
    )
    return current.tag in type_tags and 'name' not in current.attrib
def _merge_complex_type(type_name):
    """MERGE the complexType node named ``type_name`` and return it."""
    return graph.cypher.execute_one(
        'MERGE (c:complexType:Xsd {name: {complex_name}}) return c',
        {"complex_name": type_name})


def recursive_print_childs(current, parentNode, level):
    """Walk an XSD element tree, printing it and mirroring it into the graph.

    For each element, depending on its tag:

    * ``xs:extension``: links ``parentNode`` to the (merged) base type with
      an EXTENDS relationship; children keep ``parentNode`` as their parent.
    * tags in ``ignored_elements`` and unnamed complex/simple types: no node
      is created; children keep ``parentNode`` as their parent.
    * ``xs:element`` below an existing parent: a new node is created and
      linked from the parent with HAS_ELEMENT, plus HAS_TYPE to the (merged)
      type node when the element has a ``type`` attribute.
    * anything else: the node is looked up or created through ``merge``.

    Args:
        current: ElementTree element to process.
        parentNode: graph node the element hangs under, or None at the top.
        level: nesting depth; controls how many '|' characters prefix the
            printed line.

    Returns:
        The node that served as parent for ``current``'s children.

    Raises:
        KeyError: if an ``xs:extension`` has no ``base`` attribute or an
            ``xs:element`` below a parent has no ``name`` attribute.
    """
    xsd = '{http://www.w3.org/2001/XMLSchema}'

    if current.tag == xsd + 'extension':
        base_node = _merge_complex_type(current.attrib['base'])
        graph.create(Relationship(parentNode, 'EXTENDS', base_node))
        node = parentNode
    elif current.tag in ignored_elements or is_anonymous_complex_type(current):
        # Structural wrapper: children attach to the enclosing node.
        node = parentNode
    else:
        # Parenthesised so the indent prefix applies to unnamed elements too.
        print("|" * level + current.attrib.get('name', current.tag))
        level = level + 1
        # Exact tag match: a substring test would also hit foreign tags or
        # namespaces that merely contain "element". "is not None" avoids
        # depending on how the node type defines truthiness.
        if current.tag == xsd + 'element' and parentNode is not None:
            node = create(current)
            graph.create(Relationship(parentNode, 'HAS_ELEMENT', node))
            if 'type' in current.attrib:
                type_node = _merge_complex_type(current.attrib['type'])
                graph.create(Relationship(node, 'HAS_TYPE', type_node))
        else:
            node = merge(current)

    for child in current:
        recursive_print_childs(child, node, level)
    return node
# Script entry: the first command-line argument is a directory; every *.xsd
# file directly inside it is parsed and loaded into the graph.
if len(sys.argv) < 2:
    print("Please give the directory containing the xsd files as argument")
    sys.exit(1)

xsd_dir = sys.argv[1]
for filename in os.listdir(xsd_dir):
    if filename.endswith(".xsd"):
        print(filename)
        # os.path.join works whether or not the directory argument ends with
        # a path separator; plain concatenation did not.
        root = ET.parse(os.path.join(xsd_dir, filename)).getroot()
        recursive_print_childs(root, None, 0)
Hi, wow! I'm surprised you have found it. I think this snippet is 10 years old.
To give you some background, this is probably a script that I wrote in a couple of hours to quickly show some nice insights.
If I had to write it again, I would use the standard Neo4j Python driver (https://neo4j.com/docs/api/python-driver/current/), which did not exist yet at that time: start a single transaction and run a couple of Cypher statements from the recursive function.
The Cypher statement you see there uses Neo4j 3.x-style syntax, which is no longer valid. Today you would write it as:
MERGE (c:ComplexType:Xsd {name: $complex_name}) RETURN c
If I remember correctly, `graph.create` was a function to create Nodes/Relationships.
The function `graph.cypher.execute_one` would create a transaction and execute a Cypher statement in that transaction.
I cannot recall why I mixed `graph.create` and `graph.cypher.execute_one`. As mentioned, with current knowledge and functionality I would simply write a couple of Cypher statements.
Have fun!
Have you ever redone this process with the last version of py2neo?
I can't get your script to run because the newer version of py2neo won't accept the graph.cypher.execute_one request, and I'm not entirely sure what lines 58 and 71 are doing because there's no longer any documentation for py2neo version 3 online.