dennisverspuij/neo4j_explore.py

## neo4j_explore.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Python 2/3 utility for interactively unraveling structure from a Neo4j graph database.
First adjust the DRIVER statement for the database you wish to explore. Then run python neo4j_explore.py.
The script will first scan the database for unique combinations of labels, and offer you queries to explore these.
E.g. >0 (A, B)*123 presents a query for exploring 123 nodes with both labels A and B by typing >0.
First the possible properties are listed, e.g. .[someprop]*67 means 67 out of 123 nodes have property someprop.
The square brackets emphasize the property does not always exist.
Second possible outgoing relations are listed, e.g. <>5 >*10 [somereltype]*50 (C, D)*20 means 10 out of the 123
nodes together have 50 outgoing relations of type somereltype to 20 distinct nodes with both labels C and D.
Again the square brackets emphasize the relation does not always exist.
To explore the target (C, D) nodes you can type >5. Or if you wish to explore the source (A, B) nodes that do
have these somereltype relations to (C, D) nodes type <5.
To iterate source or target nodes and print their property values type <5= or >5= respectively.
You can additionally filter for having or not-having certain properties by adding .someprop or !someprop sequences
after the query number. E.g. >5.someprop!otherprop will list only those that do have someprop but not otherprop.
Special query >5.= lists nodes that have at least one property, and >5!= lists nodes that have no properties at all.
Besides using queries offered by the script you can add and run manual queries as well. First type the subject
variable of the query, e.g. n, and then type your query without RETURN/ORDER clause followed by an extra newline,
e.g. MATCH (n) WHERE id(n) < 10.
'''

from __future__ import absolute_import, division, print_function
import sys
import readline
import re
from collections import OrderedDict
from hashlib import sha256 as hashfunc
from neo4j.v1 import GraphDatabase, basic_auth


# Settings: ADJUST ME!
# Replace HOST_OR_IP, USER and PASSWORD below with the Bolt address and credentials
# of the database to explore; the driver object is created as soon as this line runs.
DRIVER = GraphDatabase.driver(uri=u'bolt://HOST_OR_IP:7687', auth=basic_auth(u'USER', u'PASSWORD'))

# Utils
def _make_read_stdin():
    """Build the prompt-reading function used for all interactive input.

    On Python 3 this is simply the builtin ``input``.  On Python 2 it wraps
    ``raw_input`` and decodes the bytes it returns so callers always get text.

    The Python 2 detection uses a NameError probe instead of
    ``vars(__builtins__)``, because ``__builtins__`` is a plain dict (not a
    module) whenever this file is imported rather than run as a script.
    """
    try:
        py2_raw_input = raw_input  # noqa: F821 -- only defined on Python 2
    except NameError:
        return input

    def _read(prompt=u''):
        # The prompt is optional so that read_stdin() works like input() does.
        # sys.stdin.encoding is None when stdin is not a terminal on Python 2.
        return py2_raw_input(prompt).decode(sys.stdin.encoding or 'utf-8')
    return _read


read_stdin = _make_read_stdin()

# Model
class Query:
    """One step of a (possibly chained) Cypher query that can be inspected or listed.

    A query holds its own Cypher fragment plus an optional parent; the complete
    statement is the parent chain's fragments followed by this fragment.  The
    counts are cached from the last unfiltered inspection and substituted into
    the description template whenever the query is displayed.
    """

    VARFORMAT = u'_{var}'   # We format our own variables like this, avoid using vars like these in manual queries!

    def __init__(self, inspector, index, hash, description, cypher, var_to, cnt_to=0, cnt_rel=0, var_from=None, cnt_from=0, parent=None):
        self.inspector = inspector  # Neo4jInspector bound to
        self.index = index  # index in collections this Query is used in
        self.hash = hash  # hash of this query and its parent
        self.description = description  # description template (unicode) with {cnt_from}/{cnt_rel}/{cnt_to} fields
        self.cypher = cypher  # cypher query (unicode)
        self.var_to = var_to  # variable name of the target subject in the cypher query (unicode)
        self.cnt_to = cnt_to  # last nr matching target nodes (int), 0 if unknown yet
        self.cnt_rel = cnt_rel  # last nr of relations (int), 0 if unknown or not applicable
        self.var_from = var_from  # variable name of the originating subject in the cypher query (unicode), None if not applicable
        self.cnt_from = cnt_from  # last nr matching originating nodes (int), 0 if unknown or not applicable
        self.parent = parent  # parent query (Query)

    def var(self, alias):
        """Return a script-owned Cypher variable name unique to this query, e.g. ``_r5``."""
        return self.VARFORMAT.format(var=u'{0}{1}'.format(alias, self.index))

    def get_cypher(self):
        """Return the full Cypher text: the parent chain's fragments, then this one."""
        if self.parent is None:
            return self.cypher
        return self.parent.get_cypher() + u'\n' + self.cypher

    def get_description(self, full=True, with_index=True):
        """Return the human readable description with the cached counts filled in.

        full       -- prefix the descriptions of all ancestors.
        with_index -- prefix the ``>N`` / ``<>N`` command hint for this query.
        """
        description = self.description.format(cnt_from=self.cnt_from, cnt_rel=self.cnt_rel, cnt_to=self.cnt_to)
        if full and self.parent is not None:
            description = self.parent.get_description(True, False) + u' ' + description
        if not with_index:
            return description
        hint = u'{0}>{1}'.format(u' ' if self.var_from is None else u'<', self.index)
        return u'{0:<5}  {1}'.format(hint, description)

    @staticmethod
    def _quote(name):
        """Return name as a backtick-quoted Cypher identifier (embedded backticks doubled)."""
        return u'`{0}`'.format(name.replace(u'`', u'``'))

    @staticmethod
    def _escape_braces(text):
        """Double curly braces so text survives a later str.format() pass unchanged."""
        return text.replace(u'{', u'{{').replace(u'}', u'}}')

    @staticmethod
    def _where_props(subjectvar, propsq):
        """Translate a ``.prop!other`` filter string into a Cypher WHERE clause.

        ``.name`` requires the property, ``!name`` forbids it.  A bare ``.`` or
        ``!`` means "has any property" / "has no properties" and overrides all
        named filters.  Returns u'' when there is nothing to filter on, else a
        string starting with u' WHERE '.
        """
        filters = OrderedDict(
            (m.group(2).strip(), m.group(1) == u'.')
            for m in re.finditer(u'([.!])([^.!]*)', propsq or u'', re.UNICODE)
        )
        if u'' in filters:
            where = [u'properties({0}) {1} {{}}'.format(subjectvar, u'<>' if filters[u''] else u'=')]
        else:
            where = []
            for key, has_or_not in filters.items():
                # Quote the key like every other generated query does, so keys with
                # spaces or punctuation work; keys the user already quoted are kept.
                already_quoted = len(key) >= 2 and key[0] == u'`' and key[-1] == u'`'
                ref = key if already_quoted else Query._quote(key)
                where.append(u'({0}EXISTS({1}.{2}))'.format(u'' if has_or_not else u'NOT ', subjectvar, ref))
        return (u' WHERE ' + u' AND '.join(where)) if where else u''

    def inspect(self, use_var_from=False, propsq=u''):
        """Print property and outgoing-relation statistics for this query's subject nodes.

        use_var_from -- inspect the originating nodes instead of the target nodes.
        propsq       -- optional property filter, see _where_props().

        Every relation type / target label set found is registered with the
        inspector as a child query.  Raises ValueError when the originating
        nodes are requested but this query has none.
        """
        subjectvar = self.var_from if use_var_from else self.var_to
        if subjectvar is None:
            raise ValueError(u'No source subject in this query')
        propsq = propsq or u''
        where = u'WITH *' + self._where_props(subjectvar, propsq)
        cypher = u'{0}\n{1}'.format(self.get_cypher(), where)
        print(cypher + u'\nRETURN DISTINCT ' + subjectvar)

        if self.var_from is None:
            cntq = u'{0} RETURN 0 as cnt_from, 0 as cnt_rel, count(DISTINCT {1}) AS cnt_to'.format(cypher, self.var_to)
        else:
            cntq = u'{0} RETURN count(DISTINCT {1}) AS cnt_from, count(*) AS cnt_rel, count(DISTINCT {2}) AS cnt_to'.format(cypher, self.var_from, self.var_to)
        try:
            r = next(iter(self.inspector.run_query(cntq)))
        except StopIteration:
            subjectcnt = 0
        else:
            subjectcnt = r[u'cnt_from'] if use_var_from else r[u'cnt_to']
            if propsq == u'':
                # Only unfiltered counts describe the query itself, so only those are cached.
                self.cnt_from, self.cnt_rel, self.cnt_to = (r[u'cnt_from'], r[u'cnt_rel'], r[u'cnt_to'])
        print(u'== {0:=<77}'.format(u'{0}*{1:d} '.format(subjectvar, subjectcnt)))
        if subjectcnt == 0:
            return
        cypher += u'\nWITH DISTINCT ' + subjectvar

        # Properties: one count query per known property key.
        for key in self.inspector.propertykeys:
            countq = u'{0} WHERE exists({1}.{2}) RETURN count(*) AS cnt'.format(cypher, subjectvar, self._quote(key))
            cnt = next(iter(self.inspector.run_query(countq)))[u'cnt']
            if cnt > 0:
                print(u'       .{0}*{1:d}'.format(key if cnt == subjectcnt else u'[{0}]'.format(key), cnt))

        # Outgoing relations, grouped by relation type and target label set.
        var_rel = self.var(u'r')
        var_to = self.var(u'n')
        relqueries = []
        for r in self.inspector.run_query(u'{0} MATCH ({1})-[{2}]->({3}) WITH {1}, {2}, {3} RETURN DISTINCT type({2}) AS type, labels({3}) AS labels, count(DISTINCT {1}) AS cnt_from, count(DISTINCT {2}) AS cnt_rel, count(DISTINCT {3}) AS cnt_to'.format(cypher, subjectvar, var_rel, var_to)):
            if r[u'cnt_rel'] <= 0:
                continue
            rel_type = r[u'type']
            labels = self.inspector.sort_labels(r[u'labels'])
            shown_type = rel_type if r[u'cnt_from'] == subjectcnt else u'[{0}]'.format(rel_type)
            # The description is formatted a second time when displayed, so any
            # braces in user or database supplied text must be escaped here.
            description = u'{propsq}>*{{cnt_from:d}} {type}*{{cnt_rel:d}} ( {labels} )*{{cnt_to:d}}'.format(
                propsq=self._escape_braces(propsq),
                type=self._escape_braces(shown_type),
                labels=self._escape_braces(u', '.join(labels))
            )
            # One condition per label plus the exact label count; an unlabelled
            # target therefore yields just the size() test instead of an empty label.
            conditions = [u'{0}:{1}'.format(var_to, self._quote(label)) for label in labels]
            conditions.append(u'size(labels({0})) = {1}'.format(var_to, len(labels)))
            child_cypher = u'{0} MATCH ({1})-[{2}:{3}]->({4}) WHERE {5}'.format(
                where, subjectvar, var_rel, self._quote(rel_type), var_to, u' AND '.join(conditions)
            )
            relqueries.append((
                rel_type,
                self.inspector.set(
                    description=description,
                    cypher=child_cypher,
                    var_to=var_to,
                    cnt_to=r[u'cnt_to'],
                    cnt_rel=r[u'cnt_rel'],
                    var_from=subjectvar,
                    cnt_from=r[u'cnt_from'],
                    parent=self
                )
            ))
        relationtypes = self.inspector.relationtypes
        unknown_rank = len(relationtypes)  # relation types created after the initial scan sort last
        for _, relquery in sorted(relqueries, key=lambda item: (relationtypes.get(item[0], unknown_rank), item[0])):
            print(relquery.get_description(False))

        print(u'{0:=<80}'.format(u''))

    def list(self, use_var_from=False, propsq=u''):
        """Page through the subject nodes, printing properties and outgoing relations.

        After each node the user presses Enter to continue; Ctrl-C or Ctrl-D
        stops the listing.  Parameters are the same as for inspect().  Raises
        ValueError when the originating nodes are requested but there are none.
        """
        subjectvar = self.var_from if use_var_from else self.var_to
        if subjectvar is None:
            raise ValueError(u'No source subject in this query')

        var_rel = self.var(u'r')
        var_to = self.var(u'n')
        cypher = u'{0}\nWITH DISTINCT {2}{1}\nOPTIONAL MATCH ({2})-[{3}]->({4})'.format(self.get_cypher(), self._where_props(subjectvar, propsq or u''), subjectvar, var_rel, var_to)
        print(cypher + u'\nRETURN DISTINCT ' + subjectvar)
        cypher += u'\nRETURN DISTINCT {0}, type({1}) as type, labels({2}) as labels, count(DISTINCT {2}) as cnt_to ORDER BY id({0}), type, labels'.format(subjectvar, var_rel, var_to)

        print(u'{0:=<80}'.format(u''))
        prev_id = None
        for r in self.inspector.run_query(cypher):
            node = r[subjectvar]
            if node.id != prev_id:
                if prev_id is not None:
                    try:
                        # Explicit empty prompt: the Python 2 reader requires one.
                        read_stdin(u'')
                    except (EOFError, KeyboardInterrupt):
                        break
                prev_id = node.id
                print(u'\n({1}) {0:d}:'.format(node.id, u', '.join(self.inspector.sort_labels(node.labels))))
                for key, value in self.inspector.sort_properties(node.properties).items():
                    print(u'.{0:<16s}\t{1}'.format(key, repr(value)))
            if r[u'type'] is not None:
                print(u'> {0} ({1})*{2}'.format(r[u'type'], u', '.join(self.inspector.sort_labels(r[u'labels'])), r[u'cnt_to']))
        print(u'\n{0:=<80}'.format(u''))


class Neo4jInspector():
    """Registry of explorable queries plus cached schema statistics for one database.

    On construction (and on reset()) the database is scanned for distinct
    label sets, relation types and property keys.  Each label set becomes an
    initial query; further queries are registered through set().
    """

    def __init__(self, driver):
        self.driver = driver  # Neo4j GraphDatabase driver
        self.reset()

    @staticmethod
    def _quote(name):
        """Return name as a backtick-quoted Cypher identifier (embedded backticks doubled)."""
        return u'`{0}`'.format(name.replace(u'`', u'``'))

    def reset(self):
        """Drop all queries and rescan labels, relation types and property keys."""
        self.queriesByIndex = []
        self.queriesByHash = {}
        self.label_uses = {}  # label -> number of distinct label sets it occurs in

        print(u'Scanning labels')
        labelsets = [(r[u'labels'], r[u'cnt']) for r in self.run_query(u'MATCH (n) RETURN DISTINCT labels(n) AS labels, count(*) AS cnt')]
        for labels, cnt in labelsets:
            for label in labels:
                self.label_uses[label] = self.label_uses.get(label, 0) + 1
        for labels, cnt in sorted((self.sort_labels(labels), cnt) for labels, cnt in labelsets):
            # One condition per label plus the exact label count; unlabelled nodes
            # therefore get just the size() test instead of an empty label name.
            conditions = [u'n:{0}'.format(self._quote(label)) for label in labels]
            conditions.append(u'size(labels(n)) = {0:d}'.format(len(labels)))
            self.set(
                description=u'( {0} )*{{cnt_to:d}}'.format(u', '.join(labels).replace(u'{', u'{{').replace(u'}', u'}}')),
                cypher=u'MATCH (n) WHERE ' + u' AND '.join(conditions),
                var_to=u'n',
                cnt_to=cnt
            )

        print(u'Scanning relation types')
        # Relation type -> display rank; types reaching the most distinct label sets come first.
        self.relationtypes = OrderedDict(
            (r[u'type'], index)
            for index, r in enumerate(self.run_query(u'MATCH ()-[r]->(n) RETURN DISTINCT type(r) as type, count(DISTINCT labels(n)) AS cnt ORDER BY cnt DESC, type ASC'))
        )

        print(u'Scanning property keys')
        # Property key -> display rank; keys used by the most distinct label sets come first.
        ranked_keys = sorted(
            (
                -next(iter(self.run_query(
                    u'MATCH (n) WHERE exists(n.{0}) RETURN count(DISTINCT labels(n)) AS cnt'.format(self._quote(r[u'propertyKey']))
                )))[u'cnt'],
                r[u'propertyKey']
            )
            for r in self.run_query(u'CALL db.propertyKeys()')
        )
        self.propertykeys = OrderedDict((key, index) for index, (cnt, key) in enumerate(ranked_keys))

    def sort_labels(self, labels):
        """Return labels ordered by how many label sets use them (fewest first), then by name.

        Labels that did not exist during the last scan count as used zero times.
        """
        return [label for uses, label in sorted((self.label_uses.get(label, 0), label) for label in labels)]

    def sort_properties(self, properties):
        """Return properties as an OrderedDict in the scanned display order.

        Keys that did not exist during the last scan are placed last, by name.
        """
        unknown_rank = len(self.propertykeys)
        ranked = sorted((self.propertykeys.get(key, unknown_rank), key) for key in properties)
        return OrderedDict((key, properties[key]) for rank, key in ranked)

    def __len__(self):
        return len(self.queriesByIndex)

    def __iter__(self):
        return iter(self.queriesByIndex)

    def get(self, index):
        """Return the query registered under index (raises IndexError if there is none)."""
        return self.queriesByIndex[index]

    def set(self, description, cypher, var_to, cnt_to=0, cnt_rel=0, var_from=None, cnt_from=0, parent=None):
        """Register a query, or refresh the counts of an identical existing one.

        Queries are identified by a hash of their Cypher fragment and their
        parent's hash.  Returns the new or the already registered Query.
        """
        digest = hashfunc((cypher + (u'' if parent is None else parent.hash)).encode('UTF-8')).hexdigest()
        query = self.queriesByHash.get(digest)
        if query is None:
            query = Query(self, len(self.queriesByIndex), digest, description, cypher, var_to, cnt_to, cnt_rel, var_from, cnt_from, parent)
            self.queriesByIndex.append(query)
            self.queriesByHash[digest] = query
        else:
            query.cnt_from, query.cnt_rel, query.cnt_to = (cnt_from, cnt_rel, cnt_to)
        return query

    def pop(self):
        """Remove the most recently registered query."""
        self.queriesByHash.pop(self.queriesByIndex.pop().hash)

    def clear(self, till_index=0):
        """Remove all queries with an index of till_index or higher."""
        while len(self.queriesByIndex) > till_index:
            self.pop()

    def run_query(self, *arg, **kwarg):
        """Run a Cypher statement in a fresh session and return its records as a list.

        The records are fetched before the session closes, so callers never
        read from a result whose session is already gone.
        """
        with self.driver.session() as session:
            return list(session.run(*arg, **kwarg))


# Main: interactive command loop.
inspector = Neo4jInspector(DRIVER)
nr_initial_queries = len(inspector)
# Command grammar: either a manual query subject (var_to) or [<|>]nr, followed by
# optional .prop/!prop filters and an optional trailing = (list instead of inspect).
# The backslash is doubled because u'' literals cannot be raw strings on Python 3.
query_re = re.compile(u'^(?:(?P<var_to>[`a-z][^.!=]*)|(?P<from_or_to>[<>])?(?P<nr>\\d+))(?P<propsq>(?:[.!].*?)*)?(?P<list>=)?$', re.UNICODE | re.IGNORECASE)
do_print_queries = True
do_print_help = True
while True:
    if do_print_queries:
        do_print_queries = False
        print(u'Queries (#):')
        for index, query in enumerate(inspector):
            if index == nr_initial_queries:
                print(u'-----')
            print(query.get_description())
        print(u'')

    if do_print_help:
        do_print_help = False
        print(u'Commands:')
        print(u'(<|>)#[(.|!)[prop]...][=]')
        print(u'    Inspect or list (=) the source (<) or target (>) nodes of given query (#),')
        print(u'    optionally filtered by having (.) or lacking (!) given properties')
        print(u'    (all or none if prop omitted).\n')
        print(u'subject[(.|!)[prop]...][=]\\ncypher\\n\\n')
        print(u'    Inspect or list (=) the nodes of given subject from given Cypher query')
        print(u'    (without RETURN and ORDER BY clauses). The query will also be added to the')
        print(u'    list of available queries.\n')
        print(u':')
        print(u'    List queries\n')
        print(u'~')
        print(u'    Drop all but the initial queries for unique label combinations\n')
        print(u'?')
        print(u'    Print this help information\n')

    try:
        line = read_stdin(u'>>> ').strip()
    except EOFError:
        break
    except (KeyboardInterrupt, Exception):
        # Ctrl-C or an unreadable line: abandon this input and prompt again.
        print(u'')
        continue

    if line == u'':
        continue
    if line == u':':
        do_print_queries = True
        continue
    if line == u'?':
        do_print_help = True
        continue
    if line == u'~':
        inspector.clear(nr_initial_queries)
        print(u'Reset to initial queries\n')
        continue

    m = query_re.search(line)
    if not m:
        print(u'Invalid command (? = help)\n')
        continue

    # Manual query registered by THIS command, if any.  Tracked explicitly so the
    # error handler never touches a query left over from an earlier iteration.
    added_query = None
    try:
        # Fetch query
        if m.group(u'var_to') is None:  # Defined query
            query = inspector.get(int(m.group(u'nr')))
            use_from = (m.group(u'from_or_to') == u'<')
        else:  # Manual query
            var_to = m.group(u'var_to').strip()
            cypher_lines = []
            try:
                while True:
                    line = read_stdin(u'... ').strip()
                    if line == u'':
                        break
                    cypher_lines.append(line)
            except EOFError:
                continue
            nr_before = len(inspector)
            query = inspector.set(
                description=u'| {0} |*{{cnt_to:d}}'.format(u' '.join(cypher_lines).replace(u'{', u'{{').replace(u'}', u'}}')),
                cypher=u'\n'.join(cypher_lines),
                var_to=var_to
            )
            if len(inspector) > nr_before:
                added_query = query
            use_from = False

        # Inspect or list
        if m.group(u'list') is None:
            query.inspect(use_from, m.group(u'propsq'))
        else:
            query.list(use_from, m.group(u'propsq'))
        print(u'')

    except (Exception, KeyboardInterrupt) as e:
        # Ctrl-C aborts a long running command; SystemExit is left alone.
        print(u'ERROR: {0}\n'.format(e))
        if added_query is not None and added_query.index == (len(inspector) - 1):
            inspector.pop()  # Drop newly added manual query

print(u'')
sys.exit(0)
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	'''
	Python 2/3 utility for interactively unraveling structure from a Neo4j graph database.
	First adjust the DRIVER statement for the database you wish to explore. Then run python neo4j_explore.py.
	The script will first scan the database for unique combinations of labels, and offer you queries to explore these.
	E.g. >0 (A, B)*123 presents a query for exploring 123 nodes with both labels A and B by typing >0.
	First the possible properties are listed, e.g. .[someprop]*67 means 67 out of 123 nodes have property someprop.
	The square brackets emphasize the property does not always exist.
	Second possible outgoing relations are listed, e.g. <>5 >*10 [somereltype]*50 (C, D)*20 means 10 out of the 123
	nodes together have 50 outgoing relations of type somereltype to 20 distinct nodes with both labels C and D.
	Again the square brackets emphasize the relation does not always exist.
	To explore the target (C, D) nodes you can type >5. Or if you wish to explore the source (A, B) nodes that do
	have these somereltype relations to (C, D) nodes type <5.
	To iterate source or target nodes and print their property values type <5= or >5= respectively.
	You can additionally filter for having or not-having certain properties by adding .someprop or !someprop sequences
	after the query number. E.g. >5.someprop!otherprop will list only those that do have someprop but not otherprop.
	Special query >5.= lists nodes that have at least one property, and >5!= lists nodes that have no properties at all.
	Besides using queries offered by the script you can add and run manual queries as well. First type the subject
	variable of the query, e.g. n, and then type your query without RETURN/ORDER clause followed by an extra newline,
	e.g. MATCH (n) WHERE id(n) < 10.
	'''

	from __future__ import absolute_import, division, print_function
	import sys
	import readline
	import re
	from collections import OrderedDict
	from hashlib import sha256 as hashfunc
	from neo4j.v1 import GraphDatabase, basic_auth


	# Settings: ADJUST ME!
	# Replace HOST_OR_IP, USER and PASSWORD below with the Bolt address and credentials
	# of the database to explore; the driver object is created as soon as this line runs.
	DRIVER = GraphDatabase.driver(uri=u'bolt://HOST_OR_IP:7687', auth=basic_auth(u'USER', u'PASSWORD'))

	# Utils
	def _make_read_stdin():
	    """Build the prompt-reading function used for all interactive input.

	    On Python 3 this is simply the builtin ``input``.  On Python 2 it wraps
	    ``raw_input`` and decodes the bytes it returns so callers always get text.

	    The Python 2 detection uses a NameError probe instead of
	    ``vars(__builtins__)``, because ``__builtins__`` is a plain dict (not a
	    module) whenever this file is imported rather than run as a script.
	    """
	    try:
	        py2_raw_input = raw_input  # noqa: F821 -- only defined on Python 2
	    except NameError:
	        return input

	    def _read(prompt=u''):
	        # The prompt is optional so that read_stdin() works like input() does.
	        # sys.stdin.encoding is None when stdin is not a terminal on Python 2.
	        return py2_raw_input(prompt).decode(sys.stdin.encoding or 'utf-8')
	    return _read


	read_stdin = _make_read_stdin()

	# Model
	class Query:
	    """One step of a (possibly chained) Cypher query that can be inspected or listed.

	    A query holds its own Cypher fragment plus an optional parent; the complete
	    statement is the parent chain's fragments followed by this fragment.  The
	    counts are cached from the last unfiltered inspection and substituted into
	    the description template whenever the query is displayed.
	    """

	    VARFORMAT = u'_{var}'   # We format our own variables like this, avoid using vars like these in manual queries!

	    def __init__(self, inspector, index, hash, description, cypher, var_to, cnt_to=0, cnt_rel=0, var_from=None, cnt_from=0, parent=None):
	        self.inspector = inspector  # Neo4jInspector bound to
	        self.index = index  # index in collections this Query is used in
	        self.hash = hash  # hash of this query and its parent
	        self.description = description  # description template (unicode) with {cnt_from}/{cnt_rel}/{cnt_to} fields
	        self.cypher = cypher  # cypher query (unicode)
	        self.var_to = var_to  # variable name of the target subject in the cypher query (unicode)
	        self.cnt_to = cnt_to  # last nr matching target nodes (int), 0 if unknown yet
	        self.cnt_rel = cnt_rel  # last nr of relations (int), 0 if unknown or not applicable
	        self.var_from = var_from  # variable name of the originating subject in the cypher query (unicode), None if not applicable
	        self.cnt_from = cnt_from  # last nr matching originating nodes (int), 0 if unknown or not applicable
	        self.parent = parent  # parent query (Query)

	    def var(self, alias):
	        """Return a script-owned Cypher variable name unique to this query, e.g. ``_r5``."""
	        return self.VARFORMAT.format(var=u'{0}{1}'.format(alias, self.index))

	    def get_cypher(self):
	        """Return the full Cypher text: the parent chain's fragments, then this one."""
	        if self.parent is None:
	            return self.cypher
	        return self.parent.get_cypher() + u'\n' + self.cypher

	    def get_description(self, full=True, with_index=True):
	        """Return the human readable description with the cached counts filled in.

	        full       -- prefix the descriptions of all ancestors.
	        with_index -- prefix the ``>N`` / ``<>N`` command hint for this query.
	        """
	        description = self.description.format(cnt_from=self.cnt_from, cnt_rel=self.cnt_rel, cnt_to=self.cnt_to)
	        if full and self.parent is not None:
	            description = self.parent.get_description(True, False) + u' ' + description
	        if not with_index:
	            return description
	        hint = u'{0}>{1}'.format(u' ' if self.var_from is None else u'<', self.index)
	        return u'{0:<5}  {1}'.format(hint, description)

	    @staticmethod
	    def _quote(name):
	        """Return name as a backtick-quoted Cypher identifier (embedded backticks doubled)."""
	        return u'`{0}`'.format(name.replace(u'`', u'``'))

	    @staticmethod
	    def _escape_braces(text):
	        """Double curly braces so text survives a later str.format() pass unchanged."""
	        return text.replace(u'{', u'{{').replace(u'}', u'}}')

	    @staticmethod
	    def _where_props(subjectvar, propsq):
	        """Translate a ``.prop!other`` filter string into a Cypher WHERE clause.

	        ``.name`` requires the property, ``!name`` forbids it.  A bare ``.`` or
	        ``!`` means "has any property" / "has no properties" and overrides all
	        named filters.  Returns u'' when there is nothing to filter on, else a
	        string starting with u' WHERE '.
	        """
	        filters = OrderedDict(
	            (m.group(2).strip(), m.group(1) == u'.')
	            for m in re.finditer(u'([.!])([^.!]*)', propsq or u'', re.UNICODE)
	        )
	        if u'' in filters:
	            where = [u'properties({0}) {1} {{}}'.format(subjectvar, u'<>' if filters[u''] else u'=')]
	        else:
	            where = []
	            for key, has_or_not in filters.items():
	                # Quote the key like every other generated query does, so keys with
	                # spaces or punctuation work; keys the user already quoted are kept.
	                already_quoted = len(key) >= 2 and key[0] == u'`' and key[-1] == u'`'
	                ref = key if already_quoted else Query._quote(key)
	                where.append(u'({0}EXISTS({1}.{2}))'.format(u'' if has_or_not else u'NOT ', subjectvar, ref))
	        return (u' WHERE ' + u' AND '.join(where)) if where else u''

	    def inspect(self, use_var_from=False, propsq=u''):
	        """Print property and outgoing-relation statistics for this query's subject nodes.

	        use_var_from -- inspect the originating nodes instead of the target nodes.
	        propsq       -- optional property filter, see _where_props().

	        Every relation type / target label set found is registered with the
	        inspector as a child query.  Raises ValueError when the originating
	        nodes are requested but this query has none.
	        """
	        subjectvar = self.var_from if use_var_from else self.var_to
	        if subjectvar is None:
	            raise ValueError(u'No source subject in this query')
	        propsq = propsq or u''
	        where = u'WITH *' + self._where_props(subjectvar, propsq)
	        cypher = u'{0}\n{1}'.format(self.get_cypher(), where)
	        print(cypher + u'\nRETURN DISTINCT ' + subjectvar)

	        if self.var_from is None:
	            cntq = u'{0} RETURN 0 as cnt_from, 0 as cnt_rel, count(DISTINCT {1}) AS cnt_to'.format(cypher, self.var_to)
	        else:
	            cntq = u'{0} RETURN count(DISTINCT {1}) AS cnt_from, count(*) AS cnt_rel, count(DISTINCT {2}) AS cnt_to'.format(cypher, self.var_from, self.var_to)
	        try:
	            r = next(iter(self.inspector.run_query(cntq)))
	        except StopIteration:
	            subjectcnt = 0
	        else:
	            subjectcnt = r[u'cnt_from'] if use_var_from else r[u'cnt_to']
	            if propsq == u'':
	                # Only unfiltered counts describe the query itself, so only those are cached.
	                self.cnt_from, self.cnt_rel, self.cnt_to = (r[u'cnt_from'], r[u'cnt_rel'], r[u'cnt_to'])
	        print(u'== {0:=<77}'.format(u'{0}*{1:d} '.format(subjectvar, subjectcnt)))
	        if subjectcnt == 0:
	            return
	        cypher += u'\nWITH DISTINCT ' + subjectvar

	        # Properties: one count query per known property key.
	        for key in self.inspector.propertykeys:
	            countq = u'{0} WHERE exists({1}.{2}) RETURN count(*) AS cnt'.format(cypher, subjectvar, self._quote(key))
	            cnt = next(iter(self.inspector.run_query(countq)))[u'cnt']
	            if cnt > 0:
	                print(u'       .{0}*{1:d}'.format(key if cnt == subjectcnt else u'[{0}]'.format(key), cnt))

	        # Outgoing relations, grouped by relation type and target label set.
	        var_rel = self.var(u'r')
	        var_to = self.var(u'n')
	        relqueries = []
	        for r in self.inspector.run_query(u'{0} MATCH ({1})-[{2}]->({3}) WITH {1}, {2}, {3} RETURN DISTINCT type({2}) AS type, labels({3}) AS labels, count(DISTINCT {1}) AS cnt_from, count(DISTINCT {2}) AS cnt_rel, count(DISTINCT {3}) AS cnt_to'.format(cypher, subjectvar, var_rel, var_to)):
	            if r[u'cnt_rel'] <= 0:
	                continue
	            rel_type = r[u'type']
	            labels = self.inspector.sort_labels(r[u'labels'])
	            shown_type = rel_type if r[u'cnt_from'] == subjectcnt else u'[{0}]'.format(rel_type)
	            # The description is formatted a second time when displayed, so any
	            # braces in user or database supplied text must be escaped here.
	            description = u'{propsq}>*{{cnt_from:d}} {type}*{{cnt_rel:d}} ( {labels} )*{{cnt_to:d}}'.format(
	                propsq=self._escape_braces(propsq),
	                type=self._escape_braces(shown_type),
	                labels=self._escape_braces(u', '.join(labels))
	            )
	            # One condition per label plus the exact label count; an unlabelled
	            # target therefore yields just the size() test instead of an empty label.
	            conditions = [u'{0}:{1}'.format(var_to, self._quote(label)) for label in labels]
	            conditions.append(u'size(labels({0})) = {1}'.format(var_to, len(labels)))
	            child_cypher = u'{0} MATCH ({1})-[{2}:{3}]->({4}) WHERE {5}'.format(
	                where, subjectvar, var_rel, self._quote(rel_type), var_to, u' AND '.join(conditions)
	            )
	            relqueries.append((
	                rel_type,
	                self.inspector.set(
	                    description=description,
	                    cypher=child_cypher,
	                    var_to=var_to,
	                    cnt_to=r[u'cnt_to'],
	                    cnt_rel=r[u'cnt_rel'],
	                    var_from=subjectvar,
	                    cnt_from=r[u'cnt_from'],
	                    parent=self
	                )
	            ))
	        relationtypes = self.inspector.relationtypes
	        unknown_rank = len(relationtypes)  # relation types created after the initial scan sort last
	        for _, relquery in sorted(relqueries, key=lambda item: (relationtypes.get(item[0], unknown_rank), item[0])):
	            print(relquery.get_description(False))

	        print(u'{0:=<80}'.format(u''))

	    def list(self, use_var_from=False, propsq=u''):
	        """Page through the subject nodes, printing properties and outgoing relations.

	        After each node the user presses Enter to continue; Ctrl-C or Ctrl-D
	        stops the listing.  Parameters are the same as for inspect().  Raises
	        ValueError when the originating nodes are requested but there are none.
	        """
	        subjectvar = self.var_from if use_var_from else self.var_to
	        if subjectvar is None:
	            raise ValueError(u'No source subject in this query')

	        var_rel = self.var(u'r')
	        var_to = self.var(u'n')
	        cypher = u'{0}\nWITH DISTINCT {2}{1}\nOPTIONAL MATCH ({2})-[{3}]->({4})'.format(self.get_cypher(), self._where_props(subjectvar, propsq or u''), subjectvar, var_rel, var_to)
	        print(cypher + u'\nRETURN DISTINCT ' + subjectvar)
	        cypher += u'\nRETURN DISTINCT {0}, type({1}) as type, labels({2}) as labels, count(DISTINCT {2}) as cnt_to ORDER BY id({0}), type, labels'.format(subjectvar, var_rel, var_to)

	        print(u'{0:=<80}'.format(u''))
	        prev_id = None
	        for r in self.inspector.run_query(cypher):
	            node = r[subjectvar]
	            if node.id != prev_id:
	                if prev_id is not None:
	                    try:
	                        # Explicit empty prompt: the Python 2 reader requires one.
	                        read_stdin(u'')
	                    except (EOFError, KeyboardInterrupt):
	                        break
	                prev_id = node.id
	                print(u'\n({1}) {0:d}:'.format(node.id, u', '.join(self.inspector.sort_labels(node.labels))))
	                for key, value in self.inspector.sort_properties(node.properties).items():
	                    print(u'.{0:<16s}\t{1}'.format(key, repr(value)))
	            if r[u'type'] is not None:
	                print(u'> {0} ({1})*{2}'.format(r[u'type'], u', '.join(self.inspector.sort_labels(r[u'labels'])), r[u'cnt_to']))
	        print(u'\n{0:=<80}'.format(u''))

	class Neo4jInspector():
	    """Registry of explorable queries plus cached schema statistics for one database.

	    On construction (and on reset()) the database is scanned for distinct
	    label sets, relation types and property keys.  Each label set becomes an
	    initial query; further queries are registered through set().
	    """

	    def __init__(self, driver):
	        self.driver = driver  # Neo4j GraphDatabase driver
	        self.reset()

	    @staticmethod
	    def _quote(name):
	        """Return name as a backtick-quoted Cypher identifier (embedded backticks doubled)."""
	        return u'`{0}`'.format(name.replace(u'`', u'``'))

	    def reset(self):
	        """Drop all queries and rescan labels, relation types and property keys."""
	        self.queriesByIndex = []
	        self.queriesByHash = {}
	        self.label_uses = {}  # label -> number of distinct label sets it occurs in

	        print(u'Scanning labels')
	        labelsets = [(r[u'labels'], r[u'cnt']) for r in self.run_query(u'MATCH (n) RETURN DISTINCT labels(n) AS labels, count(*) AS cnt')]
	        for labels, cnt in labelsets:
	            for label in labels:
	                self.label_uses[label] = self.label_uses.get(label, 0) + 1
	        for labels, cnt in sorted((self.sort_labels(labels), cnt) for labels, cnt in labelsets):
	            # One condition per label plus the exact label count; unlabelled nodes
	            # therefore get just the size() test instead of an empty label name.
	            conditions = [u'n:{0}'.format(self._quote(label)) for label in labels]
	            conditions.append(u'size(labels(n)) = {0:d}'.format(len(labels)))
	            self.set(
	                description=u'( {0} )*{{cnt_to:d}}'.format(u', '.join(labels).replace(u'{', u'{{').replace(u'}', u'}}')),
	                cypher=u'MATCH (n) WHERE ' + u' AND '.join(conditions),
	                var_to=u'n',
	                cnt_to=cnt
	            )

	        print(u'Scanning relation types')
	        # Relation type -> display rank; types reaching the most distinct label sets come first.
	        self.relationtypes = OrderedDict(
	            (r[u'type'], index)
	            for index, r in enumerate(self.run_query(u'MATCH ()-[r]->(n) RETURN DISTINCT type(r) as type, count(DISTINCT labels(n)) AS cnt ORDER BY cnt DESC, type ASC'))
	        )

	        print(u'Scanning property keys')
	        # Property key -> display rank; keys used by the most distinct label sets come first.
	        ranked_keys = sorted(
	            (
	                -next(iter(self.run_query(
	                    u'MATCH (n) WHERE exists(n.{0}) RETURN count(DISTINCT labels(n)) AS cnt'.format(self._quote(r[u'propertyKey']))
	                )))[u'cnt'],
	                r[u'propertyKey']
	            )
	            for r in self.run_query(u'CALL db.propertyKeys()')
	        )
	        self.propertykeys = OrderedDict((key, index) for index, (cnt, key) in enumerate(ranked_keys))

	    def sort_labels(self, labels):
	        """Return labels ordered by how many label sets use them (fewest first), then by name.

	        Labels that did not exist during the last scan count as used zero times.
	        """
	        return [label for uses, label in sorted((self.label_uses.get(label, 0), label) for label in labels)]

	    def sort_properties(self, properties):
	        """Return properties as an OrderedDict in the scanned display order.

	        Keys that did not exist during the last scan are placed last, by name.
	        """
	        unknown_rank = len(self.propertykeys)
	        ranked = sorted((self.propertykeys.get(key, unknown_rank), key) for key in properties)
	        return OrderedDict((key, properties[key]) for rank, key in ranked)

	    def __len__(self):
	        return len(self.queriesByIndex)

	    def __iter__(self):
	        return iter(self.queriesByIndex)

	    def get(self, index):
	        """Return the query registered under index (raises IndexError if there is none)."""
	        return self.queriesByIndex[index]

	    def set(self, description, cypher, var_to, cnt_to=0, cnt_rel=0, var_from=None, cnt_from=0, parent=None):
	        """Register a query, or refresh the counts of an identical existing one.

	        Queries are identified by a hash of their Cypher fragment and their
	        parent's hash.  Returns the new or the already registered Query.
	        """
	        digest = hashfunc((cypher + (u'' if parent is None else parent.hash)).encode('UTF-8')).hexdigest()
	        query = self.queriesByHash.get(digest)
	        if query is None:
	            query = Query(self, len(self.queriesByIndex), digest, description, cypher, var_to, cnt_to, cnt_rel, var_from, cnt_from, parent)
	            self.queriesByIndex.append(query)
	            self.queriesByHash[digest] = query
	        else:
	            query.cnt_from, query.cnt_rel, query.cnt_to = (cnt_from, cnt_rel, cnt_to)
	        return query

	    def pop(self):
	        """Remove the most recently registered query."""
	        self.queriesByHash.pop(self.queriesByIndex.pop().hash)

	    def clear(self, till_index=0):
	        """Remove all queries with an index of till_index or higher."""
	        while len(self.queriesByIndex) > till_index:
	            self.pop()

	    def run_query(self, *arg, **kwarg):
	        """Run a Cypher statement in a fresh session and return its records as a list.

	        The records are fetched before the session closes, so callers never
	        read from a result whose session is already gone.
	        """
	        with self.driver.session() as session:
	            return list(session.run(*arg, **kwarg))


	# Main: interactive command loop.
	inspector = Neo4jInspector(DRIVER)
	nr_initial_queries = len(inspector)
	# Command grammar: either a manual query subject (var_to) or [<|>]nr, followed by
	# optional .prop/!prop filters and an optional trailing = (list instead of inspect).
	# The backslash is doubled because u'' literals cannot be raw strings on Python 3.
	query_re = re.compile(u'^(?:(?P<var_to>[`a-z][^.!=]*)|(?P<from_or_to>[<>])?(?P<nr>\\d+))(?P<propsq>(?:[.!].*?)*)?(?P<list>=)?$', re.UNICODE | re.IGNORECASE)
	do_print_queries = True
	do_print_help = True
	while True:
	    if do_print_queries:
	        do_print_queries = False
	        print(u'Queries (#):')
	        for index, query in enumerate(inspector):
	            if index == nr_initial_queries:
	                print(u'-----')
	            print(query.get_description())
	        print(u'')

	    if do_print_help:
	        do_print_help = False
	        print(u'Commands:')
	        print(u'(<|>)#[(.|!)[prop]...][=]')
	        print(u'    Inspect or list (=) the source (<) or target (>) nodes of given query (#),')
	        print(u'    optionally filtered by having (.) or lacking (!) given properties')
	        print(u'    (all or none if prop omitted).\n')
	        print(u'subject[(.|!)[prop]...][=]\\ncypher\\n\\n')
	        print(u'    Inspect or list (=) the nodes of given subject from given Cypher query')
	        print(u'    (without RETURN and ORDER BY clauses). The query will also be added to the')
	        print(u'    list of available queries.\n')
	        print(u':')
	        print(u'    List queries\n')
	        print(u'~')
	        print(u'    Drop all but the initial queries for unique label combinations\n')
	        print(u'?')
	        print(u'    Print this help information\n')

	    try:
	        line = read_stdin(u'>>> ').strip()
	    except EOFError:
	        break
	    except (KeyboardInterrupt, Exception):
	        # Ctrl-C or an unreadable line: abandon this input and prompt again.
	        print(u'')
	        continue

	    if line == u'':
	        continue
	    if line == u':':
	        do_print_queries = True
	        continue
	    if line == u'?':
	        do_print_help = True
	        continue
	    if line == u'~':
	        inspector.clear(nr_initial_queries)
	        print(u'Reset to initial queries\n')
	        continue

	    m = query_re.search(line)
	    if not m:
	        print(u'Invalid command (? = help)\n')
	        continue

	    # Manual query registered by THIS command, if any.  Tracked explicitly so the
	    # error handler never touches a query left over from an earlier iteration.
	    added_query = None
	    try:
	        # Fetch query
	        if m.group(u'var_to') is None:  # Defined query
	            query = inspector.get(int(m.group(u'nr')))
	            use_from = (m.group(u'from_or_to') == u'<')
	        else:  # Manual query
	            var_to = m.group(u'var_to').strip()
	            cypher_lines = []
	            try:
	                while True:
	                    line = read_stdin(u'... ').strip()
	                    if line == u'':
	                        break
	                    cypher_lines.append(line)
	            except EOFError:
	                continue
	            nr_before = len(inspector)
	            query = inspector.set(
	                description=u'| {0} |*{{cnt_to:d}}'.format(u' '.join(cypher_lines).replace(u'{', u'{{').replace(u'}', u'}}')),
	                cypher=u'\n'.join(cypher_lines),
	                var_to=var_to
	            )
	            if len(inspector) > nr_before:
	                added_query = query
	            use_from = False

	        # Inspect or list
	        if m.group(u'list') is None:
	            query.inspect(use_from, m.group(u'propsq'))
	        else:
	            query.list(use_from, m.group(u'propsq'))
	        print(u'')

	    except (Exception, KeyboardInterrupt) as e:
	        # Ctrl-C aborts a long running command; SystemExit is left alone.
	        print(u'ERROR: {0}\n'.format(e))
	        if added_query is not None and added_query.index == (len(inspector) - 1):
	            inspector.pop()  # Drop newly added manual query

	print(u'')
	sys.exit(0)