taikedz/README.md

## README.md

      
    Raw
  

              README.md
            
          
    This is a relatively simple JSON object traverser: feed it either a JSON string or a nested set of dictionaries/lists and use a path notation to access individual items.

It supports paths separated with either / or any separator string you specify
It supports wildcards * to iterate over multiple values
Arrays/lists are accessed simply by number instead of names, e.g. name_with_array/0/property_of_zeroth_item
If a bad query token or data type is encountered, it raises an error that names the offending path

It is relatively naive, insomuch as it will create a new array in the output object for each wildcard used along the path. For instance, here is how to get all subnet definitions from the output of docker inspect <networks ...>:
docker_network_ids = ["789af6","ec87f65"]
stdout,stderr = subprocess.Popen("docker inspect".split(" ") + docker_network_ids, stdout=subprocess.PIPE ).communicate()

# jsonwalker at play:

result = jsonlib.JSONWalker(stdout).get('*/IPAM/Config/*/Subnet')

for ipam in result: # array of subnets, though the piece we next accessed is the 'IPAM' property
    for subnet in ipam: # again, array of 'Config', but it's the Subnet we accessed directly
        print(subnet)
You can use a flatten=True argument on the get() method to attempt to intelligently flatten the results. The above becomes thus:
result = jsonlib.JSONWalker(stdout).get('*/IPAM/Config/*/Subnet', flatten=True)

for subnet in result:
    print(subnet)
There are no unit tests, nor performance tests. Use at your own discretion.

  
## jsonwalker.py
# (C) Tai Kedzierski
# Provided under LGPLv3
# https://www.gnu.org/licenses/lgpl-3.0.txt

import json
import re

class JSONWalker:
    """
    Walk JSON data using a simplified path notation.

    A path is a series of tokens separated by a '/', or other separator. No token can have the separator in its name.
    Empty tokens (from leading, trailing or doubled separators) are ignored.

        my/json/path

    Arrays must be accessed via a non-negative number.
    '*' iterates over all array entries.

        cars/0/make  # returns a value (assuming make is a single value)
        cars/*/make  # returns a list of values

    Dictionaries are accessed by name. Accessing a dictionary by name directly returns the dictionary itself.
    '*' iterates over all names. Using '*' strips names, returning a list instead.

        cars/*/passengers    # returns a list of passenger dicts
        cars/*/passengers/*  # returns a list of values from each passenger dict property

    Attributes:
        jsondata: the decoded dict/list structure being walked.
        separator: separator used by the most recent `get()` call.
        flatten: flatten flag used by the most recent `get()` call.
    """

    def __init__(self, jsondata):
        """
        Create a new JSONWalker from a JSON dict/list object, or from raw JSON text.

        Raw text may be `str`, `bytes` or `bytearray` (e.g. the stdout of a
        subprocess); it is decoded with `json.loads`. Anything else is stored as-is.
        """
        if isinstance(jsondata, (str, bytes, bytearray)):
            self.jsondata = json.loads(jsondata)
        else:
            self.jsondata = jsondata

    def get(self, path, separator='/', flatten=False):
        """
        Return all items found along the path. Path tokens are split along the separator.

        Normally for each wildcard used, a nested array is created. Specify `flatten=True` to attempt to flatten into a flat array.

        Raises:
            ValueError: the path continues past a value that is neither a dict nor a list.
            IndexError: a list was addressed with a non-numeric token or an out-of-range index.
            KeyError: a dict was addressed with a name it does not contain.
        """
        self.separator = separator
        self.flatten = flatten

        # Dropping empty tokens up front makes '', '/a', 'a//b' and 'a/' all
        # behave sensibly instead of indexing into an exhausted token list.
        tokens = [token for token in path.split(separator) if token]

        return self.__extract(tokens, self.jsondata, [])

    def __extract(self, path, data, history=None):
        """
        Resolve the remaining `path` tokens against `data`.

        `history` holds the tokens consumed so far and is only used to build
        error messages; it is never mutated, so sibling branches and later
        calls cannot see each other's trail.
        """
        if history is None:
            history = []
        if not path:
            return data

        if isinstance(data, dict):
            return self.__extractDict(path, data, history)
        if isinstance(data, list):
            return self.__extractList(path, data, history)
        raise ValueError("Invalid type %s at %s"%(type(data), self.separator.join(history)))

    def __extractDict(self, path, data, history=None):
        """Consume one token against a dict: either a key name or the '*' wildcard."""
        if history is None:
            history = []
        if not path:
            return data

        token, rest = path[0], path[1:]

        if token == '*':
            # One fork per key; each fork gets its own copy of the trail.
            return [
                self.__extract(rest, value, history + [str(key)])
                for key, value in data.items()
            ]

        trail = history + [token]
        try:
            value = data[token]
        except KeyError as err:
            raise KeyError("No such key: %s at %s"%(token, self.separator.join(trail))) from err
        return self.__extract(rest, value, trail)

    def __extractList(self, path, data, history=None):
        """Consume one token against a list: either a numeric index or the '*' wildcard."""
        if history is None:
            history = []
        if not path:
            return data

        token, rest = path[0], path[1:]

        if token == '*':
            forks = [
                self.__extract(rest, item, history + [str(index)])
                for index, item in enumerate(data)
            ]
            return self.__attemptCollate(forks)

        trail = history + [token]
        # fullmatch with '+' accepts multi-digit indices such as '10'.
        if not re.fullmatch(r"[0-9]+", token):
            raise IndexError("Indax NaN: %s at %s"%(token, self.separator.join(trail)) )

        try:
            item = data[int(token)]
        except IndexError as err:
            raise IndexError("Index out of range: %s at %s"%(token, self.separator.join(trail)) ) from err
        return self.__extract(rest, item, trail)

    def __attemptCollate(self, forks):
        """
        Merge a list of lists into one flat list when flattening is enabled.

        Flattening only happens when every fork is a list, so the result shape
        does not depend on whether the first fork happens to be empty, and
        scalar/dict forks are never iterated by accident.
        """
        if self.flatten and forks and all(isinstance(fork, list) for fork in forks):
            return [item for fork in forks for item in fork]
        return forks
	# (C) Tai Kedzierski
	# Provided under LGPLv3
	# https://www.gnu.org/licenses/lgpl-3.0.txt

	import json
	import re

	class JSONWalker:
	    """
	    Walk JSON data using a simplified path notation.

	    A path is a series of tokens separated by a '/', or other separator. No token can have the separator in its name.
	    Empty tokens (from leading, trailing or doubled separators) are ignored.

	        my/json/path

	    Arrays must be accessed via a non-negative number.
	    '*' iterates over all array entries.

	        cars/0/make  # returns a value (assuming make is a single value)
	        cars/*/make  # returns a list of values

	    Dictionaries are accessed by name. Accessing a dictionary by name directly returns the dictionary itself.
	    '*' iterates over all names. Using '*' strips names, returning a list instead.

	        cars/*/passengers    # returns a list of passenger dicts
	        cars/*/passengers/*  # returns a list of values from each passenger dict property

	    Attributes:
	        jsondata: the decoded dict/list structure being walked.
	        separator: separator used by the most recent `get()` call.
	        flatten: flatten flag used by the most recent `get()` call.
	    """

	    def __init__(self, jsondata):
	        """
	        Create a new JSONWalker from a JSON dict/list object, or from raw JSON text.

	        Raw text may be `str`, `bytes` or `bytearray` (e.g. the stdout of a
	        subprocess); it is decoded with `json.loads`. Anything else is stored as-is.
	        """
	        if isinstance(jsondata, (str, bytes, bytearray)):
	            self.jsondata = json.loads(jsondata)
	        else:
	            self.jsondata = jsondata

	    def get(self, path, separator='/', flatten=False):
	        """
	        Return all items found along the path. Path tokens are split along the separator.

	        Normally for each wildcard used, a nested array is created. Specify `flatten=True` to attempt to flatten into a flat array.

	        Raises:
	            ValueError: the path continues past a value that is neither a dict nor a list.
	            IndexError: a list was addressed with a non-numeric token or an out-of-range index.
	            KeyError: a dict was addressed with a name it does not contain.
	        """
	        self.separator = separator
	        self.flatten = flatten

	        # Dropping empty tokens up front makes '', '/a', 'a//b' and 'a/' all
	        # behave sensibly instead of indexing into an exhausted token list.
	        tokens = [token for token in path.split(separator) if token]

	        return self.__extract(tokens, self.jsondata, [])

	    def __extract(self, path, data, history=None):
	        """
	        Resolve the remaining `path` tokens against `data`.

	        `history` holds the tokens consumed so far and is only used to build
	        error messages; it is never mutated, so sibling branches and later
	        calls cannot see each other's trail.
	        """
	        if history is None:
	            history = []
	        if not path:
	            return data

	        if isinstance(data, dict):
	            return self.__extractDict(path, data, history)
	        if isinstance(data, list):
	            return self.__extractList(path, data, history)
	        raise ValueError("Invalid type %s at %s"%(type(data), self.separator.join(history)))

	    def __extractDict(self, path, data, history=None):
	        """Consume one token against a dict: either a key name or the '*' wildcard."""
	        if history is None:
	            history = []
	        if not path:
	            return data

	        token, rest = path[0], path[1:]

	        if token == '*':
	            # One fork per key; each fork gets its own copy of the trail.
	            return [
	                self.__extract(rest, value, history + [str(key)])
	                for key, value in data.items()
	            ]

	        trail = history + [token]
	        try:
	            value = data[token]
	        except KeyError as err:
	            raise KeyError("No such key: %s at %s"%(token, self.separator.join(trail))) from err
	        return self.__extract(rest, value, trail)

	    def __extractList(self, path, data, history=None):
	        """Consume one token against a list: either a numeric index or the '*' wildcard."""
	        if history is None:
	            history = []
	        if not path:
	            return data

	        token, rest = path[0], path[1:]

	        if token == '*':
	            forks = [
	                self.__extract(rest, item, history + [str(index)])
	                for index, item in enumerate(data)
	            ]
	            return self.__attemptCollate(forks)

	        trail = history + [token]
	        # fullmatch with '+' accepts multi-digit indices such as '10'.
	        if not re.fullmatch(r"[0-9]+", token):
	            raise IndexError("Indax NaN: %s at %s"%(token, self.separator.join(trail)) )

	        try:
	            item = data[int(token)]
	        except IndexError as err:
	            raise IndexError("Index out of range: %s at %s"%(token, self.separator.join(trail)) ) from err
	        return self.__extract(rest, item, trail)

	    def __attemptCollate(self, forks):
	        """
	        Merge a list of lists into one flat list when flattening is enabled.

	        Flattening only happens when every fork is a list, so the result shape
	        does not depend on whether the first fork happens to be empty, and
	        scalar/dict forks are never iterated by accident.
	        """
	        if self.flatten and forks and all(isinstance(fork, list) for fork in forks):
	            return [item for fork in forks for item in fork]
	        return forks