SKalt/_0_xml_to_json.md

## _0_xml_to_json.md

      
    Raw
  

              _0_xml_to_json.md
            
          
    lxml's current FAQ includes a method for transforming XML into a dict of dicts, but not into JSON.  The mismatch between a dict of dicts and JSON occurs when an element has multiple children with the same tag name.  Under JSON conventions, multiple children of the same name are equivalent to an array (a list or tuple in Python). The Python functions below attempt to add this repeated-tags-to-list behaviour.  I'd appreciate suggestions for improvements.

  
## _1_xml_to_json.py
def recursive_dict(element):
    """Recursively convert an element's children into a JSON-style structure.

    Works on any element object that supports ``len()``, iteration over its
    children, ``.tag`` and ``.text`` (for example ``lxml.etree._Element``).

    Args:
        element: The element to convert.

    Returns:
        ``element.text`` when the element has no children. Otherwise a dict
        mapping each child tag to its converted value, or to a list of
        converted values (in document order) when the tag occurs more than
        once. Attributes, and the text of elements that have children, are
        not included in the result.
    """
    if not len(element):
        return element.text

    results = {}
    # Tags already promoted to a list. Tracked explicitly so the decision
    # never depends on the type or truthiness of a stored value.
    repeated = set()
    for child in element:
        tag = child.tag
        value = recursive_dict(child)
        # Membership test, not truthiness: a first occurrence whose value is
        # None or "" (an empty element) must still count as "seen", otherwise
        # the next sibling with the same tag would overwrite it.
        if tag not in results:
            results[tag] = value
        elif tag in repeated:
            results[tag].append(value)
        else:
            results[tag] = [results[tag], value]
            repeated.add(tag)
    return results

## _2_xml_to_json.py
def tojson(element):
    """Convert an element's children into a JSON-style structure.

    Works on any element object that supports iteration over its children,
    ``.tag`` and ``.text`` (for example ``lxml.etree._Element``).

    Args:
        element: The element to convert.

    Returns:
        ``element.text`` when the element has no children. Otherwise a dict
        mapping each distinct child tag to the converted child when the tag
        occurs once, or to a list of converted children (in document order)
        when it occurs more than once.
    """
    # Group children by tag in a single pass. This avoids one XPath query per
    # tag (which cannot address Clark-notation "{namespace}tag" names) and
    # keeps the keys in document order rather than arbitrary set order.
    children_by_tag = {}
    for child in element:
        children_by_tag.setdefault(child.tag, []).append(child)

    if not children_by_tag:
        return element.text

    results = {}
    for tag, children_with_tag in children_by_tag.items():
        if len(children_with_tag) == 1:
            # Recurse into this function itself; the previous target
            # ``tojson_2`` is not defined alongside it.
            results[tag] = tojson(children_with_tag[0])
        else:
            results[tag] = [tojson(child) for child in children_with_tag]
    return results
	def recursive_dict(element):
	    """Recursively convert an element's children into a JSON-style structure.

	    Works on any element object that supports ``len()``, iteration over its
	    children, ``.tag`` and ``.text`` (for example ``lxml.etree._Element``).

	    Args:
	        element: The element to convert.

	    Returns:
	        ``element.text`` when the element has no children. Otherwise a dict
	        mapping each child tag to its converted value, or to a list of
	        converted values (in document order) when the tag occurs more than
	        once. Attributes, and the text of elements that have children, are
	        not included in the result.
	    """
	    if not len(element):
	        return element.text

	    results = {}
	    # Tags already promoted to a list. Tracked explicitly so the decision
	    # never depends on the type or truthiness of a stored value.
	    repeated = set()
	    for child in element:
	        tag = child.tag
	        value = recursive_dict(child)
	        # Membership test, not truthiness: a first occurrence whose value
	        # is None or "" must still count as "seen", otherwise the next
	        # sibling with the same tag would overwrite it.
	        if tag not in results:
	            results[tag] = value
	        elif tag in repeated:
	            results[tag].append(value)
	        else:
	            results[tag] = [results[tag], value]
	            repeated.add(tag)
	    return results
	def tojson(element):
	    """Convert an element's children into a JSON-style structure.

	    Works on any element object that supports iteration over its children,
	    ``.tag`` and ``.text`` (for example ``lxml.etree._Element``).

	    Args:
	        element: The element to convert.

	    Returns:
	        ``element.text`` when the element has no children. Otherwise a dict
	        mapping each distinct child tag to the converted child when the tag
	        occurs once, or to a list of converted children (in document order)
	        when it occurs more than once.
	    """
	    # Group children by tag in a single pass. This avoids one XPath query
	    # per tag (which cannot address Clark-notation "{namespace}tag" names)
	    # and keeps the keys in document order rather than arbitrary set order.
	    children_by_tag = {}
	    for child in element:
	        children_by_tag.setdefault(child.tag, []).append(child)

	    if not children_by_tag:
	        return element.text

	    results = {}
	    for tag, children_with_tag in children_by_tag.items():
	        if len(children_with_tag) == 1:
	            # Recurse into this function itself; the previous target
	            # ``tojson_2`` is not defined alongside it.
	            results[tag] = tojson(children_with_tag[0])
	        else:
	            results[tag] = [tojson(child) for child in children_with_tag]
	    return results