Skip to content

Instantly share code, notes, and snippets.

@reedstrm
Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save reedstrm/b3faff9b0d9cb9af9fd5 to your computer and use it in GitHub Desktop.
Save reedstrm/b3faff9b0d9cb9af9fd5 to your computer and use it in GitHub Desktop.
Cleaning up our namespace mess
# Sample CNX authoring document, kept verbatim as JSON text.
#
# The literal must be a *raw* string: the JSON contains backslash escapes
# (\" inside the HTML attribute values, \n between elements, \\ in the TeX
# annotation).  In a normal triple-quoted string Python would process those
# escapes itself, leaving unescaped quotes and raw control characters that
# json.loads() rejects.
#
# The "content" value is kept on one physical line: a literal newline inside
# a JSON string (previously splitting the `<mi xmlns=...>` tag) is a control
# character that the strict JSON decoder refuses.
mjs=r"""{"publishers": [{"website": null, "hasAccepted": true, "surname": "Reedstrom", "firstname": "Ross", "title": "Dr", "emails": ["ross.reedstrom@rice.edu", "ross@cnx.org"], "id": "reedstrm", "requester": "reedstrm", "assignmentDate": "2015-02-10T14:57:51.543583-06:00", "fullname": "Ross Reedstrom", "type": "cnx-id", "email": ""}], "version": "draft", "derivedFrom": null, "abstract": "<div xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:bib=\"http://bibtexml.sf.net/\" xmlns:data=\"http://dev.w3.org/html5/spec/#custom\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:lrmi=\"http://lrmi.net/the-specification\" class=\"description\" itemprop=\"description\" data-type=\"description\">\n <div xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:bib=\"http://bibtexml.sf.net/\" xmlns:data=\"http://dev.w3.org/html5/spec/#custom\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:lrmi=\"http://lrmi.net/the-specification\" class=\"description\" itemprop=\"description\" data-type=\"description\">\n <p> </p>\n </div>\n </div>", "revised": "2015-03-25T09:08:19.298644-05:00", "cnx-archive-uri": "78bc0570-40d5-4ef0-a805-0c480a0e85d7", "derivedFromUri": null, "containedIn": [], "isPublishable": true, "printStyle": null, "derivedFromTitle": null, "authors": [{"website": null, "hasAccepted": true, "surname": "Reedstrom", "firstname": "Ross", "title": "Dr", "emails": ["ross.reedstrom@rice.edu", "ross@cnx.org"], "id": "reedstrm", "requester": "reedstrm", "assignmentDate": "2015-02-10T14:57:51.543697-06:00", "fullname": "Ross Reedstrom", "type": "cnx-id", "email": ""}], "keywords": [], "mediaType": "application/vnd.org.cnx.module", "id": "78bc0570-40d5-4ef0-a805-0c480a0e85d7", "permissions": ["edit", "publish", "view"], "license": {"url": "http://creativecommons.org/licenses/by/4.0/",
"version": "4.0", "name": "Attribution", "abbr": "by"}, "licensors": [{"website": null, "hasAccepted": true, "surname": "Reedstrom", "firstname": "Ross", "title": "Dr", "emails": ["ross.reedstrom@rice.edu", "ross@cnx.org"], "id": "reedstrm", "requester": "reedstrm", "assignmentDate": "2015-02-10T14:57:51.543655-06:00", "fullname": "Ross Reedstrom", "type": "cnx-id", "email": ""}], "publication": null, "language": "en", "title": "Test Page Alpha", "created": "2015-02-05T12:14:56-06:00", "editors": [], "publishBlockers": null, "content": "<p xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:bib=\"http://bibtexml.sf.net/\" xmlns:data=\"http://dev.w3.org/html5/spec/#custom\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:lrmi=\"http://lrmi.net/the-specification\">Only users known to the legacy system on this one</p><div xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:bib=\"http://bibtexml.sf.net/\" xmlns:data=\"http://dev.w3.org/html5/spec/#custom\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:lrmi=\"http://lrmi.net/the-specification\" data-label=\"tip\" class=\"note\"><span class=\"title\">Dealing with tracebacks<br/></span><p>Runtime errors in most languages will generate a traceback, either directly, or via a core dump file which can then be interpreted by a debugger</p><br/></div><p><br/></p><p><math xmlns=\"http://www.w3.org/1998/Math/MathML\" xmlns=\"http://www.w3.org/1998/Math/MathML\"><semantics xmlns=\"http://www.w3.org/1998/Math/MathML\"><mrow xmlns=\"http://www.w3.org/1998/Math/MathML\">\n <msqrt xmlns=\"http://www.w3.org/1998/Math/MathML\">\n <mn xmlns=\"http://www.w3.org/1998/Math/MathML\">36</mn>\n </msqrt>\n <mo xmlns=\"http://www.w3.org/1998/Math/MathML\">+</mo>\n <msup xmlns=\"http://www.w3.org/1998/Math/MathML\">\n <mi xmlns=\"http://www.w3.org/1998/Math/MathML\">x</mi>\n <mn xmlns=\"http://www.w3.org/1998/Math/MathML\">3</mn>\n </msup>\n <mo xmlns=\"http://www.w3.org/1998/Math/MathML\">=</mo>\n <msup xmlns=\"http://www.w3.org/1998/Math/MathML\">\n <mi xmlns=\"http://www.w3.org/1998/Math/MathML\">y</mi>\n <mn xmlns=\"http://www.w3.org/1998/Math/MathML\">2</mn>\n </msup>\n</mrow><annotation xmlns=\"http://www.w3.org/1998/Math/MathML\" encoding=\"math/tex\">\\sqrt{36} + x^3 = y^2</annotation></semantics></math>\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n <br/></p>", "state": "Draft", "subjects": [], "copyrightHolders": [{"website": null, "hasAccepted": true, "surname": "Reedstrom", "firstname": "Ross", "title": "Dr", "emails": ["ross.reedstrom@rice.edu", "ross@cnx.org"], "id": "reedstrm", "requester": "reedstrm", "assignmentDate": "2015-02-10T14:57:51.543655-06:00", "fullname": "Ross Reedstrom", "type": "cnx-id", "email": ""}], "submitter": {"surname": "Reedstrom", "fullname": "Ross J. Reedstrom", "type": "cnx-id", "id": "reedstrm", "firstname": "Ross"}, "illustrators": [], "translators": []}
"""
from lxml import etree
import json
# Decode the embedded sample document.  (The same payload could instead be
# read from a file such as m.json with json.load.)
jm = json.loads(mjs)

# ns_clean=True asks the parser to drop redundant namespace declarations
# (the keyword is `ns_clean`; `ns_cleanup` is not an XMLParser option).
# recover=True is required because the fragment's <math> element declares
# `xmlns` twice, which is a well-formedness error a strict parser rejects.
xp = etree.XMLParser(ns_clean=True, recover=True)

# Wrap the fragment in a single root element that declares every namespace
# once, so the identical declarations repeated on the child elements become
# redundant and are candidates for clean-up.
body_open = (
    u'<body xmlns="http://www.w3.org/1999/xhtml"'
    u' xmlns:bib="http://bibtexml.sf.net/"'
    u' xmlns:data="http://dev.w3.org/html5/spec/#custom"'
    u' xmlns:epub="http://www.idpf.org/2007/ops"'
    u' xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"'
    u' xmlns:dc="http://purl.org/dc/elements/1.1/"'
    u' xmlns:lrmi="http://lrmi.net/the-specification">'
)

# etree.fromstring parses markup held in a string (etree.parse expects a
# file or filename).  The wrapper is closed with a complete `</body>` tag.
e = etree.fromstring(body_open + jm['content'] + u'</body>', xp)

# Serialise to text and show the cleaned-up tree; the call form of print
# works on both Python 2 and Python 3.
print(etree.tostring(e, encoding='unicode'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment