Skip to content

Instantly share code, notes, and snippets.

@erikvullings
Last active November 18, 2023 17:39
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikvullings/5c5638842eaa4fa88c0f4a987ea45da2 to your computer and use it in GitHub Desktop.
Save erikvullings/5c5638842eaa4fa88c0f4a987ea45da2 to your computer and use it in GitHub Desktop.
Convert XML to JSON using the browser's DOMParser without external dependencies.
/**
 * Generic string-keyed object with values of any type.
 * Used as the type constraint for the result of `xml2json`.
 */
export interface IObject {
[key: string]: any;
}
/*
This work is licensed under Creative Commons GNU LGPL License.
License: http://creativecommons.org/licenses/LGPL/2.1/
Version: 0.9
Author: Stefan Goessner/2006,
Conversion: Erik Vullings/2021 converted to TypeScript
Web: https://goessner.net/download/prj/jsonxml/
*/
/**
 * Convert an XML string to a plain object using the browser's `DOMParser`.
 *
 * Mapping rules:
 * - attributes become properties prefixed with `@`;
 * - child elements become properties named after the element; elements that
 *   occur more than once under the same parent are collected into an array;
 * - an element that only contains text becomes that text, or gets a `#text`
 *   property when it also has attributes;
 * - a single CDATA section is stored in a `#cdata` property;
 * - mixed content (child elements next to two or more non-whitespace text
 *   nodes or two or more CDATA sections) is kept as serialized inner XML;
 * - an element without attributes and without children becomes `null`;
 * - whitespace-only text nodes are removed; comments and other non-element,
 *   non-text nodes are ignored.
 *
 * After the tree is built, every string value that is a valid number is
 * converted to a number (empty and whitespace-only strings stay strings).
 *
 * @param xmlStr the XML input as string
 * @param excludeKeys names of properties (element names, or attribute names
 *   including the `@` prefix) whose string values must not be converted to
 *   numbers; this also applies to the items of repeated elements.
 * @param jsonifyKeys if true, rewrite every key: when the key contains a run of
 *   two or more upper-case letters, the first such run is lower-cased except
 *   for its last letter (`XMLName` becomes `xmlName`); otherwise the first
 *   character is lower-cased (`Item` becomes `item`).
 * @returns an object with a single property, named after the root element
 */
export const xml2json = <T extends IObject>(
  xmlStr: string,
  excludeKeys: Set<string> = new Set(),
  jsonifyKeys = false
): T => {
  // DOM nodeType values used below
  const ELEMENT_NODE = 1;
  const TEXT_NODE = 3;
  const CDATA_NODE = 4;
  const DOCUMENT_NODE = 9;
  const nonWhitespace = /[^ \f\n\r\t\v]/;
  const multipleUpperCaseLetters = /([A-Z]+)([A-Z])/;

  /** Recursively remove whitespace-only text nodes below `parent`. */
  const removeWhite = (parent: Node): void => {
    parent.normalize();
    let child: ChildNode | null = parent.firstChild;
    while (child) {
      // Remember the successor first: it is looked up before `child` is detached.
      const next: ChildNode | null = child.nextSibling;
      if (child.nodeType === TEXT_NODE) {
        if (!nonWhitespace.test(child.nodeValue || '')) parent.removeChild(child);
      } else if (child.nodeType === ELEMENT_NODE) {
        removeWhite(child);
      }
      child = next;
    }
  };

  /** Serialize an element, text or CDATA node to markup; other node types yield ''. */
  const serialize = (node: Node): string => {
    if (node.nodeType === ELEMENT_NODE) {
      const element = node as Element;
      let markup = '<' + element.nodeName;
      for (const attr of Array.from(element.attributes))
        markup += ' ' + attr.nodeName + '="' + (attr.nodeValue || '') + '"';
      if (!element.firstChild) return markup + '/>';
      markup += '>';
      for (let c: ChildNode | null = element.firstChild; c; c = c.nextSibling)
        markup += serialize(c);
      return markup + '</' + element.nodeName + '>';
    }
    if (node.nodeType === TEXT_NODE) return node.nodeValue || '';
    if (node.nodeType === CDATA_NODE) return '<![CDATA[' + node.nodeValue + ']]>';
    return '';
  };

  /** Content of `node` as a string: `innerHTML` when available, else manual serialization. */
  const innerXml = (node: Node): string => {
    // NOTE(review): innerHTML presumably returns entity-escaped markup (e.g. `&amp;`)
    // for XML elements - confirm whether pure-text elements should use textContent.
    if ('innerHTML' in node) return (node as Element).innerHTML;
    let content = '';
    for (let c: ChildNode | null = node.firstChild; c; c = c.nextSibling) content += serialize(c);
    return content;
  };

  /** Convert one element (and its subtree) according to the mapping rules above. */
  const toObj = (element: Element): IObject | string | null => {
    const attributes = Array.from(element.attributes);
    const hasAttributes = attributes.length > 0;
    const result: IObject = {};
    // Values are stored raw: no JSON text is built, so backslashes, quotes
    // and control characters need no escaping.
    for (const attr of attributes) result['@' + attr.nodeName] = attr.nodeValue || '';

    if (!element.firstChild) return hasAttributes ? result : null;

    let textCount = 0;
    let cdataCount = 0;
    let hasElementChild = false;
    for (let c: ChildNode | null = element.firstChild; c; c = c.nextSibling) {
      if (c.nodeType === ELEMENT_NODE) hasElementChild = true;
      else if (c.nodeType === TEXT_NODE && c.nodeValue && nonWhitespace.test(c.nodeValue))
        textCount++;
      else if (c.nodeType === CDATA_NODE) cdataCount++;
    }

    // Inner content as a string: the value itself, or `#text` next to the attributes.
    const asText = (): IObject | string => {
      const content = innerXml(element);
      if (!hasAttributes) return content;
      result['#text'] = content;
      return result;
    };

    if (hasElementChild) {
      // mixed content is kept as markup
      if (textCount >= 2 || cdataCount >= 2) return asText();

      // structured element with at most one text and/or one cdata node
      removeWhite(element);
      for (let c: ChildNode | null = element.firstChild; c; c = c.nextSibling) {
        if (c.nodeType === TEXT_NODE) {
          if (c.nodeValue) result['#text'] = c.nodeValue;
        } else if (c.nodeType === CDATA_NODE) {
          if (c.nodeValue) result['#cdata'] = c.nodeValue;
        } else if (c.nodeType === ELEMENT_NODE) {
          const name = c.nodeName;
          const value = toObj(c as Element);
          // Own-property check (not truthiness), so an empty first occurrence
          // (null) is kept when the element is repeated.
          if (!Object.prototype.hasOwnProperty.call(result, name)) result[name] = value;
          else if (Array.isArray(result[name])) result[name].push(value);
          else result[name] = [result[name], value];
        }
        // comments, processing instructions etc. carry no data and are skipped
      }
      return result;
    }

    if (textCount) return asText(); // pure text

    if (cdataCount) {
      // several cdata sections are kept as markup (attributes are not included)
      if (cdataCount > 1) return innerXml(element);
      for (let c: ChildNode | null = element.firstChild; c; c = c.nextSibling)
        if (c.nodeType === CDATA_NODE) result['#cdata'] = c.nodeValue || '';
      return result;
    }

    // only ignorable children (e.g. comments): attributes, or an empty object
    return result;
  };

  /** Turn numeric strings into numbers, unless the key is excluded or the string is blank. */
  const toNumber = (key: string, value: unknown): unknown =>
    typeof value === 'string' &&
    !excludeKeys.has(key) &&
    value.trim() !== '' && // +'' is 0, but an empty value is not a number
    !isNaN(+value)
      ? +value
      : value;

  /** Apply the `jsonifyKeys` rule to one key. */
  const renameKey = (key: string): string => {
    if (!jsonifyKeys) return key;
    return multipleUpperCaseLetters.test(key)
      ? key.replace(
          multipleUpperCaseLetters,
          (_full: string, head: string, tail: string) => `${head.toLowerCase()}${tail}`
        )
      : key.charAt(0).toLowerCase() + key.slice(1);
  };

  /** Recursively convert values to numbers and rename keys; `key` is the original property name. */
  const convert = (key: string, value: unknown): unknown => {
    // items of a repeated element are treated like a single value of that element
    if (Array.isArray(value)) return value.map((item) => convert(key, item));
    if (value && typeof value === 'object') {
      const source = value as IObject;
      const target: IObject = {};
      for (const k of Object.keys(source)) target[renameKey(k)] = convert(k, source[k]);
      return target;
    }
    return toNumber(key, value);
  };

  const parsed: Node = new DOMParser().parseFromString(xmlStr, 'text/xml');
  const root =
    parsed.nodeType === DOCUMENT_NODE ? (parsed as Document).documentElement : (parsed as Element);
  removeWhite(root);
  return convert('', { [root.nodeName]: toObj(root) }) as T;
};
@abrman
Copy link

abrman commented Jan 24, 2023

This is lovely and worked for most of my XML files, but then I hit a case with a Windows-style path where I wasn't able to get it running using your code. I see you're trying to escape backslashes on line 153, but I wasn't able to pinpoint why it's happening.

Here's a snippet for testing if you'd like to have a look at it.

<?xml version="1.0"?>
<Project name="C:\Path\To\File.xml"></Project>

I can't guarantee it's valid xml, however it's a snippet of exported file from Autocad Civil 3D, so I'd assume it's valid.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment