Skip to content

Instantly share code, notes, and snippets.

@MikeyBurkman
Created April 30, 2020 16:37
Show Gist options
  • Save MikeyBurkman/76ab65246d6244ab123cf2d94d228fbd to your computer and use it in GitHub Desktop.
Save MikeyBurkman/76ab65246d6244ab123cf2d94d228fbd to your computer and use it in GitHub Desktop.
XPath for TypeScript
import { SelectedValue, useNamespaces } from 'xpath';
import { DOMParser } from 'xmldom';
import * as _ from 'lodash';
import { VError } from 'verror';
/**
* Typed XPath helper API, as returned by `buildParser`.
*
* Every selector takes an XPath `selector` plus a `node` to evaluate it against. `node` may be
* an already-parsed Node or a raw XML string; a string is parsed before selecting.
*/
interface Parser {
/**
* Parses an XML string into a Document.
*/
parseXml: (xml: string) => Document;
/**
* Type guard that returns true iff the selected value is a Node.
*/
isNode(selectedValue: SelectedValue | undefined): selectedValue is Node;
/**
* Type guard that returns true iff the selected value is either missing or a Node.
*/
isNodeMaybe(
selectedValue: SelectedValue | undefined
): selectedValue is Node | undefined;
/**
* Type guard that returns true iff the selected value is an Attr.
*/
isAttrNode(selectedValue: SelectedValue | undefined): selectedValue is Attr;
/**
* Type guard that returns true iff the selected value is either missing or an Attr.
*/
isAttrNodeMaybe(
selectedValue: SelectedValue | undefined
): selectedValue is Attr | undefined;
/**
* Type guard that returns true iff the selected value is a Text.
*/
isTextNode(selectedValue: SelectedValue | undefined): selectedValue is Text;
/**
* Type guard that returns true iff the selected value is either missing or a Text.
*/
isTextNodeMaybe(
selectedValue: SelectedValue | undefined
): selectedValue is Text | undefined;
/**
* Selects the first Node to match the given selector. Will throw an error if it doesn't exist or
* the selector matches a non-Node
*/
selectFirstNode(selector: string, node: string | Node): Node;
/**
* Selects the first Node to match the given selector, or returns undefined if nothing matches.
* Will throw an error if the selector matches a non-Node
*/
selectFirstNodeMaybe(selector: string, node: string | Node): Node | undefined;
/**
* Selects the first Text node to match the given selector. Will throw an error if it doesn't exist or
* the selector matches a non-Text node
*/
selectFirstTextNode(selector: string, node: string | Node): Text;
/**
* Selects the first Text node to match the given selector, or returns undefined if nothing matches.
* Will throw an error if the selector matches a non-Text node
*/
selectFirstTextNodeMaybe(
selector: string,
node: string | Node
): Text | undefined;
/**
* Selects the first Attr node to match the given selector. Will throw an error if it doesn't exist or
* the selector matches a non-Attr node
*/
selectFirstAttrNode(selector: string, node: string | Node): Attr;
/**
* Selects the first Attr node to match the given selector, or returns undefined if nothing matches.
* Will throw an error if the selector matches a non-Attr node
*/
selectFirstAttrNodeMaybe(
selector: string,
node: string | Node
): Attr | undefined;
/**
* Selects all Nodes that match the given selector. Will throw an error if the selector matches any non-Nodes.
*/
selectNodes(selector: string, node: string | Node): Node[];
/**
* Selects all Text nodes that match the given selector. Will throw an error if the selector matches any non-Text nodes.
*/
selectTextNodes(selector: string, node: string | Node): Text[];
/**
* Selects all Attr nodes that match the given selector. Will throw an error if the selector matches any non-Attr nodes.
*/
selectAttrNodes(selector: string, node: string | Node): Attr[];
}
/**
 * Builds an XPath parser bound to the provided namespaces.
 *
 * `namespaces` is handed straight to xpath's `useNamespaces`, so it maps each *prefix* used in
 * your selectors to its namespace URI. Keys are bare prefixes, without a leading `xmlns:`.
 * For instance, if you are parsing SOAP messages, you will need to provide at least
 * `{ soap: 'http://schemas.xmlsoap.org/soap/envelope/' }` and can then select `//soap:Envelope`.
 *
 * @param namespaces Prefix-to-namespace-URI map. Defaults to no namespaces.
 * @returns A `Parser` whose `select*` functions throw a `VError` (carrying `info` about the
 *   selector and what was found) when a result is missing or has an unexpected type.
 */
export const buildParser = (
  namespaces: Record<string, string> = {}
): Parser => {
  // DOM `nodeType` values for the node kinds this parser distinguishes.
  const ATTRIBUTE_NODE = 2;
  const TEXT_NODE = 3;

  const select = useNamespaces(namespaces);

  // The MIME type is passed explicitly so the input is always treated as XML rather than
  // relying on the parser's behaviour when the argument is omitted.
  const parseXml = (xml: string) =>
    new DOMParser().parseFromString(xml, 'text/xml');

  /**
   * Accepts either raw XML or an already-parsed node and returns a node to select against.
   * Kept separate so the caller-supplied parameter is never reassigned.
   */
  const toNode = (node: string | Node): Node =>
    typeof node === 'string' ? parseXml(node) : node;

  const isNode = (
    selectedValue: SelectedValue | undefined
  ): selectedValue is Node => _.hasIn(selectedValue, 'nodeType');

  const isNodeMaybe = (
    selectedValue: SelectedValue | undefined
  ): selectedValue is Node | undefined =>
    _.isNil(selectedValue) || isNode(selectedValue);

  /**
   * HOF for verifying that a value is a Node with the correct nodeType
   */
  function isNodeType<T extends Node>(nodeType: number) {
    return function(
      selectedValue: SelectedValue | undefined
    ): selectedValue is T {
      return isNode(selectedValue) && selectedValue.nodeType === nodeType;
    };
  }

  /**
   * HOF for verifying that a value is either null/undefined, OR a node with
   * the correct nodeType
   */
  function isNodeTypeMaybe<T extends Node>(nodeType: number) {
    const isExactType = isNodeType<T>(nodeType);
    return function(
      selectedValue: SelectedValue | undefined
    ): selectedValue is T | undefined {
      return _.isNil(selectedValue) || isExactType(selectedValue);
    };
  }

  /**
   * HOF for doing a selectFirst and then asserting that the returned value is a specific type.
   * The returned function throws a VError when the guard rejects the selected value.
   */
  function selectFirstX<T extends SelectedValue | undefined>(
    isX: (sv: SelectedValue | undefined) => sv is T
  ) {
    return (selector: string, node: string | Node): T => {
      const selected = select(selector, toNode(node), true);
      if (isX(selected)) {
        return selected;
      }
      throw new VError(
        {
          info: {
            selector,
            // Nil check rather than truthiness: '', 0 and false are real XPath results.
            resultFound: !_.isNil(selected),
            // The guard has already rejected the value on this path.
            isRightType: false
          }
        },
        'Unable to find first value for selectFirst; might be wrong type'
      );
    };
  }

  /**
   * HOF for doing a select and then asserting that every returned value is a specific type.
   * The returned function throws a VError for the first value the guard rejects.
   */
  function selectX<T extends SelectedValue | undefined>(
    isX: (sv: SelectedValue | undefined) => sv is T
  ) {
    return (selector: string, node: string | Node): T[] => {
      const result = select(selector, toNode(node));
      // Expressions such as `count(...)` or `string(...)` evaluate to a scalar rather than a
      // node list. Wrap it so it goes through the same guard and fails with a VError instead
      // of a TypeError from calling `.map` on a non-array.
      const values: Array<SelectedValue | undefined> = Array.isArray(result)
        ? result
        : [result];
      return values.map((selected, idx) => {
        if (isX(selected)) {
          return selected;
        }
        throw new VError(
          {
            info: {
              selector,
              idx,
              // Nil check rather than truthiness: '', 0 and false are real XPath results.
              resultFound: !_.isNil(selected),
              // The guard has already rejected the value on this path.
              isRightType: false
            }
          },
          'Unable to find a value for select; might be wrong type'
        );
      });
    };
  }

  const isAttrNode = isNodeType<Attr>(ATTRIBUTE_NODE);
  const isAttrNodeMaybe = isNodeTypeMaybe<Attr>(ATTRIBUTE_NODE);
  const isTextNode = isNodeType<Text>(TEXT_NODE);
  const isTextNodeMaybe = isNodeTypeMaybe<Text>(TEXT_NODE);

  const parser: Parser = {
    parseXml,
    isNode,
    isNodeMaybe,
    isAttrNode,
    isAttrNodeMaybe,
    isTextNode,
    isTextNodeMaybe,
    selectFirstNode: selectFirstX(isNode),
    selectFirstNodeMaybe: selectFirstX(isNodeMaybe),
    selectFirstAttrNode: selectFirstX(isAttrNode),
    selectFirstAttrNodeMaybe: selectFirstX(isAttrNodeMaybe),
    selectFirstTextNode: selectFirstX(isTextNode),
    selectFirstTextNodeMaybe: selectFirstX(isTextNodeMaybe),
    selectNodes: selectX(isNode),
    selectAttrNodes: selectX(isAttrNode),
    selectTextNodes: selectX(isTextNode)
  };
  return parser;
};
import { assert } from 'chai';
import { buildParser } from './index';
describe(__filename, () => {
  it('Should do basic parsing with namespaces', () => {
    // Fixture: mostly un-prefixed elements, plus one element in the `bookml` namespace so
    // that the namespace map given to buildParser is actually exercised below.
    const xml = `
<book xmlns:bookml="http://example.com/book">
<title id="55">Harry Potter</title>
<bookml:publisher>Bloomsbury</bookml:publisher>
<characters>
<character>Harry</character>
<character>Ron</character>
</characters>
</book>`;
    // Keys are the prefixes used in selectors; values are the namespace URIs.
    const parser = buildParser({ bookml: 'http://example.com/book' });

    // The "selectFirst" functions that don't end in "Maybe" will always match something.
    // If they do not match, they will throw a VError with some extra details.
    assert.equal(
      parser.selectFirstTextNode('//book/title/text()', xml).data,
      'Harry Potter'
    );
    assert.throws(() => parser.selectFirstTextNode('//book/idontexist', xml));

    // You can pre-parse the xml string first, and pass that document object around.
    // This is recommended if you are going to be selecting multiple things from the xml.
    const doc = parser.parseXml(xml);
    assert.equal(
      parser.selectFirstTextNode('//book/title/text()', doc).data,
      'Harry Potter'
    );
    assert.equal(
      parser.selectFirstAttrNode('//book/title/@id', doc).value,
      '55'
    );

    // Prefixed selectors are resolved through the namespaces given to buildParser.
    assert.equal(
      parser.selectFirstTextNode('//bookml:publisher/text()', doc).data,
      'Bloomsbury'
    );

    // There are also functions to return an array of nodes.
    // All nodes in the array must match the expected type, or a VError is thrown.
    assert.deepEqual(
      parser
        .selectTextNodes('//book/characters/character/text()', doc)
        .map((node) => node.data),
      ['Harry', 'Ron']
    );

    // Can also pull out inner elements and then use them, instead of making very long selector strings.
    const characters = parser.selectFirstNode('//book/characters', doc);
    assert.deepEqual(
      parser
        .selectTextNodes('character/text()', characters)
        .map((node) => node.data),
      ['Harry', 'Ron']
    );

    // The "Maybe" functions are allowed to return undefined if the selector doesn't match anything.
    assert.isUndefined(
      parser.selectFirstTextNodeMaybe('//book/idontexist', doc)
    );
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment