Skip to content

Instantly share code, notes, and snippets.

@karanlyons
Last active September 6, 2018 08:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save karanlyons/01aaca21f213f8183c9c to your computer and use it in GitHub Desktop.
Save karanlyons/01aaca21f213f8183c9c to your computer and use it in GitHub Desktop.
Poor Man’s JS XPath (With support for wildcard globbing, regex matches, and array slice notation.)
var slice_re = new RegExp(/^(.*?)\[(-?\d*?)(:?)(-?\d*?)(:?)(-?\d*?)\]$/);
function xpath(path, objects) {
var selectors, selector, is_array_selector, array_components, array_components_length, array_rules, j, i, is_regex_selector, tail_path, objects_length, heap, object, matches, key, matches_length, match, array_start, array_end, array_interval, _, k;
if (!Array.isArray(objects)) {
objects = [objects];
}
selectors = path.split('.');
selector = selectors[0];
is_array_selector = slice_re.test(selector);
if (is_array_selector) {
array_components = selector.match(slice_re).filter(function (element) { return element !== '' }).slice(1);
selector = array_components.shift();
array_components_length = array_components.length;
array_rules = [0, undefined, 1];
j = 0;
for (i=0; i < array_components_length; i++) {
if (array_components[i] == ':') {
j += 1;
}
else {
array_rules[j] = parseInt(array_components[i]);
}
}
}
is_regex_selector = selector.slice(0, 1) === '/' && selector.slice(-1) === '/';
if (is_regex_selector) {
selector = selector.slice(1, -1);
}
else if (selector.indexOf('*') !== -1) {
selector = '^' + selector.replace('*', '.*?') + '$';
is_regex_selector = true;
}
if (is_regex_selector) {
selector = new RegExp(selector);
}
tail_path = selectors.length > 1? selectors.slice(1).join('.') : null;
objects_length = objects.length;
heap = [];
for (i=0; i < objects_length; i++) {
object = objects[i];
matches = [];
if (is_regex_selector) {
for (key in object) {
if (key.match(selector) !== null && object.hasOwnProperty(key)) {
matches.push(key);
}
}
}
else {
matches = [selector];
}
matches_length = matches.length;
for (j=0; j < matches_length; j++) {
match = matches[j];
if (object.hasOwnProperty(match)) {
if (is_array_selector && Array.isArray(object[match])) {
if (array_rules[0] < 0) {
array_start = Math.max(0, object[match].length + array_rules[0]);
}
else {
array_start = array_rules[0];
}
array_start = array_rules[0];
if (array_rules[1] === undefined) {
array_end = object[match].length;
}
else if (array_rules[1] < 0) {
array_end = Math.max(0, object[match].length + array_rules[1]);
}
else {
array_end = Math.min(object[match].length, array_rules[1]);
}
array_interval = array_rules[2];
if (array_start > array_end && array_interval > 0) {
array_interval *= -1;
}
else if (array_interval < 0 && array_start < array_end) {
_ = array_start;
array_start = array_end;
array_end = _;
}
for (k=array_start; (array_start <= array_end && k < array_end) || (array_start >= array_end && k >= array_end); k += array_interval) {
if (object[match][k] !== undefined) {
heap.push(object[match][k]);
}
}
}
else {
heap.push(object[match]);
}
}
}
}
if (tail_path !== null) {
return xpath(tail_path, heap);
}
else {
return heap;
}
}
// Sample library used by the demo below: one record per author, each with a
// list of books carrying a title and page/sales statistics.
var test_data = [
	{
		author: "J. K. Rowling",
		books: [
			{ title: "Harry Potter and the Philosopher's Stone", stats: { pages: 500, sales: 25000000 } },
			{ title: "Harry Potter and the Deathly Hallows", stats: { pages: 800, sales: 30000000 } },
			{ title: "The Silkworm", stats: { pages: 400, sales: 1200000 } }
		],
		country: "England"
	},
	{
		author: "John Locke",
		books: [
			{ title: "Two Treatises of Government", stats: { pages: 200, sales: 60000000 } },
			{ title: "Thoughts Concerning Education", stats: { pages: 150, sales: 80000000 } }
		],
		country: "England"
	},
	{
		author: "Sun Tzu",
		books: [
			{ title: "The Art of War", stats: { pages: 150, sales: 250000000 } }
		],
		country: "China"
	}
];
/**
 * Evaluates each path against `test_data` and renders the results as text.
 *
 * Every entry is the quoted path followed by the tab-indented, pretty-printed
 * JSON of the matches and a blank line.
 *
 * @param {string[]} paths - Paths to evaluate with `xpath`.
 * @returns {string} The concatenated report; empty when `paths` is empty.
 */
function test(paths) {
	var chunks = [];
	var index, rendered;
	for (index = 0; index < paths.length; index += 1) {
		// Indent every line of the JSON by one extra tab so it nests under the path heading.
		rendered = JSON.stringify(xpath(paths[index], test_data), null, '\t').replace(/\n/g, '\n\t');
		chunks.push("'" + paths[index] + "'" + ':\n\t' + rendered + '\n\n');
	}
	return chunks.join('');
}
// Demo: print the matches for a representative set of paths over test_data.
console.log(test([
// Empty slice: every element of each `books` array, then its `title`.
'books[].title',
// Nested plain keys after the slice.
'books[].stats.pages',
// `*` glob: every own key of each `stats` object.
'books[].stats.*',
// Plain key lookup on the top-level records.
'author',
// Glob with a literal suffix: keys ending in "es".
'books[].stats.*es',
// Text between slashes is used as a regular expression against the keys.
'books[].stats./^pages|sales$/',
// Ascending slice: indexes 0 and 1 (end is exclusive).
'books[0:2].title',
// Start above end: walked downwards from index 2 to index 0.
'books[2:0].title',
// Negative step: elements in reverse order.
'books[::-1].title',
]));
@karanlyons
Copy link
Author

Returns:

'books[].title':
    [
        "Harry Potter and the Philosopher's Stone",
        "Harry Potter and the Deathly Hallows",
        "The Silkworm",
        "Two Treatises of Government",
        "Thoughts Concerning Education",
        "The Art of War"
    ]

'books[].stats.pages':
    [
        500,
        800,
        400,
        200,
        150,
        150
    ]

'books[].stats.*':
    [
        500,
        25000000,
        800,
        30000000,
        400,
        1200000,
        200,
        60000000,
        150,
        80000000,
        150,
        250000000
    ]

'author':
    [
        "J. K. Rowling",
        "John Locke",
        "Sun Tzu"
    ]

'books[].stats.*es':
    [
        500,
        25000000,
        800,
        30000000,
        400,
        1200000,
        200,
        60000000,
        150,
        80000000,
        150,
        250000000
    ]

'books[].stats./^pages|sales$/':
    [
        500,
        25000000,
        800,
        30000000,
        400,
        1200000,
        200,
        60000000,
        150,
        80000000,
        150,
        250000000
    ]

'books[0:2].title':
    [
        "Harry Potter and the Philosopher's Stone",
        "Harry Potter and the Deathly Hallows",
        "Two Treatises of Government",
        "Thoughts Concerning Education",
        "The Art of War"
    ]

'books[2:0].title':
    [
        "The Silkworm",
        "Harry Potter and the Deathly Hallows",
        "Harry Potter and the Philosopher's Stone",
        "Thoughts Concerning Education",
        "Two Treatises of Government",
        "The Art of War"
    ]

'books[::-1].title':
    [
        "The Silkworm",
        "Harry Potter and the Deathly Hallows",
        "Harry Potter and the Philosopher's Stone",
        "Thoughts Concerning Education",
        "Two Treatises of Government",
        "The Art of War"
    ]

@chrisjohnson
Copy link

Would you mind if I take this, maybe clean up a few things and add some extra honey/a wrapper and package it up nice?

@karanlyons
Copy link
Author

Sure, if you'd like. Just throw some credit my way. There's a good amount of brute-force-style conditionals in some of the regex/slice handling that could definitely use some refactoring to simplify the logic, but it should still be fairly performant. The regex stuff doesn't get triggered unless the user is using it explicitly or globbing; it might also be worth special-casing [] to avoid all the regex and iterative work, since in that case it can be replaced by a normal push. Simple slicing (without an interval) could also be made more performant by skipping the for loop and just using slice directly (which would also save you from having to properly calculate negative indexes).

Let me know if there's anything about the code that I could better explain.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment