Last active
December 18, 2018 19:45
-
-
Save jmakeig/b9f2ed6c94201dae24116d414a9914b0 to your computer and use it in GitHub Desktop.
Get lexicon values from a random sample of documents based on a query in MarkLogic
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
const op = require('/MarkLogic/optic'); | |
/**
 * Lazily yields the URIs of randomly ordered documents using the Optic API.
 *
 * Builds a row set from the URI lexicon, adds a `rand` column populated by
 * `op.xdmp.random()`, orders the rows by that column, keeps the first `count`
 * rows, and projects each remaining row down to its `uri` value.
 *
 * Relies on the module-level `op` binding (`/MarkLogic/optic`) and the
 * MarkLogic `cts` global.
 *
 * @param {number} [count = 1] - the maximum number of URIs to yield
 * @return {Iterable} the selected URIs
 */
function* randomURIs(count = 1) {
  yield* op
    .fromLexicons({
      uri: cts.uriReference(),
    })
    // The random column is added to every row before ordering and limiting.
    .select(['uri', op.as('rand', op.xdmp.random())])
    .orderBy('rand')
    .limit(count)
    .map((row) => row.uri)
    .result();
}
// Usage: materialize ten random document URIs into an Array
Array.from(randomURIs(10));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
// declareUpdate(); | |
// for(let i = 0; i < 1000000; i++) { | |
// xdmp.documentInsert(`/${i}.json`, { i }); | |
// } | |
/**
 * Randomly order array values in place using the Fisher-Yates shuffle.
 *
 * @see http://stackoverflow.com/a/2450976
 *
 * @param {Array} array an `Array` or `Array`-like whose `length` is a
 *   non-negative integer
 * @return {Array} same `array` updated in place
 * @throws {TypeError} when passed something that isn’t `Array`-like
 */
function shuffle(array) {
  if (array == null) {
    throw new TypeError('shuffle expects an Array or Array-like value');
  }
  const { length } = array;
  // A missing, negative, or fractional length would never count down to
  // exactly 0 below, so reject it up front instead of looping forever.
  if (!Number.isInteger(length) || length < 0) {
    throw new TypeError('shuffle expects an Array or Array-like value');
  }

  let currentIndex = length;
  while (currentIndex !== 0) {
    // Pick from the not-yet-shuffled prefix [0, currentIndex).
    const randomIndex = Math.floor(Math.random() * currentIndex);
    currentIndex -= 1;
    // Swap the pick into the slot at the end of that prefix.
    const temporaryValue = array[currentIndex];
    array[currentIndex] = array[randomIndex];
    array[randomIndex] = temporaryValue;
  }
  return array;
}
/**
 * Project lexicon values out of a random set of documents, optionally shuffling the output order
 *
 * Being a generator, nothing is evaluated against the lexicon until the
 * returned iterable is first advanced.
 *
 * @param {cts.reference} lexicon - the lexicon from which to project values
 * @param {number} [count = 1] - the number of documents to sample; passed to
 *   `cts.values` as the `truncate=<count>` option
 * @param {cts.query | String} [query = cts.trueQuery()] - scope the documents over which to project lexicon values
 * @param {String[]} [options = ['order-random']] - additional options to apply to the underlying `cts.values` call.
 *   Polyfills an `order-random` option that allows you to order the
 *   projected values randomly; it is stripped before the remaining options
 *   are handed to `cts.values`. Caution: This could be expensive for
 *   large lists.
 * @return {Iterable}
 */
function* randomLexiconValues(
  lexicon,
  count = 1,
  query = cts.trueQuery(),
  options = ['order-random']
) {
  const ORDER_RANDOM = 'order-random';
  // `order-random` is implemented here, not by `cts.values`, so it is
  // removed from the options that are passed through.
  const passThroughOptions = options.filter((option) => option !== ORDER_RANDOM);
  const values = cts.values(
    lexicon,
    null,
    ['score-random', `truncate=${count}`, ...passThroughOptions],
    query
  );
  if (options.includes(ORDER_RANDOM)) {
    // Materializes every value before shuffling; potentially expensive for long lists of values
    yield* shuffle(Array.from(values));
  } else {
    yield* values;
  }
}
/**
 * Specialization of randomLexiconValues that projects values from the URI
 * lexicon (`cts.uriReference()`), using that function's default query and
 * options.
 *
 * @param {number} [count = 1] - forwarded as the `count` argument of `randomLexiconValues`
 * @return {Iterable}
 */
function* randomURIs(count = 1) {
  yield* randomLexiconValues(cts.uriReference(), count);
}
// Usage: materialize 1,000 random document URIs into an Array.
// Swap in the commented-out call to project values from an element lexicon instead.
Array.from(
  randomURIs(1000)
  //randomLexiconValues(cts.elementReference(xs.QName('i')), 1000)
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
random-order
option to sort the projected values