Skip to content

Instantly share code, notes, and snippets.

@ornicar
Created April 23, 2021 06:13
Show Gist options
  • Save ornicar/cda04cf6e7144e1102e476b2104f952c to your computer and use it in GitHub Desktop.
Save ornicar/cda04cf6e7144e1102e476b2104f952c to your computer and use it in GitHub Desktop.
/* Generates and saves a new generation of puzzle paths.
* Drops the previous generation.
*
* mongo <IP>:<PORT>/<DB> mongodb-puzzle-regen-paths.js
*
* Must run on the puzzle database.
* Should run every 60 minutes.
* Should complete within 3 minutes.
* OK to run many times in a row.
* OK to skip runs.
* NOT OK to run concurrently.
*
* might require this mongodb config: (https://jira.mongodb.org/browse/SERVER-44174)
* setParameter:
* internalQueryMaxPushBytes: 314572800
*/
// Collections: puzzles are read from the first, generated paths are written to the second.
const puzzleColl = db.puzzle2_puzzle;
const pathColl = db.puzzle2_path;

// When true, per-theme and per-bucket statistics are printed while running.
const verbose = false;

// Upper bound on the number of rating buckets requested for a non-mix theme.
const maxRatingBuckets = 12;

// Upper bound on the number of puzzle ids targeted per path.
const maxPathLength = 500;

// reduce to 500000 to avoid memory restrictions in some envs (!?)
const maxPuzzlesPerTheme = 2 * 1000 * 1000;

// Timestamp stamped on every path written by this run; it is also part of each path _id.
const generation = Date.now();

// [tier name, share of each rating bucket's best-voted puzzles kept for that tier]
const tiers = [
  ['top', 20 / 100],
  ['good', 50 / 100],
  ['all', 95 / 100],
];

// Fixed rating boundaries used to bucket the 'mix' theme.
const mixBoundaries = [
  100, 800, 900, 1000, 1100, 1200,
  1270, 1340, 1410, 1480, 1550, 1620, 1690, 1760, 1830, 1900,
  2000, 2100, 2200, 2350, 2500, 2650, 2800,
  9999,
];

// Every distinct theme present on puzzles.
// For debugging a single theme, use instead: const themes = ['overloading'];
const themes = puzzleColl.distinct('themes', {});
/**
 * Splits an array into contiguous chunks of near-equal size.
 *
 * When the length is a multiple of `n`, every chunk has the same size.
 * Otherwise the sizes are balanced, larger chunks first, by repeatedly
 * dividing what is left by the number of chunks still to produce.
 * Fewer than `n` chunks are returned when the array has fewer than `n` items,
 * and an empty array yields no chunk at all. The input is never mutated.
 *
 * @param {Array} a - items to split
 * @param {number} n - requested number of chunks; values below 1 are treated as 1
 * @returns {Array<Array>} the chunks, in the original item order
 */
function chunkify(a, n) {
  const len = a.length;
  const out = [];
  // A count below 1 (or NaN) cannot be honoured. A negative count used to make
  // the slice size negative, so the cursor moved backwards and the loop never
  // ended. Falling back to a single chunk matches what a count of 0 produced.
  let remaining = n >= 1 ? n : 1;
  let i = 0;

  if (len % remaining === 0) {
    const size = Math.floor(len / remaining);
    while (i < len) {
      out.push(a.slice(i, i + size));
      i += size;
    }
    return out;
  }

  while (i < len) {
    // Spread the leftover items evenly over the chunks still to be produced.
    const size = Math.ceil((len - i) / remaining);
    remaining -= 1;
    out.push(a.slice(i, i + size));
    i += size;
  }
  return out;
}
// Renders a rating as text, prefixing a single '0' when it is below 1000.
const padRating = r => {
  const prefix = r < 1000 ? '0' : '';
  return `${prefix}${r}`;
};
/**
 * Builds the puzzle selector for one theme.
 * 'mix' takes every puzzle that is not tagged 'equality', 'equality' takes
 * exactly those puzzles, and any other theme takes its own puzzles minus the
 * 'equality' ones.
 *
 * @param {string} theme - theme key, or 'mix'
 * @returns {Object} selector usable with count() and $match
 */
function themeSelector(theme) {
  if (theme === 'mix') return { themes: { $ne: 'equality' } };
  if (theme === 'equality') return { themes: 'equality' };
  return { themes: { $eq: theme, $ne: 'equality' } };
}

/**
 * Builds the $facet sub-pipeline of one tier.
 * Input documents are rating buckets carrying `total` and `puzzles` (ids
 * ordered by vote, best first). The tier keeps the leading `ratio` share of
 * each bucket, shuffles the kept ids, regroups them per bucket, orders the
 * buckets by lower rating bound and tags them with the tier name.
 *
 * @param {string} name - tier name written to the `tier` field
 * @param {number} ratio - share of each bucket's puzzles to keep
 * @returns {Object[]} aggregation stages
 */
function tierFacetStages(name, ratio) {
  return [
    {
      $project: {
        total: 1,
        puzzles: {
          $slice: ['$puzzles', { $round: { $multiply: ['$total', ratio] } }],
        },
      },
    },
    { $unwind: '$puzzles' },
    {
      $sample: {
        // shuffle
        size: 9999999,
      },
    },
    { $group: { _id: '$_id', puzzles: { $addToSet: '$puzzles' } } },
    { $sort: { '_id.min': 1 } },
    { $addFields: { tier: name } },
  ];
}

// The facet stage does not depend on the theme, so it is built only once.
const tierFacets = {};
tiers.forEach(([name, ratio]) => {
  tierFacets[name] = tierFacetStages(name, ratio);
});

// For every theme plus the synthetic 'mix' theme: bucket the puzzles by
// rating, derive one bucket list per tier, cut each bucket into paths and
// insert them tagged with the current generation.
// To debug a single theme, iterate over e.g. ['mix'] instead.
themes.concat(['mix']).forEach(theme => {
  const isMix = theme === 'mix';
  const selector = themeSelector(theme);
  const nbPuzzles = puzzleColl.count(selector);
  if (!nbPuzzles) return;

  const themeMaxPathLength = Math.max(10, Math.min(maxPathLength, Math.round(nbPuzzles / 200)));
  const nbRatingBuckets = isMix
    ? mixBoundaries.length - 1
    : Math.max(3, Math.min(maxRatingBuckets, Math.round(nbPuzzles / themeMaxPathLength / 20)));

  const bucketBase = {
    groupBy: '$glicko.r',
    output: { puzzle: { $push: { id: '$_id', vote: '$vote' } } },
  };
  // 'mix' uses the fixed boundaries; its bucket _id is reshaped to { min }
  // so that both variants expose the lower bound as `_id.min`.
  const bucketStages = isMix
    ? [
        { $bucket: { ...bucketBase, boundaries: mixBoundaries } },
        { $addFields: { _id: { min: '$_id' } } },
      ]
    : [{ $bucketAuto: { ...bucketBase, buckets: nbRatingBuckets } }];

  if (verbose)
    print(
      `theme: ${theme}, puzzles: ${nbPuzzles}, path length: ${themeMaxPathLength}, rating buckets: ${nbRatingBuckets}`
    );

  const cursor = puzzleColl.aggregate(
    [
      { $match: selector },
      { $limit: maxPuzzlesPerTheme },
      ...bucketStages,
      { $unwind: '$puzzle' },
      { $sort: { 'puzzle.vote': -1 } },
      {
        $group: {
          _id: '$_id',
          total: { $sum: 1 },
          puzzles: { $push: '$puzzle.id' },
        },
      },
      { $facet: tierFacets },
      // Flatten the per-tier bucket lists into one stream, tier after tier.
      { $project: { bucket: { $concatArrays: tiers.map(([name]) => '$' + name) } } },
      { $unwind: '$bucket' },
      { $replaceRoot: { newRoot: '$bucket' } },
    ],
    {
      allowDiskUse: true,
      comment: 'regen-paths',
    }
  );

  // A tier does not always yield `nbRatingBuckets` buckets: $bucket omits
  // empty ranges, $bucketAuto may return fewer buckets than requested, and a
  // bucket whose kept share rounds to zero disappears at $unwind. The position
  // inside a tier is therefore tracked from the `tier` field, with a
  // one-document look-ahead to detect the last bucket, instead of being
  // derived from a running counter modulo `nbRatingBuckets`.
  let positionInTier = 0;
  let previousTier = null;
  let bucket = cursor.hasNext() ? cursor.next() : null;
  while (bucket) {
    const upcoming = cursor.hasNext() ? cursor.next() : null;
    if (bucket.tier !== previousTier) positionInTier = 0;
    const isFirstOfTier = positionInTier === 0;
    const isLastOfTier = !upcoming || upcoming.tier !== bucket.tier;

    const pathLength = Math.max(10, Math.min(maxPathLength, Math.round(bucket.puzzles.length / 30)));
    // The outermost buckets are widened so that a tier covers 100..9999.
    const ratingMin = isFirstOfTier ? 100 : Math.ceil(bucket._id.min);
    let ratingMax;
    if (isLastOfTier) ratingMax = 9999;
    // Upper boundary of the mix bucket, found from the bucket's own lower
    // bound so that skipped buckets cannot shift it.
    else if (isMix) ratingMax = mixBoundaries.find(boundary => boundary > bucket._id.min);
    else ratingMax = Math.floor(bucket._id.max);

    const nbPaths = Math.max(1, Math.floor(bucket.puzzles.length / pathLength));
    const paths = chunkify(bucket.puzzles, nbPaths);
    if (verbose)
      print(
        ` ${theme} ${positionInTier} ${bucket.tier} ${ratingMin}->${ratingMax} puzzles: ${bucket.puzzles.length} pathLength: ${pathLength} paths: ${paths.length}`
      );

    const tier = bucket.tier;
    pathColl.insert(
      paths.map((ids, j) => ({
        _id: `${theme}_${tier}_${padRating(ratingMin)}-${padRating(ratingMax)}_${generation}_${j}`,
        min: `${theme}_${tier}_${padRating(ratingMin)}`,
        max: `${theme}_${tier}_${padRating(ratingMax)}`,
        ids,
        tier,
        theme,
        gen: generation,
      })),
      {
        ordered: false,
      }
    );

    previousTier = tier;
    positionInTier++;
    bucket = upcoming;
  }
});
// Remove every path whose generation differs from the one written by this run.
const otherGenerations = { gen: { $ne: generation } };
pathColl.remove(otherGenerations);
@v6ak
Copy link

v6ak commented May 3, 2023

Hello, could you add some license statement, please?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment