Created
April 23, 2021 06:13
-
-
Save ornicar/cda04cf6e7144e1102e476b2104f952c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Generates and saves a new generation of puzzle paths. | |
* Drops the previous generation. | |
* | |
* mongo <IP>:<PORT>/<DB> mongodb-puzzle-regen-paths.js | |
* | |
* Must run on the puzzle database. | |
* Should run every 60 minutes. | |
* Should complete within 3 minutes. | |
* OK to run many times in a row. | |
* OK to skip runs. | |
* NOT OK to run concurrently. | |
* | |
* might require this mongodb config: (https://jira.mongodb.org/browse/SERVER-44174) | |
* setParameter: | |
* internalQueryMaxPushBytes: 314572800 | |
*/ | |
const puzzleColl = db.puzzle2_puzzle; | |
const pathColl = db.puzzle2_path; | |
const verbose = false; | |
const maxRatingBuckets = 12; | |
const maxPathLength = 500; | |
const maxPuzzlesPerTheme = 2 * 1000 * 1000; // reduce to 500000 to avoid memory restrictions in some envs (!?) | |
const generation = Date.now(); | |
const tiers = [ | |
['top', 20 / 100], | |
['good', 50 / 100], | |
['all', 95 / 100], | |
]; | |
const mixBoundaries = [ | |
100, | |
800, | |
900, | |
1000, | |
1100, | |
1200, | |
1270, | |
1340, | |
1410, | |
1480, | |
1550, | |
1620, | |
1690, | |
1760, | |
1830, | |
1900, | |
2000, | |
2100, | |
2200, | |
2350, | |
2500, | |
2650, | |
2800, | |
9999, | |
]; | |
const themes = db.puzzle2_puzzle.distinct('themes', {}); | |
// const themes = ['overloading']; | |
function chunkify(a, n) { | |
let len = a.length, | |
out = [], | |
i = 0, | |
size; | |
if (len % n === 0) { | |
size = Math.floor(len / n); | |
while (i < len) { | |
out.push(a.slice(i, (i += size))); | |
} | |
} else | |
while (i < len) { | |
size = Math.ceil((len - i) / n--); | |
out.push(a.slice(i, (i += size))); | |
} | |
return out; | |
} | |
const padRating = r => (r < 1000 ? '0' : '') + r; | |
themes.concat(['mix']).forEach(theme => { | |
// ['mix'].forEach(theme => { | |
const selector = { | |
themes: | |
theme == 'mix' | |
? { | |
$ne: 'equality', | |
} | |
: theme == 'equality' | |
? 'equality' | |
: { | |
$eq: theme, | |
$ne: 'equality', | |
}, | |
}; | |
const bucketBase = { | |
groupBy: '$glicko.r', | |
output: { puzzle: { $push: { id: '$_id', vote: '$vote' } } }, | |
}; | |
const nbPuzzles = puzzleColl.count(selector); | |
if (!nbPuzzles) return []; | |
const themeMaxPathLength = Math.max(10, Math.min(maxPathLength, Math.round(nbPuzzles / 200))); | |
const nbRatingBuckets = | |
theme == 'mix' | |
? mixBoundaries.length - 1 | |
: Math.max(3, Math.min(maxRatingBuckets, Math.round(nbPuzzles / themeMaxPathLength / 20))); | |
const bucketStages = | |
theme == 'mix' | |
? [ | |
{ | |
$bucket: { | |
...bucketBase, | |
boundaries: mixBoundaries, | |
}, | |
}, | |
{ $addFields: { _id: { min: '$_id' } } }, | |
] | |
: [ | |
{ | |
$bucketAuto: { | |
...bucketBase, | |
buckets: nbRatingBuckets, | |
}, | |
}, | |
]; | |
if (verbose) | |
print( | |
`theme: ${theme}, puzzles: ${nbPuzzles}, path length: ${themeMaxPathLength}, rating buckets: ${nbRatingBuckets}` | |
); | |
let bucketIndex = 0; | |
db.puzzle2_puzzle | |
.aggregate( | |
[ | |
{ | |
$match: selector, | |
}, | |
{ | |
$limit: maxPuzzlesPerTheme, | |
}, | |
...bucketStages, | |
{ | |
$unwind: '$puzzle', | |
}, | |
{ | |
$sort: { | |
'puzzle.vote': -1, | |
}, | |
}, | |
{ | |
$group: { | |
_id: '$_id', | |
total: { | |
$sum: 1, | |
}, | |
puzzles: { | |
$push: '$puzzle.id', | |
}, | |
}, | |
}, | |
{ | |
$facet: tiers.reduce( | |
(facets, [name, ratio]) => ({ | |
...facets, | |
...{ | |
[name]: [ | |
{ | |
$project: { | |
total: 1, | |
puzzles: { | |
$slice: [ | |
'$puzzles', | |
{ | |
$round: { | |
$multiply: ['$total', ratio], | |
}, | |
}, | |
], | |
}, | |
}, | |
}, | |
{ | |
$unwind: '$puzzles', | |
}, | |
{ | |
$sample: { | |
// shuffle | |
size: 9999999, | |
}, | |
}, | |
{ | |
$group: { | |
_id: '$_id', | |
puzzles: { | |
$addToSet: '$puzzles', | |
}, | |
}, | |
}, | |
{ | |
$sort: { | |
'_id.min': 1, | |
}, | |
}, | |
{ | |
$addFields: { | |
tier: name, | |
}, | |
}, | |
], | |
}, | |
}), | |
{} | |
), | |
}, | |
{ | |
$project: { | |
bucket: { | |
$concatArrays: tiers.map(t => '$' + t[0]), | |
}, | |
}, | |
}, | |
{ | |
$unwind: '$bucket', | |
}, | |
{ | |
$replaceRoot: { | |
newRoot: '$bucket', | |
}, | |
}, | |
], | |
{ | |
allowDiskUse: true, | |
comment: 'regen-paths', | |
} | |
) | |
.forEach(bucket => { | |
const positionInTier = bucketIndex % nbRatingBuckets; | |
const isFirstOfTier = positionInTier == 0; | |
const isLastOfTier = positionInTier == nbRatingBuckets - 1; | |
const pathLength = Math.max(10, Math.min(maxPathLength, Math.round(bucket.puzzles.length / 30))); | |
const ratingMin = isFirstOfTier ? 100 : Math.ceil(bucket._id.min); | |
const ratingMax = isLastOfTier | |
? 9999 | |
: theme == 'mix' | |
? mixBoundaries[positionInTier + 1] | |
: Math.floor(bucket._id.max); | |
const nbPaths = Math.max(1, Math.floor(bucket.puzzles.length / pathLength)); | |
const paths = chunkify(bucket.puzzles, nbPaths); | |
if (verbose) | |
print( | |
` ${theme} ${positionInTier} ${bucket.tier} ${ratingMin}->${ratingMax} puzzles: ${bucket.puzzles.length} pathLength: ${pathLength} paths: ${paths.length}` | |
); | |
pathColl.insert( | |
paths.map((ids, j) => ({ | |
_id: `${theme}_${bucket.tier}_${padRating(ratingMin)}-${padRating(ratingMax)}_${generation}_${j}`, | |
min: `${theme}_${bucket.tier}_${padRating(ratingMin)}`, | |
max: `${theme}_${bucket.tier}_${padRating(ratingMax)}`, | |
ids, | |
tier: bucket.tier, | |
theme: theme, | |
gen: generation, | |
})), | |
{ | |
ordered: false, | |
} | |
); | |
bucketIndex++; | |
}); | |
}); | |
pathColl.remove({ | |
gen: { | |
$ne: generation, | |
}, | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello, could you add some license statement, please?