-
-
Save devinrhode2/37727d116b723d9509cd7d7ff4726c64 to your computer and use it in GitHub Desktop.
import { globby } from 'globby' | |
import pMap from 'p-map' | |
import execa from 'execa' | |
// Typical high end is about 350 MB per process (based on watching "xo" processes inside Activity Monitor).
// ~~Let's go with 400 MB to be safe~~
// Since we are grouping files together, we should have less overhead
// (node, yarn, xo wrapper, eslint wrapper, 1 ts process?) - let's go with just 350.
// Expressed in GB so it can be divided directly into the free-memory figure (also in GB).
const TYPICAL_XO_MEMORY_USAGE = 0.35
// Free memory, in GB, assumed when the FREE_GB environment variable is not set.
const TYPICAL_FREE_GB = 5
// prettier-ignore | |
/**
 * Reads a positive integer from an environment variable.
 *
 * @param {string} name - environment variable name.
 * @param {number} fallback - value returned when the variable is unset or not an integer >= 1.
 * @returns {number} the parsed value, or `fallback`.
 */
function readPositiveIntEnv(name, fallback) {
  const parsed = Number.parseInt(process.env[name], 10)
  return Number.isInteger(parsed) && parsed >= 1 ? parsed : fallback
}

/**
 * Guesses how many xo processes fit into free memory.
 *
 * @param {number} fileCount - number of files about to be linted; only used to decide whether to warn.
 * @returns {number} number of parallel processes, always at least 1.
 */
function guessOptimalThreads(fileCount) {
  let freeGb = readPositiveIntEnv('FREE_GB', 0)
  if (!freeGb) {
    // a "good small commit" should be no more than 12 files (Devin's estimate),
    // so only nag about FREE_GB on bigger runs - but ALWAYS apply the default,
    // otherwise the concurrency ends up as NaN for small runs.
    if (fileCount > 12) {
      console.warn(
        `FREE_GB is not defined, defaulting to ${TYPICAL_FREE_GB}GB. If you have more free memory/ram, run: FREE_GB=12 yarn xo-fix-all`
      )
    }
    freeGb = TYPICAL_FREE_GB
  }
  return Math.max(1, Math.floor(freeGb / TYPICAL_XO_MEMORY_USAGE))
}

/**
 * Splits a flat list into consecutive chunks of at most `chunkSize` items.
 *
 * @param {string[]} files - flat list of file paths.
 * @param {number} chunkSize - maximum items per chunk (integer >= 1).
 * @returns {string[][]} the chunks, in original order.
 */
function chunkFiles(files, chunkSize) {
  const chunks = []
  for (let start = 0; start < files.length; start += chunkSize) {
    chunks.push(files.slice(start, start + chunkSize))
  }
  return chunks
}

/**
 * Runs `xo --fix` over every .ts/.tsx file matched by globby (honouring .gitignore),
 * handing the files to xo in chunks and running several xo processes in parallel.
 *
 * Environment variables:
 * - XO_CONCURRENCY: number of parallel xo processes (overrides the FREE_GB-based guess).
 * - FREE_GB: free memory in GB, used to guess the concurrency when XO_CONCURRENCY is unset.
 * - CHUNK_SIZE: number of files passed to each xo process (default 5).
 *
 * @returns {Promise<void>} resolves once every chunk has been processed.
 * @throws {Error} if the chunked list does not contain the same number of files as the original list.
 */
async function init() {
  // yarn xo-fix-all **/*.{ts,tsx}
  const fileList = await globby('**/*.{ts,tsx}', { gitignore: true })

  // XO_CONCURRENCY is how many xo processes we run at the same time.
  const XO_CONCURRENCY =
    readPositiveIntEnv('XO_CONCURRENCY', 0) || guessOptimalThreads(fileList.length)

  console.log('Processing', fileList.length, 'typescript files...')

  // Instead of creating 1 process for each file, each process receives a chunk of files.
  // 1 is best UX (in case XO is ever buggy, you see buggy behavior as fast as possible),
  // but 5 measured noticeably faster, so that is the default.
  const chunkSize = readPositiveIntEnv('CHUNK_SIZE', 5)

  // Transform fileList from a flat list, to an array of fileList chunks
  const chunkedFileList = chunkFiles(fileList, chunkSize)

  // assert chunked list still has same number of files:
  if (chunkedFileList.flat().length !== fileList.length) {
    console.error('chunkedFileList 2nd to last chunk:', chunkedFileList[chunkedFileList.length - 2])
    console.error('chunkedFileList last chunk:', chunkedFileList[chunkedFileList.length - 1])
    console.error('original fileList:', fileList.length)
    throw new Error('chunkedFileList does not have same length as fileList')
  }

  console.log('Going to lint', chunkSize, 'files per process')
  console.log('With', XO_CONCURRENCY, 'parallel processes')
  console.time('process all files')

  await pMap(
    chunkedFileList,
    async files => {
      // Hopefully we don't need to heavily pad TYPICAL_XO_MEMORY_USAGE:
      // 1. Theoretically should be avoiding some overhead by grouping
      // 2. Some can be smaller, some larger, just need to hit good average.
      // 3. Therefore, if we have memory problems, we need to increase this number, or make files smaller
      const maxOldSpaceMb = Math.round(
        TYPICAL_XO_MEMORY_USAGE * 1000 /*convert to MB*/ * files.length
      )

      // Pass arguments as an array (not one space-joined command string) so that
      // file paths containing spaces reach xo intact.
      // maybe we can directly require/call cli?
      const { all } = await execa(
        'node',
        [
          '--unhandled-rejections=strict',
          `--max-old-space-size=${maxOldSpaceMb}`,
          '--trace-warnings',
          './node_modules/xo/cli.js',
          '--fix',
          ...files,
        ],
        // reject: false - a failing xo process must not abort the remaining chunks.
        { all: true, env: { FORCE_COLOR: 'true' }, reject: false }
      )

      // `all` can be undefined when the process could not be spawned at all.
      if (all && all.trim()) process.stdout.write(`${all}\n`)
    },
    {
      concurrency: XO_CONCURRENCY,
    }
  )

  console.timeEnd('process all files')
  // 28:08.830 (m:ss.mmm) - fix from "scratch", 14 files at a time. (f268409312e1fea740f6009bba4785cb7b6d9e49)
}
// Start the run. Report any failure and exit non-zero instead of leaving the promise floating.
init().catch(error => {
  console.error(error)
  process.exitCode = 1
})
Given that sending one file at a time did have one measurement that is lower, AND I consider it a better user experience (sitting there not printing anything for minutes certainly seems like a bug)... probably sending one file at a time is the best approach. Now, we could send maybe 1, 3, 5 files at a time, maybe 7, but it feels like more than that is going to be a worse experience overall.
~~If there's ever a bug developers run into (actually, I ran into recently) where XO doesn't seem to be getting all the rules and stuff correctly.. then it's best to process one file at a time, ~~
XO_CONCURRENCY=7 yarn xo-fix-all
..copied code into actual gist src..
15:12.181 (m:ss.mmm)
Let's try increasing chunkSize :)
3 threads, 5 files at a time, 7:32.654
(m:ss.mmm)
Maxing out memory, running xo --fix on all files at same time:
/usr/bin/time node --max-old-space-size=12288 ./node_modules/xo/cli.js --fix
...
276.14 real 326.96 user 61.91 sys
327/60 = 5.45 minutes (5 minutes, 27 seconds)
I feel like grouping by tsconfig may make the most sense for our NX monorepo..
discarded changes (git checkout src) ran again..
362.92 real 339.22 user 81.19 sys
363/60 = 6.05 (6 minutes, 3 seconds)
with 14 "threads", took 26 minutes 46.387 seconds... But I was mostly not using my computer.
~~Sequentially(?) sending sets of 14 files at a time... eek I did it wrong. I'm sending 14 groups of 14 files, to be run on 14 threads at once :/ ~~
it took 28 minutes and 8 seconds. But, I was heavily using my computer.
Actually, I found ANOTHER mistake - I was not even sending in the `chunkedFileList` - so these turned out to be mostly the same exact thing... More measurements might yield different results, or reveal that exactly how we break up the work doesn't really matter that much(?)