Created
February 24, 2023 06:22
-
-
Save riquito/aa822b4931e98cb1ad970e40987e5288 to your computer and use it in GitHub Desktop.
retrieve workspaces that have changed since ref, after applying ignore rules / get which package depend on another in a monorepo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) 2023 Riccardo Attilio Galli. All rights reserved. | |
// This work is licensed under the terms of the MIT license. | |
// For a copy, see <https://opensource.org/licenses/MIT>. | |
// So, ideally yarn workspaces list --json -R --since=HEAD~ | |
// would give us what we want
// We want a list of packages (and perhaps their locations) that | |
// have been meaningfully modified. | |
// We define as meaningful any change that does not match a rule in .gitignore / .npmignore.
// We may want to ignore some other kind of edits (e.g. README, tests), so | |
// that if the package doesn't "deserve" a new release, we don't produce it. | |
// We also want to include the packages affected by the changes (so we want to | |
// include the dependee of the modified package too). | |
// There is a command that almost does that | |
// yarn workspaces list --json -R --since=HEAD~ | |
// Unfortunately it does not support a "--ignore" option, so here we are. | |
// We are going to | |
// - 1) get the list of new/modified/non-deleted files since a certain git ref
// - 2) remove from that list anything that matches the ignore-rules | |
// - 3) get the name of the affected packages | |
// - 4) get the name of the dependee of those packages | |
// About point 3: how do you get the package name? You'd need to | |
// read it from the package.json file - at what level is it? We can reuse | |
// the too-big output of yarn workspaces list to match them (it contains | |
// every package, without the ignore list). | |
// About point 4: how do you get the dependee? We can use `yarn foreach` | |
// pass every package name as one (or more) --from parameters to limit them | |
// (we can find package name location from env vars npm_package_name and | |
// npm_package_json. Using exec as run command. Also, $PWD can't be correctly | |
// read, but pwd can). | |
import { execSync } from 'node:child_process' | |
import { existsSync, open, openSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs' | |
import { randomBytes } from 'node:crypto' | |
import path from 'node:path' | |
import { tmpdir } from 'node:os' | |
/**
 * List the files added or modified (but not deleted) between `ref` and HEAD.
 *
 * @param {string} ref - git revision to diff against (interpolated into the command line)
 * @returns {string[]} one path per changed file; an empty array when nothing changed
 */
function getEditedFilesSinceRef(ref) {
  // Let git do the filtering: --diff-filter=d drops deleted files and
  // --name-only prints just the path. This avoids the grep/cut pipeline,
  // whose $'\t' quoting is a bashism that plain /bin/sh may not understand.
  const cmd = `git diff --name-only --diff-filter=d ${ref}..HEAD`
  return execSync(cmd)
    .toString()
    .split('\n')
    .filter((line) => line !== '') // no changes => [] rather than ['']
}
/**
 * Return the subset of `editedFiles` that git reports as ignored when
 * `ignoreConfPath` is used as the per-directory exclude file.
 *
 * (note: works only with real files, untracked or part of the git index)
 * (sad note: `git ls-files -i` can only list the excluded files; the included
 * ones have to be derived by the caller)
 *
 * @param {string} ignoreConfPath - value passed to --exclude-per-directory
 * @param {string[]} editedFiles - paths to check
 * @returns {string[]} the ignored paths; empty when nothing is ignored
 */
function getIgnoredFiles(ignoreConfPath, editedFiles) {
  // With no pathspec `git ls-files` would list every ignored file in the
  // repository, so an empty input must short-circuit.
  if (editedFiles.length === 0) {
    return []
  }
  // Single-quote every argument so that spaces, quotes, `$` and backticks in
  // file names reach git untouched.
  const shellQuote = (arg) => "'" + String(arg).replace(/'/g, "'\\''") + "'"
  const editedFilesAsShellArgs = editedFiles.map(shellQuote).join(' ')
  // note: the amount of edited files could be in the thousands worst case,
  // which may exceed the maximum command line length
  const cmd = `git ls-files -co -i --exclude-per-directory=${shellQuote(
    ignoreConfPath
  )} -- ${editedFilesAsShellArgs}`
  return execSync(cmd)
    .toString()
    .split('\n')
    .filter((line) => line !== '')
}
/**
 * Create an empty temporary file and schedule its removal at process exit.
 *
 * @returns {{ path: string }} location of the newly created file
 */
function mktemp() {
  // I know...still better than 1 million dependencies
  const filename = 'changed-since-' + randomBytes(8).toString('hex')
  const fpath = path.join(tmpdir(), filename)
  // writeFileSync creates the file and closes it again, so no descriptor is
  // leaked. It is not an exclusive create because the path is meant to be
  // handed to another process.
  writeFileSync(fpath, '')
  // Register the cleanup only once the file exists, and tolerate it having
  // been removed in the meantime.
  process.on('exit', () => {
    try {
      unlinkSync(fpath)
    } catch (err) {
      if (err.code !== 'ENOENT') {
        throw err
      }
    }
  })
  return { path: fpath }
}
/**
 * Return all items in a that are not in b.
 *
 * a and b must be sorted (ascending, by the `<` operator) and unique.
 * The result is always a new array, so callers may mutate it without
 * affecting `a`.
 *
 * @param {array<string>} a
 * @param {array<string>} b
 * @returns {array<string>} the items of a missing from b, in their original order
 */
function comm(a, b) {
  const output = []
  let bIdx = 0
  for (const item of a) {
    // Skip the entries of b that sort before the current item; the explicit
    // bounds check replaces the comparison against `undefined` past b's end.
    while (bIdx < b.length && b[bIdx] < item) {
      bIdx += 1
    }
    if (bIdx < b.length && b[bIdx] === item) {
      // Present in both lists: drop it and consume the match.
      bIdx += 1
    } else {
      output.push(item)
    }
  }
  return output
}
/**
 * Run `yarn workspaces list --json --since=<ref>` and parse its JSON-lines output.
 *
 * @param {string} ref - git revision handed to yarn's --since option
 * @param {{ includeDependee?: boolean }} [options] - when `includeDependee`
 *   is truthy, yarn's -R flag is added to the command
 * @returns {object[]} one parsed object per output line; a leading entry
 *   whose name is 'root' is dropped
 */
function getChangedPackagesUsingYarnListSince(
  ref,
  { includeDependee } = { includeDependee: false }
) {
  const dependeeFlag = includeDependee ? '-R' : ''
  const cmd = `yarn workspaces list --json ${dependeeFlag} --since=${ref}`
  const results = execSync(cmd)
    .toString()
    .split('\n')
    // Blank lines (e.g. when yarn reports nothing) would make JSON.parse throw.
    .filter((line) => line.trim() !== '')
    .map((line) => JSON.parse(line))
  const sliceIdx = results.length > 0 && results[0].name === 'root' ? 1 : 0
  return results.slice(sliceIdx)
}
/**
 * Keep only the packages that contain at least one meaningfully changed path.
 *
 * A path belongs to a package when it equals the package location or lives
 * below it; the comparison is made on whole path segments, so `packages/foo`
 * does not claim `packages/foo-bar/index.js`. A location of '.' or '' is
 * treated as containing every path.
 *
 * @param {string[]} meaningfullyChangedPaths - changed file paths, relative to
 *   the same directory as the package locations
 * @param {{ location: string }[]} changedPackagesUsingYarnListSince - package
 *   records, each with a `location`
 * @returns {object[]} the matching package records, each at most once, in the
 *   order in which a changed path first matched them
 */
function getMeaningfullyChangedPackages(
  meaningfullyChangedPaths,
  changedPackagesUsingYarnListSince
) {
  // TODO: changedPackagesUsingYarnListSince are ordered by location
  // so we could iterate in O(n). For now we'll go with O(n^2)
  const pendingByLocation = new Map()
  for (const pkg of changedPackagesUsingYarnListSince) {
    pendingByLocation.set(pkg.location, pkg)
  }

  const isInside = (filePath, location) => {
    const dir = location.replace(/\/+$/, '')
    if (dir === '' || dir === '.') {
      return true
    }
    return filePath === dir || filePath.startsWith(`${dir}/`)
  }

  const output = []
  for (const filePath of meaningfullyChangedPaths) {
    if (pendingByLocation.size === 0) {
      break // every package has already been matched
    }
    for (const [location, pkg] of pendingByLocation) {
      if (isInside(filePath, location)) {
        output.push(pkg)
        // A package is reported once; deleting during Map iteration is safe.
        pendingByLocation.delete(location)
      }
    }
  }
  return output
}
/**
 * Run `pwd` through `yarn workspaces foreach -R --from=...` for the given
 * packages and return the printed directories relative to the current
 * working directory.
 *
 * @param {string[]} pkgNames - workspace names used for the --from filter
 * @returns {string[]} relative workspace paths; empty when `pkgNames` is empty
 */
function getAllPackagesAffected(pkgNames) {
  if (pkgNames.length === 0) {
    return []
  }
  // There are two ways to pass multiple packages
  // yarn workspaces foreach --from="{foo,bar}" exec 'pwd'
  // yarn workspaces foreach --from="foo" --from="bar" exec 'pwd'
  const fromOpt = pkgNames.length === 1 ? pkgNames[0] : `{${pkgNames.join(',')}}`
  const cmd = `yarn workspaces foreach -R --from="${fromOpt}" exec 'pwd'`
  const cwd = process.cwd()
  return (
    execSync(cmd)
      .toString()
      .split('\n')
      .map((line) => line.trim())
      // Drop blank lines (they would turn into '' entries) and yarn's
      // trailing "Done ..." summary line.
      .filter((line) => line !== '' && !line.startsWith('Done '))
      .map((line) => path.relative(cwd, line))
  )
}
/**
 * CLI entry point: `changed-since <ref> [ignore-file...]`.
 *
 * Steps:
 *  1. read the ignore files and compile them into ignore rules;
 *  2. list the files edited since <ref> and drop the ignored ones;
 *  3. ask yarn which workspaces changed since <ref> and keep those that
 *     contain at least one non-ignored file;
 *  4. expand that set through `yarn workspaces foreach -R`.
 *
 * Every intermediate result is printed with console.log. The process exits
 * with status 1 when <ref> is missing or an ignore file does not exist.
 */
function main() {
  const [ref, ...ignoreFileArgs] = process.argv.slice(2)
  if (ref === undefined) {
    console.error('Usage: changed-since <ref> [ignore-file...]')
    process.exit(1)
  }

  // 'hello' is the path this script has always read; it remains the default
  // when no ignore file is named on the command line.
  const ignore_conf_paths = ignoreFileArgs.length > 0 ? ignoreFileArgs : ['hello']
  const ignoreFileContents = ignore_conf_paths.map((maybePath) => {
    try {
      return readFileSync(maybePath, { encoding: 'utf8' })
    } catch (err) {
      if (err.code === 'ENOENT') {
        console.error('Error: config path does not exist: ' + maybePath)
        process.exit(1)
      }
      throw err
    }
  })
  // Join with a newline so that the last rule of one file cannot fuse with
  // the first rule of the next when a file lacks a trailing newline.
  const combined_ignore_files = ignoreFileContents.join('\n')

  const editedFiles = getEditedFilesSinceRef(ref)
  console.log('combined gitignore', combined_ignore_files)
  console.log('edited files', editedFiles)

  // An earlier approach wrote the combined rules to a temp file (mktemp),
  // asked git for the ignored files (getIgnoredFiles) and subtracted them
  // (comm). The rules are now evaluated in-process instead.
  const ignoreRules = buildIgnoreRules(combined_ignore_files)
  const meaningfullyChanged = editedFiles.filter((filePath) => !gitignore(ignoreRules, filePath))
  console.log('meaningfully changed', meaningfullyChanged)

  const changedPackagesUsingYarnListSince = getChangedPackagesUsingYarnListSince(ref)
  console.log('changed using yarn list since', changedPackagesUsingYarnListSince)

  // now we have to check if any of the non-ignored files match the locations
  // (so if a whole package has been ignored out, we ignore it too)
  const packagesWithMeaningfulUpdates = getMeaningfullyChangedPackages(
    meaningfullyChanged,
    changedPackagesUsingYarnListSince
  )
  console.log('packages with meaningful updates', packagesWithMeaningfulUpdates)

  // include dependees
  const packagesAffected = getAllPackagesAffected(packagesWithMeaningfulUpdates.map((x) => x.name))
  console.log('packagesAffected', packagesAffected)
}
//main() | |
// A rule that starts with "/" or has a "/" anywhere but at its very end is
// relative to the directory of the ignore file; any other rule may match at
// any depth.
const RELATIVE_IGNORE_LINE = /(^\/|\/.+$)/

/**
 * Translate the glob part of an ignore rule into regular-expression source.
 *
 * Supported syntax: `*` (any run of characters except "/"), `?` (exactly one
 * character except "/"), a leading or inner `**` followed by "/" (zero or more
 * directories), a trailing `/**` (everything below), and `\x` (a literal x).
 * Bracket expressions such as `[a-z]` are NOT interpreted; the brackets are
 * matched literally.
 *
 * @param {string} glob - rule text without negation mark or leading/trailing slash
 * @returns {string} source for the RegExp constructor, without anchors
 */
function globToRegexSource(glob) {
  const escapeChar = (ch) => ch.replace(/[.*+?^${}()|[\]\\]/, '\\$&') // $& is the matched character
  let source = ''
  let i = 0
  while (i < glob.length) {
    const ch = glob[i]
    if (ch === '\\' && i + 1 < glob.length) {
      // A backslash makes the next character literal.
      source += escapeChar(glob[i + 1])
      i += 2
    } else if (ch === '?') {
      source += '[^/]'
      i += 1
    } else if (ch !== '*') {
      source += escapeChar(ch)
      i += 1
    } else {
      // Consume the whole run of stars in one go.
      let end = i
      while (glob[end] === '*') {
        end += 1
      }
      const isDoubleStar = end - i === 2
      const startsSegment = i === 0 || glob[i - 1] === '/'
      if (isDoubleStar && startsSegment && glob[end] === '/') {
        source += '(?:.*/)?' // "**/": zero or more whole directories
        i = end + 1
      } else if (isDoubleStar && startsSegment && end === glob.length) {
        source += '.*' // trailing "**": everything below this point
        i = end
      } else {
        source += '[^/]*' // any other run of stars behaves like a single "*"
        i = end
      }
    }
  }
  return source
}

/**
 * Compile one line of a gitignore-style file into a matcher.
 *
 * @param {string} globDef - raw line from the ignore file
 * @returns {null | ((line: string) => (boolean | null))} null for blank lines,
 *   comments and rules with nothing to match (a lone "!" or "/"); otherwise a
 *   function that returns true when `line` matches an ignore rule, false when
 *   it matches a negated ("!") rule, and null when the rule does not apply
 */
function buildSingleLineIgnoreFunc(globDef) {
  let pattern = globDef.trim()
  if (pattern === '' || pattern.startsWith('#')) {
    return null
  }

  const isNegated = pattern.startsWith('!')
  if (isNegated) {
    pattern = pattern.slice(1)
  }

  const isRelative = RELATIVE_IGNORE_LINE.test(pattern)
  // A trailing slash restricts the rule to directories. The paths being
  // tested are files, so such a rule can only match as a parent directory.
  const isDirectoryOnly = pattern.endsWith('/')
  pattern = pattern.replace(/^\//, '').replace(/\/+$/, '')
  if (pattern === '') {
    return null
  }

  // Match whole path segments only: "foo" must not match "foobar" or "xfoo".
  const prefix = isRelative ? '^' : '(?:^|/)'
  const suffix = isDirectoryOnly ? '/' : '(?:/|$)'
  const regex = new RegExp(prefix + globToRegexSource(pattern) + suffix)

  /* @param {string} line */
  return (line) => (regex.test(line) ? !isNegated : null)
}
/**
 * Compile the text of an ignore file into a list of matcher functions.
 *
 * Each line is handed to buildSingleLineIgnoreFunc; lines for which it
 * returns null are left out.
 *
 * @param {string} ignoreConf - full contents of the ignore file(s)
 * @returns {Function[]} matchers, in the order their lines appear
 */
function buildIgnoreRules(ignoreConf) {
  const rules = []
  for (const rawLine of ignoreConf.trim().split('\n')) {
    const rule = buildSingleLineIgnoreFunc(rawLine)
    if (rule !== null) {
      rules.push(rule)
    }
  }
  return rules
}
/**
 * Decide whether `line` is ignored according to `ignoreRules`.
 *
 * Every rule is consulted in order. A rule answering null has no opinion;
 * any other answer replaces the verdict so far, so the last rule with an
 * opinion wins. With no opinions at all the result is false.
 *
 * @param {Function[]} ignoreRules - matchers returning true, false or null
 * @param {string} line - the path to test
 * @returns {boolean} the verdict of the last rule that had an opinion
 */
function gitignore(ignoreRules, line) {
  return ignoreRules.reduce((verdict, rule) => {
    const outcome = rule(line)
    // there was a match (ignore or keep (!)) unless the outcome is null
    return outcome === null ? verdict : outcome
  }, false)
}
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
// Copyright (c) 2023 Riccardo Attilio Galli. All rights reserved. | |
// This work is licensed under the terms of the MIT license. | |
// For a copy, see <https://opensource.org/licenses/MIT>. | |
import { readFileSync } from 'node:fs' | |
import path from 'node:path' | |
import glob from 'glob' | |
// Get list of monorepo packages, their location and their dependencies. | |
// Remove from their dependencies the non-monorepo packages. | |
// Given a package name, output JSONL, (ordered by location(?) but) the first line is the package, the rest is dependee in the monorepo | |
/**
 * Read ./package.json, expand its workspace globs and collect the data of
 * every matched location that has a package.json of its own.
 *
 * Both shapes of the `workspaces` field are accepted: a plain array of globs
 * and an object carrying the globs under `packages`.
 *
 * @returns {object[]} one record per workspace, as produced by
 *   collectSingleWorkspaceData, without duplicates
 */
function collectWorkspacesData() {
  const conf = JSON.parse(readFileSync('./package.json', { encoding: 'utf8' }))
  const declared = conf.workspaces || []
  const workspacesGlob = Array.isArray(declared) ? declared : declared.packages || []

  const workspacesData = []
  // Overlapping globs may yield the same location more than once.
  const seenLocations = new Set()
  for (const globPattern of workspacesGlob) {
    for (const workspaceLocation of glob.sync(globPattern)) {
      if (seenLocations.has(workspaceLocation)) {
        continue
      }
      seenLocations.add(workspaceLocation)
      const data = collectSingleWorkspaceData(workspaceLocation)
      if (data !== null) {
        workspacesData.push(data)
      }
    }
  }
  return workspacesData
}
/**
 * Read `<workspaceLocation>/package.json` and extract the fields needed to
 * build the monorepo tree.
 *
 * @param {string} workspaceLocation - candidate workspace directory
 * @returns {null | { name: string, version: string, location: string,
 *   dependencies: object, devDependencies: object }} null when the location
 *   has no package.json (or is not a directory at all)
 * @throws any other file-system error, and SyntaxError on invalid JSON
 */
function collectSingleWorkspaceData(workspaceLocation) {
  let rawPackageJSON
  try {
    rawPackageJSON = readFileSync(path.join(workspaceLocation, 'package.json'), {
      encoding: 'utf8',
    })
  } catch (e) {
    // ENOTDIR: a workspace glob such as "packages/*" also matches plain files.
    if (e.code === 'ENOENT' || e.code === 'ENOTDIR') {
      return null
    }
    throw e
  }
  const packageJSON = JSON.parse(rawPackageJSON)
  return {
    name: packageJSON.name,
    version: packageJSON.version,
    location: workspaceLocation,
    dependencies: packageJSON.dependencies || {},
    devDependencies: packageJSON.devDependencies || {},
  }
}
/**
 * Index the workspaces by name, keeping only the dependencies that are
 * themselves workspaces of this repository.
 *
 * @param {object[]} workspacesData - records with name, version, location,
 *   dependencies and devDependencies (name -> version objects)
 * @returns {object} map from package name to
 *   { name, version, location, dependencies, devDependencies }, where both
 *   dependency lists are arrays of [name, version] pairs
 */
function getMonorepoTree(workspacesData) {
  const localNames = new Set(workspacesData.map((workspace) => workspace.name))
  // Keep the [name, version] entries whose name is a workspace of the repo.
  const keepLocal = (deps) => Object.entries(deps).filter(([depName]) => localNames.has(depName))

  return workspacesData.reduce((tree, workspace) => {
    tree[workspace.name] = {
      name: workspace.name,
      version: workspace.version,
      location: workspace.location,
      dependencies: keepLocal(workspace.dependencies),
      devDependencies: keepLocal(workspace.devDependencies),
    }
    return tree
  }, {})
}
/**
 * Remove and return the first value of `set` in iteration order.
 *
 * @param {Set} set - the set to take a value from; it is mutated
 * @returns {*} the removed value, or undefined when the set is empty
 */
function shiftSet(set) {
  const first = set[Symbol.iterator]().next()
  if (first.done) {
    return undefined
  }
  set.delete(first.value)
  return first.value
}
/**
 * Collect the given packages together with everything they depend on inside
 * the monorepo, following dependencies and devDependencies transitively.
 *
 * Names that are not keys of `tree` are skipped, whether they are requested
 * directly or reached as a dependency. Each package appears once, even when
 * requested twice or reached through several paths or a cycle.
 *
 * @param {string[]} packageNames - names of the packages to start from
 * @param {object} tree - map from package name to a record whose
 *   `dependencies` and `devDependencies` are arrays of [name, version] pairs
 * @returns {object[]} the requested packages first (in request order),
 *   followed by their dependencies in breadth-first discovery order
 */
function getRecursiveDependeesOf(packageNames, tree) {
  // Own-property lookup, so names such as "constructor" cannot resolve to
  // something inherited from Object.prototype.
  const lookup = (pkgName) =>
    Object.prototype.hasOwnProperty.call(tree, pkgName) ? tree[pkgName] : undefined

  const visited = new Set()
  const pkgs = []
  // The queue grows while it is being walked: plain breadth-first traversal.
  const queue = [...packageNames]
  for (let i = 0; i < queue.length; i += 1) {
    const pkgName = queue[i]
    if (visited.has(pkgName)) {
      continue
    }
    visited.add(pkgName)
    const pkg = lookup(pkgName)
    if (pkg === undefined) {
      continue
    }
    pkgs.push(pkg)
    for (const [depName] of [...pkg.dependencies, ...pkg.devDependencies]) {
      queue.push(depName)
    }
  }
  return pkgs
}
/**
 * Reduce package records to `{ location, name }` objects sorted by location,
 * then by name.
 *
 * Strings are compared with the `<` operator, so the order does not depend
 * on the locale.
 *
 * @param {object[] | object} dependees - package records (or an object whose
 *   values are package records), each with `name` and `location`
 * @returns {{ location: string, name: string }[]} the sorted records
 */
function dependeeToJSONL(dependees) {
  // An explicit comparator: the default sort() would compare the
  // comma-joined string form of each entry, which misorders locations
  // containing characters that sort before ",".
  const byLocationThenName = (left, right) => {
    if (left.location !== right.location) {
      return left.location < right.location ? -1 : 1
    }
    if (left.name !== right.name) {
      return left.name < right.name ? -1 : 1
    }
    return 0
  }
  return Object.values(dependees)
    .map(({ name, location }) => ({ location, name }))
    .sort(byLocationThenName)
}
/**
 * CLI entry point: print, one JSON object per line, the location and name of
 * the requested packages and of everything they depend on in the monorepo.
 *
 * Package names are taken from the command line; when none are given the
 * names this script has always used ('@foo/bar' and 'lodash') are queried.
 */
function main() {
  const requestedNames = process.argv.slice(2)
  const packageNames = requestedNames.length > 0 ? requestedNames : ['@foo/bar', 'lodash']

  const tree = getMonorepoTree(collectWorkspacesData())
  const dependees = getRecursiveDependeesOf(packageNames, tree)
  // JSON.stringify so that each line is valid JSON; console.log on the bare
  // object would print Node's inspect format instead.
  dependeeToJSONL(dependees).forEach((x) => console.log(JSON.stringify(x)))
}
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment