Skip to content

Instantly share code, notes, and snippets.

@riquito
Created February 24, 2023 06:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save riquito/aa822b4931e98cb1ad970e40987e5288 to your computer and use it in GitHub Desktop.
Save riquito/aa822b4931e98cb1ad970e40987e5288 to your computer and use it in GitHub Desktop.
retrieve workspaces that have changed since ref, after applying ignore rules / get which package depend on another in a monorepo
// Copyright (c) 2023 Riccardo Attilio Galli. All rights reserved.
// This work is licensed under the terms of the MIT license.
// For a copy, see <https://opensource.org/licenses/MIT>.
// So, ideally yarn workspaces list --json -R --since=HEAD~
// would give us what we want
// We want a list of packages (and perhaps their locations) that
// have been meaningfully modified.
// We define as meaningful whatever does not match a rule in .gitignore / .npmignore.
// We may want to ignore some other kind of edits (e.g. README, tests), so
// that if the package doesn't "deserve" a new release, we don't produce it.
// We also want to include the packages affected by the changes (so we want to
// include the dependee of the modified package too).
// There is a command that almost does that
// yarn workspaces list --json -R --since=HEAD~
// Unfortunately it does not support a "--ignore" option, so here we are.
// We are going to
// - 1) get the list of new/modified/non-deleted files since a certain git ref
// - 2) remove from that list anything that matches the ignore-rules
// - 3) get the name of the affected packages
// - 4) get the name of the dependee of those packages
// About point 3: how do you get the package name? You'd need to
// read it from the package.json file - at what level is it? We can reuse
// the too-big output of yarn workspaces list to match them (it contains
// every package, without the ignore list).
// About point 4: how do you get the dependee? We can use `yarn workspaces foreach`,
// passing every package name as one (or more) --from parameters to limit them.
// (We can find the package name and location from the env vars npm_package_name
// and npm_package_json, using exec as the run command. Also, $PWD can't be read
// correctly, but pwd can.)
import { execFileSync, execSync } from 'node:child_process'
import { existsSync, open, openSync, readFileSync, unlinkSync, writeFileSync } from 'node:fs'
import { randomBytes } from 'node:crypto'
import path from 'node:path'
import { tmpdir } from 'node:os'
/**
 * List the files that differ between `ref` and HEAD, leaving out the ones
 * that were deleted.
 *
 * git is asked directly for the names (`--name-only`) and for everything but
 * deletions (`--diff-filter=d`), so no `grep`/`cut` pipeline is needed and the
 * command no longer relies on the bash-only `$'\t'` quoting. For a renamed
 * file the name reported is the one it has in HEAD.
 *
 * @param {string} ref git revision to compare HEAD against
 * @returns {Array<string>} paths as printed by git; empty when nothing changed
 */
function getEditedFilesSinceRef(ref) {
  // No shell is involved: `ref` reaches git as a single argument.
  // `-z` makes git print the paths verbatim, NUL-terminated, so names with
  // spaces, quotes or newlines survive intact.
  const stdout = execFileSync(
    'git',
    ['diff', '--name-only', '--diff-filter=d', '-z', `${ref}..HEAD`],
    { encoding: 'utf8' }
  )
  return stdout.split('\0').filter((file) => file !== '')
}
/**
 * Return the subset of `editedFiles` that git reports as ignored.
 *
 * Only the excluded files can be listed (`git ls-files -i`); callers that want
 * the files to keep have to subtract this result from `editedFiles`.
 * Note: works only with real files, untracked or part of the git index.
 * Note: every file is passed as a command line argument, so a very large list
 * can exceed the operating system's argument length limit.
 *
 * @param {string} ignoreConfPath value handed to git's --exclude-per-directory option
 * @param {Array<string>} editedFiles paths to check
 * @returns {Array<string>} the paths git printed; empty when none is ignored
 */
function getIgnoredFiles(ignoreConfPath, editedFiles) {
  const pathspecs = editedFiles.filter((file) => file !== '')
  if (pathspecs.length === 0) {
    // without any pathspec git would list every ignored file it knows about
    return []
  }
  // No shell is involved, so file names containing quotes, `$` or backticks
  // are passed to git untouched. `-z` keeps the printed paths verbatim.
  const stdout = execFileSync(
    'git',
    [
      'ls-files',
      '-c',
      '-o',
      '-i',
      '-z',
      `--exclude-per-directory=${ignoreConfPath}`,
      '--',
      ...pathspecs,
    ],
    { encoding: 'utf8' }
  )
  return stdout.split('\0').filter((file) => file !== '')
}
/**
 * Create an empty temporary file and schedule its removal for process exit.
 *
 * The file is created and closed right away (no descriptor is kept open), so
 * its path can be handed to another process.
 *
 * @returns {{ path: string }} location of the newly created file
 */
function mktemp() {
  // I know...still better than 1 million dependencies
  const filename = 'changed-since-' + randomBytes(8).toString('hex')
  const fpath = path.join(tmpdir(), filename)
  // 'wx' refuses to reuse an already existing path; writeFileSync closes the
  // descriptor it opens
  writeFileSync(fpath, '', { flag: 'wx' })
  // registered only once the file exists, and tolerant of it being gone
  process.on('exit', () => {
    try {
      unlinkSync(fpath)
    } catch (err) {
      if (err.code !== 'ENOENT') {
        throw err
      }
    }
  })
  return { path: fpath }
}
/**
 * Set difference for sorted lists: every item of `a` that is absent from `b`.
 *
 * Both inputs must be sorted and free of duplicates. When either input is
 * empty, `a` itself is handed back (no copy is made).
 *
 * @param {Array<string>} a
 * @param {Array<string>} b
 * @returns {Array<string>}
 */
function comm(a, b) {
  if (!a.length || !b.length) {
    return a
  }

  const onlyInA = []
  let i = 0
  let j = 0
  while (i < a.length) {
    const left = a[i]
    // `right` is undefined once b is exhausted; both comparisons below are
    // then false, so the rest of a is kept
    const right = b[j]

    if (left === right) {
      i += 1
      j += 1
      continue
    }
    if (left > right) {
      j += 1
      continue
    }
    onlyInA.push(left)
    i += 1
  }
  return onlyInA
}
/**
 * Ask yarn which workspaces changed since `ref`.
 *
 * Runs `yarn workspaces list --json --since=<ref>` and parses its output, one
 * JSON document per line. A first entry named "root" is dropped.
 *
 * NOTE(review): `ref` is interpolated into a shell command line; only pass
 * trusted values.
 *
 * @param {string} ref git revision handed to yarn's --since option
 * @param {{ includeDependee?: boolean }} [options] when `includeDependee` is
 *   true the `-R` flag is added to the yarn command
 * @returns {Array<object>} the parsed entries; empty when yarn prints nothing
 */
function getChangedPackagesUsingYarnListSince(ref, { includeDependee = false } = {}) {
  const dependeeFlag = includeDependee ? '-R' : ''
  const cmd = `yarn workspaces list --json ${dependeeFlag} --since=${ref}`
  const workspaces = execSync(cmd)
    .toString()
    .split('\n')
    .map((line) => line.trim())
    // no output at all would otherwise reach JSON.parse('') and throw
    .filter((line) => line !== '')
    .map((line) => JSON.parse(line))
  const firstIsRoot = workspaces.length > 0 && workspaces[0].name === 'root'
  return firstIsRoot ? workspaces.slice(1) : workspaces
}
/**
 * Keep only the packages that contain at least one meaningfully changed path.
 *
 * A path belongs to a package when it equals the package location or lives
 * below it (`<location>/...`). Matching is done on whole path segments, so
 * `packages/foo` does not claim `packages/foo-bar/index.js`. As a consequence
 * a workspace located at `.` never matches.
 *
 * Each package is reported once, in the order in which the changed paths
 * first hit it.
 *
 * @param {Array<string>} meaningfullyChangedPaths changed paths, written the
 *   same way as the package locations
 * @param {Array<{ location: string }>} changedPackagesUsingYarnListSince
 *   candidate packages
 * @returns {Array<object>} the matching entries of
 *   `changedPackagesUsingYarnListSince`
 */
function getMeaningfullyChangedPackages(
  meaningfullyChangedPaths,
  changedPackagesUsingYarnListSince
) {
  // packages still waiting for a match, keyed by location
  const pending = new Map()
  for (const pkg of changedPackagesUsingYarnListSince) {
    pending.set(pkg.location, pkg)
  }

  const output = []
  for (const changedPath of meaningfullyChangedPaths) {
    if (pending.size === 0) {
      break
    }
    // deleting the current entry while iterating a Map is safe
    for (const [location, pkg] of pending) {
      const dir = location.endsWith('/') ? location.slice(0, -1) : location
      if (changedPath === dir || changedPath.startsWith(`${dir}/`)) {
        output.push(pkg)
        pending.delete(location)
      }
    }
  }
  return output
}
/**
 * Run `pwd` through `yarn workspaces foreach -R --from=...` for the given
 * packages and return the printed directories, relative to the current
 * working directory.
 *
 * @param {Array<string>} pkgNames workspace names used for the --from option
 * @returns {Array<string>} one relative path per output line, `Done ...`
 *   lines excluded; empty when `pkgNames` is empty
 */
function getAllPackagesAffected(pkgNames) {
  if (pkgNames.length === 0) {
    return []
  }

  // There are two ways to pass multiple packages
  //   yarn workspaces foreach --from="{foo,bar}" exec 'pwd'
  //   yarn workspaces foreach --from="foo" --from="bar" exec 'pwd'
  // the first one is used here
  let fromOpt
  if (pkgNames.length === 1) {
    fromOpt = pkgNames[0]
  } else {
    fromOpt = `{${pkgNames.join(',')}}`
  }

  const baseDir = process.cwd()
  const stdout = execSync(`yarn workspaces foreach -R --from="${fromOpt}" exec 'pwd'`).toString()

  const locations = []
  for (const line of stdout.trim().split('\n')) {
    if (line.startsWith('Done ')) {
      continue
    }
    locations.push(path.relative(baseDir, line))
  }
  return locations
}
/**
 * CLI entry point: `changed-since <ref>`.
 *
 * Steps, each one logged to stdout:
 *  1. read and concatenate the ignore files
 *  2. list the files edited since `ref`
 *  3. drop the edited files matched by the ignore rules
 *  4. ask yarn which workspaces changed since `ref`
 *  5. keep the workspaces owning at least one non-ignored file
 *  6. resolve the packages affected by those workspaces
 *
 * Exits with status 1 when the ref argument is missing or when an ignore
 * file does not exist.
 */
function main() {
  const cliArgs = process.argv.slice(2)
  if (cliArgs.length < 1) {
    console.error('Usage: changed-since <ref>')
    process.exit(1)
  }
  const [ref] = cliArgs

  const ignore_conf_paths = ['hello']
  let combined_ignore_files = ''
  for (const maybePath of ignore_conf_paths) {
    try {
      combined_ignore_files += readFileSync(maybePath)
    } catch (err) {
      if (err.code === 'ENOENT') {
        console.error('Error: config path does not exist: ' + maybePath)
        process.exit(1)
      }
      throw err
    }
  }

  const editedFiles = getEditedFilesSinceRef(ref)
  console.log('combined gitignore', combined_ignore_files)
  console.log('edited files', editedFiles)

  // The ignore rules are evaluated in-process; an earlier approach based on
  // mktemp() + getIgnoredFiles() + comm() is not used here.
  const ignoreRules = buildIgnoreRules(combined_ignore_files)
  const meaningfullyChanged = editedFiles.filter(
    (editedFile) => !gitignore(ignoreRules, editedFile)
  )
  console.log('meaningfully changed', meaningfullyChanged)

  const changedPackagesUsingYarnListSince = getChangedPackagesUsingYarnListSince(ref)
  console.log('changed using yarn list since', changedPackagesUsingYarnListSince)

  // now we have to check if any of the non-ignored files match the locations
  // (so if a whole package has been ignored out, we ignore it too)
  const packagesWithMeaningfulUpdates = getMeaningfullyChangedPackages(
    meaningfullyChanged,
    changedPackagesUsingYarnListSince
  )
  console.log('packages with meaningful updates', packagesWithMeaningfulUpdates)

  // include dependees
  const updatedNames = packagesWithMeaningfulUpdates.map(({ name }) => name)
  const packagesAffected = getAllPackagesAffected(updatedNames)
  console.log('packagesAffected', packagesAffected)
}
//main()
// A rule is relative to the directory of the ignore file when it starts with
// "/" or has a "/" anywhere except as its very last character
const RELATIVE_IGNORE_LINE = /(^\/|\/.+$)/

// Characters that have a special meaning inside a regular expression
const REGEX_SPECIAL_CHARS = /[.*+?^${}()|[\]\\]/

/**
 * Translate the glob part of an ignore rule into regular expression source.
 *
 * - `**` followed by `/` at the start of a path segment matches zero or more
 *   directories
 * - a trailing `/**` matches everything below the preceding directory
 * - any other run of `*` matches any number of characters except "/"
 * - `?` matches exactly one character except "/"
 * - a backslash makes the following character literal
 * - everything else (including "[" and "]") is matched literally
 *
 * @param {string} glob
 * @returns {string}
 */
function globToRegexSource(glob) {
  const escapeLiteral = (char) => (REGEX_SPECIAL_CHARS.test(char) ? `\\${char}` : char)
  let source = ''
  let i = 0
  while (i < glob.length) {
    const char = glob[i]
    if (char === '\\' && i + 1 < glob.length) {
      source += escapeLiteral(glob[i + 1])
      i += 2
    } else if (char === '*') {
      let stars = 1
      while (glob[i + stars] === '*') {
        stars += 1
      }
      const startsSegment = i === 0 || glob[i - 1] === '/'
      const after = glob[i + stars]
      if (stars === 2 && startsSegment && after === '/') {
        // "**/": also consumes the slash so that zero directories match too
        source += '(?:.*/)?'
        i += stars + 1
      } else if (stars === 2 && startsSegment && after === undefined && i > 0) {
        // trailing "/**"
        source += '.*'
        i += stars
      } else {
        source += '[^/]*'
        i += stars
      }
    } else if (char === '?') {
      source += '[^/]'
      i += 1
    } else {
      source += escapeLiteral(char)
      i += 1
    }
  }
  return source
}

/**
 * Compile one line of an ignore file into a matcher.
 *
 * Blank lines, comments (`#...`) and rules without a pattern (`!`, `/`)
 * produce no matcher. A rule starting with `!` is a negation. A rule that is
 * relative (see RELATIVE_IGNORE_LINE) is anchored at the start of the path,
 * any other rule may match at any directory level. A rule always covers whole
 * path segments and whatever lives below the matched path; a rule ending with
 * "/" only covers what lives below it.
 *
 * Every occurrence of a wildcard is honoured (the previous implementation only
 * translated the first `*`, `**` and `?` of a rule).
 *
 * @param {string} globDef a single line of the ignore file
 * @returns {((line: string) => (boolean|null))|null} null when the line holds
 *   no rule; otherwise a function returning `true` when the path is ignored by
 *   the rule, `false` when a negated rule matches, `null` when the rule does
 *   not apply to the path
 */
function buildSingleLineIgnoreFunc(globDef) {
  let pattern = globDef.trim()
  if (pattern === '' || pattern.startsWith('#')) {
    return null
  }

  let negated = false
  if (pattern[0] === '!') {
    if (pattern.length === 1) {
      return null
    }
    negated = true
    pattern = pattern.slice(1)
  }

  const isRelative = RELATIVE_IGNORE_LINE.test(pattern)
  if (pattern.startsWith('/')) {
    pattern = pattern.slice(1)
  }
  const directoryOnly = pattern.endsWith('/')
  if (directoryOnly) {
    pattern = pattern.slice(0, -1)
  }
  if (pattern === '') {
    return null
  }

  const regex = new RegExp(
    (isRelative ? '^' : '(?:^|/)') + globToRegexSource(pattern) + (directoryOnly ? '/' : '(?:/|$)')
  )

  /* @param {string} line */
  return (line) => (regex.test(line) ? !negated : null)
}
/**
 * Compile the text of an ignore file into a list of matchers, one per line
 * that actually holds a rule, in file order.
 *
 * @param {string} ignoreConf content of the ignore file
 * @returns {Array<Function>} the non-null results of buildSingleLineIgnoreFunc
 */
function buildIgnoreRules(ignoreConf) {
  const rules = []
  for (const rawLine of ignoreConf.trim().split('\n')) {
    const rule = buildSingleLineIgnoreFunc(rawLine)
    if (rule !== null) {
      rules.push(rule)
    }
  }
  return rules
}
/**
 * Decide whether `line` is ignored by a list of rules.
 *
 * Every rule is consulted in order; each rule that returns something other
 * than null overrides the verdict reached so far, so the last matching rule
 * wins. Without any match the path is not ignored.
 *
 * @param {Array<Function>} ignoreRules matchers returning true, false or null
 * @param {string} line path to test
 * @returns {boolean} verdict of the last matching rule, false when none matches
 */
function gitignore(ignoreRules, line) {
  return ignoreRules.reduce((verdict, rule) => {
    const outcome = rule(line)
    // null means "this rule does not apply": keep the previous verdict
    return outcome === null ? verdict : outcome
  }, false)
}
// Run the command line entry point as soon as the module is evaluated
main()
#!/usr/bin/env node
// Copyright (c) 2023 Riccardo Attilio Galli. All rights reserved.
// This work is licensed under the terms of the MIT license.
// For a copy, see <https://opensource.org/licenses/MIT>.
import { readFileSync } from 'node:fs'
import path from 'node:path'
import glob from 'glob'
// Get list of monorepo packages, their location and their dependencies.
// Remove from their dependencies the non-monorepo packages.
// Given a package name, output JSONL, (ordered by location(?) but) the first line is the package, the rest is dependee in the monorepo
/**
 * Gather the data of every workspace declared in `./package.json`.
 *
 * Each entry of the manifest's `workspaces` field is expanded with
 * `glob.sync`, and every resulting location is read through
 * collectSingleWorkspaceData; locations for which it returns null are skipped.
 *
 * @returns {Array<object>} one entry per workspace, in glob order
 */
function collectWorkspacesData() {
  const rootManifest = JSON.parse(readFileSync('./package.json', { encoding: 'utf8' }))
  const patterns = rootManifest.workspaces || []

  const collected = []
  for (const pattern of patterns) {
    const found = glob
      .sync(pattern)
      .map((location) => collectSingleWorkspaceData(location))
      .filter((data) => data !== null)
    collected.push(...found)
  }
  return collected
}
/**
 * Read the `package.json` found in `workspaceLocation` and extract the fields
 * needed to build the monorepo tree.
 *
 * @param {string} workspaceLocation directory expected to hold a package.json
 * @returns {{name: *, version: *, location: string, dependencies: object, devDependencies: object}|null}
 *   null when the location holds no package.json (missing file, or the
 *   location is itself a plain file, e.g. a README matched by a `packages/*`
 *   glob)
 * @throws any other read error, and JSON syntax errors
 */
function collectSingleWorkspaceData(workspaceLocation) {
  let manifestText
  try {
    manifestText = readFileSync(path.join(workspaceLocation, 'package.json'), {
      encoding: 'utf8',
    })
  } catch (e) {
    // ENOTDIR: the location is a file, so it cannot be a workspace
    if (e.code === 'ENOENT' || e.code === 'ENOTDIR') {
      return null
    }
    throw e
  }
  const packageJSON = JSON.parse(manifestText)
  return {
    name: packageJSON.name,
    version: packageJSON.version,
    location: workspaceLocation,
    dependencies: packageJSON.dependencies ?? {},
    devDependencies: packageJSON.devDependencies ?? {},
  }
}
/**
 * Index the workspaces by name, keeping for each one only the dependencies
 * that are themselves workspaces of this repository.
 *
 * @param {Array<{name: string, version: string, location: string, dependencies: object, devDependencies: object}>} workspacesData
 * @returns {object} map of package name to
 *   `{ name, version, location, dependencies, devDependencies }`, where both
 *   dependency lists are arrays of `[name, versionRange]` pairs
 */
function getMonorepoTree(workspacesData) {
  const localNames = new Set(workspacesData.map(({ name }) => name))
  // drop every dependency that is not a package of this repository
  const onlyLocal = (deps) => Object.entries(deps).filter(([depName]) => localNames.has(depName))

  return workspacesData.reduce((tree, workspace) => {
    const { name, version, location } = workspace
    tree[name] = {
      name,
      version,
      location,
      dependencies: onlyLocal(workspace.dependencies),
      devDependencies: onlyLocal(workspace.devDependencies),
    }
    return tree
  }, {})
}
/**
 * Remove and return the first value of `set` (in iteration order).
 *
 * @param {Set} set modified in place
 * @returns {*} the removed value, or undefined when the set is empty
 */
function shiftSet(set) {
  const first = set[Symbol.iterator]().next()
  if (first.done) {
    return undefined
  }
  set.delete(first.value)
  return first.value
}
/**
 * Collect the given packages plus everything they depend on, directly or
 * transitively, following both `dependencies` and `devDependencies`.
 *
 * @param {Array<string>} packageNames starting points; names missing from
 *   `tree` are skipped
 * @param {object} tree map of package name to tree node, each node listing
 *   its dependencies as `[name, versionRange]` pairs
 * @returns {Array<object>} the tree nodes of the starting packages followed by
 *   the nodes reached from them, each reached node listed once; empty when no
 *   starting package is in the tree
 */
function getRecursiveDependeesOf(packageNames, tree) {
  const collected = packageNames.map((packageName) => tree[packageName]).filter(Boolean)
  if (collected.length === 0) {
    return []
  }

  const dependencyNamesOf = (pkg) =>
    [...pkg.dependencies, ...pkg.devDependencies].map((entry) => entry[0])

  const seen = new Set(packageNames)
  const pending = new Set(collected.flatMap(dependencyNamesOf))
  while (pending.size > 0) {
    // take the oldest pending name out of the set
    const [current] = pending
    pending.delete(current)
    if (seen.has(current)) {
      continue
    }
    const node = tree[current]
    for (const dependencyName of dependencyNamesOf(node)) {
      pending.add(dependencyName)
    }
    collected.push(node)
    seen.add(current)
  }
  return collected
}
/**
 * Reduce each package to `{ location, name }` and order the result.
 *
 * The ordering is the default array sort applied to `[location, name]` pairs,
 * i.e. a string comparison of "location,name".
 *
 * @param {Array<object>|object} dependees packages exposing `name` and `location`
 * @returns {Array<{location: *, name: *}>}
 */
function dependeeToJSONL(dependees) {
  return Object.values(dependees)
    .map((pkg) => [pkg.location, pkg.name])
    .sort()
    .map((pair) => ({ location: pair[0], name: pair[1] }))
}
/**
 * Print, as JSONL (one JSON document per line), the location and name of the
 * given packages and of every monorepo package they depend on.
 *
 * @param {Array<string>} [packageNames] packages to start from; names that are
 *   not part of the monorepo are skipped by getRecursiveDependeesOf
 */
function main(packageNames = ['@foo/bar', 'lodash']) {
  const tree = getMonorepoTree(collectWorkspacesData())
  const dependees = getRecursiveDependeesOf(packageNames, tree)
  for (const entry of dependeeToJSONL(dependees)) {
    // console.log(entry) would print util.inspect output, which is not JSON
    console.log(JSON.stringify(entry))
  }
}
// Run the entry point as soon as the module is evaluated
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment