HerbCaudill/flaky.js

## readme.md

      
    Raw
  

              readme.md
            
          
    Debugging an intermittent test failure

I just spent 10+ days on a single bug. Posting these notes in case they're helpful to someone else (or to future me!) in a similar situation.
The bug manifested as an intermittent test failure. This was an end-to-end test using Playwright, and it involved multiple automated browsers connecting to a server and doing things concurrently.
Each browser produced hundreds of lines of debug logging, and so did the server; and for most of the time I was trying to solve this problem, I was:

Running the test multiple times until I got both a failure and a success
Copying the logs from the browser and from the server into text files
Running a VS Code macro that did a bunch of search-and-replace work to reduce the visual noise in the logs
Diffing the good and bad logs in VS Code to see what was happening differently
Tweaking something in the code (adding logging somewhere, adding pauses, changing the order in which things were done, etc.)
Repeating all of the above

This was extremely laborious and error-prone: I had lots of opportunities to miss a step, or get distracted, or lose my train of thought altogether.
The only way I managed to get out of this soul-destroying quagmire was to take a bit of time to create some tooling to automate this process so that I could get a fast feedback loop going.
First I needed to get the logging from both the browser and the server into a single stream.
I did this in a Playwright using a base page class that looks like this:
export class BasePage {
  userName?: string
  teamName?: string

  constructor(readonly page: Page) {
    this.page = page
  }

  // GENERAL

  log(msg: ConsoleMessage) {
    const text: string = msg.text().replace(/(color: #([0-9A-F]{6}))|(color: inherit)|%c/g, '')
    console.log(text)
  }

  async start() {
    await this.page.goto('/')

    // feed browser logs to test output
    this.page.on('console', msg => {
      this.log(msg)
    })

    // enable debug logging
    await this.page.evaluate(`window.localStorage.setItem('debug', '*')`)
    // reload so these take effect
    await pause(500)
    await this.page.reload()
    return this
  }

  // ...
}
I'm using the debug-js library for debug logging, and it took me a while to figure out how to get the debug variable set correctly: You have to add it to local storage, give that a moment to settle, and then reload the page.
Once that's done, you can just listen for console events from the browser and re-log them in the test code that's running in the terminal.
Then I wrote the script shown in flaky.js, which automates the steps listed above. It runs the test until it gets one success and one failure, each time piping the output to a text file.
It then runs each set of logs through a process that is intended to make it possible to diff the logs with a minimum of visual noise.
In my case there were lots of hashes and guids that were different on each run; these needed to be replaced with tokens that would be the same across runs.
I also replaced certain key bits of text with emoji, which made the logs easier to scan.
Once I had this set up, I was able to pin down the problem within a few hours.
Here's a screenshot of that diff; in the terminal you can see the test run where it finally stopped failing. 🥳


## flaky.js
import { exec as _exec } from 'child_process'
import fs from 'fs'
import path from 'path'
import { fileURLToPath } from 'url'
import { promisify } from 'util'

const exec = promisify(_exec)

// ensure outputDir exists
const __dirname = fileURLToPath(new URL('.', import.meta.url))
const outputDir = path.join(__dirname, '..', '.flaky')
if (!fs.existsSync(outputDir)) fs.mkdirSync(outputDir)

// option to reuse the last test run (useful for tweaking the cleanup function without having to wait for tests to rerun)
const flag = process.argv[2]
const reuse = flag === '-r' || flag === '--reuse'

const output = {
  good: undefined,
  bad: undefined,
}
const maxRuns = 25 // don't keep trying forever

// collect good & bad test logs
if (reuse) {
  console.log('Reusing the last test run')
  output.good = readFile('good.raw.txt')
  output.bad = readFile('bad.raw.txt')
} else {
  console.log('Running test until we have one success and one failure')

  for (let i = 0; i < maxRuns; i++) {
    const passed = await runTest()
    const report = readFile('log.txt')

    if (passed) {
      console.log('✅')
      writeFile('good.raw.txt', report)
      output.good = report
    } else {
      console.log('❌')
      writeFile('bad.raw.txt', report)
      output.bad = report
    }

    // once we have one of each we can continue
    if (output.bad && output.good) break
  }
}

// clean up both sets of logs and output good.txt and bad.txt
for (const key in output) {
  console.log(`Writing ${key}.txt `)
  const outputLines = output[key].split('\n')
  const filtered = outputLines.filter(filterLogs)
  const combined = filtered.join('\n')
  const cleaned = cleanLogs(combined)
  writeFile(`${key}.txt`, cleaned)
}

// commit everything so we can diff runs with each other
await exec(`git commit -a -m "update flaky test output"`)

// HELPERS

async function runTest() {
  try {
    // pipe stdout and stderr to log.txt
    await exec(`pnpm test:pw &> ${outputDir}/log.txt`)
    // test passed
    return true
  } catch (error) {
    // test failed (we still got the output)
    return false
  }
}

function filterLogs(line) {
  return (
    line.length > 0 &&
    !line.startsWith('[vite]') &&
    !line.startsWith('Download the React DevTools') &&
    !line.includes('websocket:') &&
    !line.includes('Adapters ready')
  )
}

// Reduce visual noise to a minimum in the logs, making it easier to visually spot patterns in the diff
function cleanLogs(output) {
  const A = 65
  const Z = 90
  let i = A
  let tokens = {
    // A1FR9f: A,
    // Q4SKk9: B,
    // etc.
  }

  // replace all instances of any given hash with a single letter
  // this makes it possible to diff different runs of the test
  function tokenize(match, p1) {
    if (i > Z) i = A
    const token = tokens[p1] ?? String.fromCharCode(i++)
    tokens[p1] = token
    return match.replace(p1, token)
  }

  const deviceIds = [...output.matchAll(/{user: (\w+), deviceId: (\w+)}/g)].map(match => {
    return {
      userName: match[1],
      deviceId: match[2],
    }
  })

  const teamIds = [...output.matchAll(/shareId: (\w+)/gi)].map((match, i) => ({
    teamName: `TEAM-${i + 1}`,
    teamId: match[1],
  }))

  const documentIds = [...output.matchAll(/created root document (\w+)/gi)].map((match, i) => ({
    documentName: `DOC-${i + 1}`,
    documentId: match[1],
  }))

  const transforms = [
    // strip ANSI color codes
    [/\u001B\[\d+m/g, ''],
    [/\[[0-9;]+m/g, ''],

    // Remove pnpm commands
    [/^>.*$/gm, ''],

    // Remove quotes
    [/"|'|`/g, ''],

    // Remove prefixes
    [/\[WebServer\]|localfirst\:|auth\:|automerge-repo\:|/g, ''],

    // Remove timestamps
    [/\+\d+(ms|s)/g, ''],
    [/timestamp: \d+/g, ''],

    // Remove snapshot hashes
    [/snapshot,(\w+)/g, ''],

    // Remove %o %s
    [/ %o/g, ''],
    [/ %s/g, ''],

    // replace deviceIds with userNames
    ...deviceIds.map(({ userName, deviceId }) => [new RegExp(deviceId, 'g'), userName]),

    // tokenize teamIds as TEAM-1 etc
    ...teamIds.map(({ teamId, teamName }) => [new RegExp(teamId, 'g'), teamName]),

    // tokenize documentIds as DOC-1 etc
    ...documentIds.map(({ documentId, documentName }) => [new RegExp(documentId, 'g'), documentName]),
    //  truncated documentIds
    ...documentIds.map(({ documentId, documentName }) => [new RegExp(documentId.slice(0, 5), 'g'), documentName]),

    // Tokenize remaining hashes
    [/\b(?=\w*\d)(\w{10,})\b/g, tokenize],

    // Remove buffers
    [/(<Buffer(\w|\s|\.)+(>|$))|({\s*0:[0-9:,]+})|\[(\s|\d|,)+.*?(]|$)/g, '...'],

    // emoji
    [/noise/gi, '🐒'],
    [/alice/gi, '👩🏾'],
    [/bob/gi, '👨‍🦲'],
    [/charlie/gi, '👳🏽‍♂️'],
    [/dwight/gi, '👴'],
    [/herb/gi, '🤓'],
    [/laptop/gi, '💻'],
    [/localhost/gi, '🤖'],
    [/phone/gi, '📱'],

    // Collapse whitespace
    [/( |\t)+/g, ' '],
    [/^\s+/gm, ''],
    [/\s+$/gm, ''],
  ]
  return transforms.reduce((acc, [rx, replacement]) => acc.replaceAll(rx, replacement), output)
}

function readFile(filename) {
  return fs.readFileSync(path.join(outputDir, filename), 'utf8')
}

function writeFile(filename, content) {
  const stringified = typeof content === 'string' ? content : JSON.stringify(content)
  fs.writeFileSync(path.join(outputDir, filename), stringified)
}
	import { exec as _exec } from 'child_process'
	import fs from 'fs'
	import path from 'path'
	import { fileURLToPath } from 'url'
	import { promisify } from 'util'

	const exec = promisify(_exec)

	// ensure outputDir exists
	const __dirname = fileURLToPath(new URL('.', import.meta.url))
	const outputDir = path.join(__dirname, '..', '.flaky')
	if (!fs.existsSync(outputDir)) fs.mkdirSync(outputDir)

	// option to reuse the last test run (useful for tweaking the cleanup function without having to wait for tests to rerun)
	const flag = process.argv[2]
	const reuse = flag === '-r' \|\| flag === '--reuse'

	const output = {
	good: undefined,
	bad: undefined,
	}
	const maxRuns = 25 // don't keep trying forever

	// collect good & bad test logs
	if (reuse) {
	console.log('Reusing the last test run')
	output.good = readFile('good.raw.txt')
	output.bad = readFile('bad.raw.txt')
	} else {
	console.log('Running test until we have one success and one failure')

	for (let i = 0; i < maxRuns; i++) {
	const passed = await runTest()
	const report = readFile('log.txt')

	if (passed) {
	console.log('✅')
	writeFile('good.raw.txt', report)
	output.good = report
	} else {
	console.log('❌')
	writeFile('bad.raw.txt', report)
	output.bad = report
	}

	// once we have one of each we can continue
	if (output.bad && output.good) break
	}
	}

	// clean up both sets of logs and output good.txt and bad.txt
	for (const key in output) {
	console.log(`Writing ${key}.txt `)
	const outputLines = output[key].split('\n')
	const filtered = outputLines.filter(filterLogs)
	const combined = filtered.join('\n')
	const cleaned = cleanLogs(combined)
	writeFile(`${key}.txt`, cleaned)
	}

	// commit everything so we can diff runs with each other
	await exec(`git commit -a -m "update flaky test output"`)

	// HELPERS

	async function runTest() {
	try {
	// pipe stdout and stderr to log.txt
	await exec(`pnpm test:pw &> ${outputDir}/log.txt`)
	// test passed
	return true
	} catch (error) {
	// test failed (we still got the output)
	return false
	}
	}

	function filterLogs(line) {
	return (
	line.length > 0 &&
	!line.startsWith('[vite]') &&
	!line.startsWith('Download the React DevTools') &&
	!line.includes('websocket:') &&
	!line.includes('Adapters ready')
	)
	}

	// Reduce visual noise to a minimum in the logs, making it easier to visually spot patterns in the diff
	function cleanLogs(output) {
	const A = 65
	const Z = 90
	let i = A
	let tokens = {
	// A1FR9f: A,
	// Q4SKk9: B,
	// etc.
	}

	// replace all instances of any given hash with a single letter
	// this makes it possible to diff different runs of the test
	function tokenize(match, p1) {
	if (i > Z) i = A
	const token = tokens[p1] ?? String.fromCharCode(i++)
	tokens[p1] = token
	return match.replace(p1, token)
	}

	const deviceIds = [...output.matchAll(/{user: (\w+), deviceId: (\w+)}/g)].map(match => {
	return {
	userName: match[1],
	deviceId: match[2],
	}
	})

	const teamIds = [...output.matchAll(/shareId: (\w+)/gi)].map((match, i) => ({
	teamName: `TEAM-${i + 1}`,
	teamId: match[1],
	}))

	const documentIds = [...output.matchAll(/created root document (\w+)/gi)].map((match, i) => ({
	documentName: `DOC-${i + 1}`,
	documentId: match[1],
	}))

	const transforms = [
	// strip ANSI color codes
	[/\u001B\[\d+m/g, ''],
	[/\[[0-9;]+m/g, ''],

	// Remove pnpm commands
	[/^>.*$/gm, ''],

	// Remove quotes
	[/"\|'\|`/g, ''],

	// Remove prefixes
	[/\[WebServer\]\|localfirst\:\|auth\:\|automerge-repo\:\|/g, ''],

	// Remove timestamps
	[/\+\d+(ms\|s)/g, ''],
	[/timestamp: \d+/g, ''],

	// Remove snapshot hashes
	[/snapshot,(\w+)/g, ''],

	// Remove %o %s
	[/ %o/g, ''],
	[/ %s/g, ''],

	// replace deviceIds with userNames
	...deviceIds.map(({ userName, deviceId }) => [new RegExp(deviceId, 'g'), userName]),

	// tokenize teamIds as TEAM-1 etc
	...teamIds.map(({ teamId, teamName }) => [new RegExp(teamId, 'g'), teamName]),

	// tokenize documentIds as DOC-1 etc
	...documentIds.map(({ documentId, documentName }) => [new RegExp(documentId, 'g'), documentName]),
	// truncated documentIds
	...documentIds.map(({ documentId, documentName }) => [new RegExp(documentId.slice(0, 5), 'g'), documentName]),

	// Tokenize remaining hashes
	[/\b(?=\w*\d)(\w{10,})\b/g, tokenize],

	// Remove buffers
	[/(<Buffer(\w\|\s\|\.)+(>\|$))\|({\s0:[0-9:,]+})\|\[(\s\|\d\|,)+.?(]\|$)/g, '...'],

	// emoji
	[/noise/gi, '🐒'],
	[/alice/gi, '👩🏾'],
	[/bob/gi, '👨‍🦲'],
	[/charlie/gi, '👳🏽‍♂️'],
	[/dwight/gi, '👴'],
	[/herb/gi, '🤓'],
	[/laptop/gi, '💻'],
	[/localhost/gi, '🤖'],
	[/phone/gi, '📱'],

	// Collapse whitespace
	[/( \|\t)+/g, ' '],
	[/^\s+/gm, ''],
	[/\s+$/gm, ''],
	]
	return transforms.reduce((acc, [rx, replacement]) => acc.replaceAll(rx, replacement), output)
	}

	function readFile(filename) {
	return fs.readFileSync(path.join(outputDir, filename), 'utf8')
	}

	function writeFile(filename, content) {
	const stringified = typeof content === 'string' ? content : JSON.stringify(content)
	fs.writeFileSync(path.join(outputDir, filename), stringified)
	}