Skip to content

Instantly share code, notes, and snippets.

@GavinRay97
Created December 23, 2022 21:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save GavinRay97/c14698330601fbe60ce98aa7526dad85 to your computer and use it in GitHub Desktop.
Save GavinRay97/c14698330601fbe60ce98aa7526dad85 to your computer and use it in GitHub Desktop.
Postgres wire protocol extraction from DOM on docs page
// Regex for message heading lines like "Terminate (F)" or "CopyData (F & B)"
// Where:
// - The "command" group is the name of the message (e.g. Terminate)
// - The "kind" group is the sender marker: a single letter for Backend/Frontend
//   ("B" / "F"), or two letters joined by " & " (e.g. "F & B") for messages that
//   both sides can send
// NOTE: the regex carries the `g` flag, so `.test()` / `.exec()` keep state in
// `lastIndex`; prefer `String.prototype.matchAll`, which works on a private copy.
const pgMessageTypeRegex = /^(?<command>\w+) \((?<kind>\w(?: & \w)?)\)$/gm
// Regex for the type of a variable, like:
// - Int8, Int16, Int32, Byten, Byte4, String
// - Byte1('S')
// - Int32(4)
// - Int16[N]
// Where:
// - The "type" group is the type name (Int16, Int32, Byten, Byte4, String, Byte1, etc).
//   Any numeric width is accepted (Int8, Int64, ...), not just a fixed list.
// - For types like Int32(4), the "value" group holds the value (4)
// - For Byte1, the "value" group is the character value (e.g. 'S')
// - For types like Int16[N], the "arraySize" group holds the array size (N)
const fieldTypeRegex =
  /^(?<type>Int\d+|Byte\d+|Byten|String)(?:\((?<value>.+)\))?(?:\[(?<arraySize>\w+)\])?$/gm

/**
 * Extracts the message format definitions from a document's DOM.
 *
 * The result maps the text of each message heading (the `span.term` inside a
 * `dt[id]`) to the list of fields found beneath it. For each field, `name` is the
 * raw text of the field's term, `type` / `value` / `arraySize` are the groups
 * captured by `fieldTypeRegex`, and `description` is the text of the first `<p>`
 * in the `<dd>` that follows the term.
 *
 * Field terms that do not match `fieldTypeRegex` are skipped, together with
 * their description.
 *
 * @param document - The document to read the message definitions from.
 * @returns An object keyed by message heading text, each value being that
 *   message's fields in the order they were visited.
 * @throws Error if `#docContent dl.variablelist` is not present in the document.
 */
function parsePostgresWireProtocolDocs(document: Document) {
  type MessageField = {
    name: string
    type: string
    description: string
    value?: string
    arraySize?: string
  }

  const messageFormatContainer = document.querySelector("#docContent dl.variablelist")
  if (!messageFormatContainer) {
    throw new Error("Could not find message format container")
  }

  const messages: { [messageName: string]: MessageField[] } = {}

  // Each message is a <dt id="..."> whose fields live in the element right after it
  for (const el of messageFormatContainer.querySelectorAll("dt[id]")) {
    // Get the name out of the <span> element
    const messageName = el.querySelector("span.term")?.textContent
    if (messageName == null) {
      continue
    }

    // Add the message to the messages object
    const fields: MessageField[] = []
    messages[messageName] = fields

    const fieldsContainer = el.nextElementSibling
    if (!fieldsContainer) {
      continue
    }

    // Iterate through the <dl.variablelist> elements, each holding a set of fields
    // NOTE(review): both selectors match descendants at any depth, so a field list
    // nested inside another one would be visited twice; confirm against the real page.
    for (const fieldList of fieldsContainer.querySelectorAll("dl.variablelist")) {
      // The field created by the most recent <dt>; the next <dd> describes it.
      // Stays undefined when that <dt> did not look like a field type, so that its
      // description is not attached to an unrelated earlier field.
      let currentField: MessageField | undefined

      // Loop through the field and description elements
      // <dt><span class="term">Byte1('B')</span></dt>
      // <dd>
      // <p>Identifies the message as a Bind command.</p>
      // </dd>
      for (const node of fieldList.querySelectorAll("dt, dd")) {
        if (node.tagName === "DT") {
          // A <dt> element holds the name (type expression) of the field
          currentField = undefined
          const fieldTerm = node.querySelector("span.term")?.textContent ?? ""
          // matchAll works on its own copy of the regex, so the shared `g`-flagged
          // constant is never mutated and needs no per-iteration clone
          for (const match of fieldTerm.matchAll(fieldTypeRegex)) {
            const groups = match.groups
            if (!groups) {
              continue
            }
            const { type, value, arraySize } = groups
            currentField = {
              name: fieldTerm,
              type,
              description: "",
              value,
              arraySize,
            }
            fields.push(currentField)
          }
        } else if (node.tagName === "DD") {
          // A <dd> element holds the description of the preceding field
          const fieldDescription = node.querySelector("p")?.textContent
          if (currentField !== undefined && fieldDescription != null) {
            currentField.description = fieldDescription
          }
        }
      }
    }
  }

  return messages
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment