Created
December 23, 2022 21:20
-
-
Save GavinRay97/c14698330601fbe60ce98aa7526dad85 to your computer and use it in GitHub Desktop.
Postgres wire protocol extraction from DOM on docs page
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Regex for message headings like "Terminate (F)" or "CopyData (F & B)"
// Where:
// - The "command" group is the name of the message (e.g. Terminate)
// - The "kind" group is the sender marker: a single letter (B = Backend, F = Frontend)
//   or two letters joined by " & " when the heading lists both (e.g. "F & B")
// NOTE: the regex is global (`g`), so `exec`/`test` advance `lastIndex` between calls;
// prefer `String.prototype.matchAll`, which works on an internal copy.
const pgMessageTypeRegex = /^(?<command>\w+) \((?<kind>\w(?: & \w)?)\)$/gm
// Regex for the type of a variable, like:
// - Int16, Int32, Byten, Byte4, String
// - Byte1('S')
// - Int32(4)
// - Int16[N]
// Where:
// - The "type" group is the base type (Int16, Int32, Byten, Byte4, String, Byte1)
// - For types like Int32(4), the "value" group holds the value (4)
// - For Byte1, the "value" group is the character value (e.g. 'S')
// - For types like Int16[N], the "arraySize" group holds the array size (N)
const fieldTypeRegex =
  /^(?<type>Int16|Int32|Byten|Byte4|String|Byte1)(?:\((?<value>.+)\))?(?:\[(?<arraySize>\w+)\])?$/gm

/**
 * Extracts the message field definitions from the DOM of the Postgres wire
 * protocol docs page.
 *
 * The selectors rely on this layout: the first `dl.variablelist` under
 * `#docContent` holds one `<dt id="...">` per message. The text of that
 * element's `span.term` is used verbatim as the key in the result. The element
 * immediately following the `<dt>` contains one or more `dl.variablelist`
 * lists made of `<dt>` (field type) / `<dd>` (field description) pairs.
 *
 * @param document - The parsed docs page.
 * @returns A map from message name to its fields in document order. Each field
 *   carries the raw `<dt>` text as `name`, the parts captured by
 *   `fieldTypeRegex` (`type`, and `value` / `arraySize` when present), and the
 *   text of the following `<dd>` as `description`. Fields whose `<dt>` text
 *   does not match `fieldTypeRegex` are omitted.
 * @throws Error if `#docContent dl.variablelist` is not found in the document.
 */
function parsePostgresWireProtocolDocs(document: Document) {
  const messageFormatContainer = document.querySelector("#docContent dl.variablelist")
  if (!messageFormatContainer) {
    throw new Error("Could not find message format container")
  }

  const messages: {
    [messageName: string]: {
      name: string
      type: string
      description: string
      value?: string
      arraySize?: string
    }[]
  } = {}

  // Only the per-message <dt> elements carry an id; the field-level <dt>s do not.
  for (const messageTerm of messageFormatContainer.querySelectorAll("dt[id]")) {
    const messageName = messageTerm.querySelector("span.term")?.textContent
    if (messageName == null) {
      // No heading text to key the message by; skip it instead of crashing.
      continue
    }

    const fields: (typeof messages)[string] = []
    messages[messageName] = fields

    // Each nested <dl.variablelist> in the sibling element holds a set of fields.
    const fieldLists = messageTerm.nextElementSibling?.querySelectorAll("dl.variablelist") ?? []
    for (const fieldList of fieldLists) {
      // Field created by the most recent <dt>; the next <dd> describes it.
      // Reset on every <dt> so a description is never attached to an older
      // field when the current <dt> did not match the type regex.
      let lastField: (typeof fields)[number] | undefined = undefined

      // <dt><span class="term">Byte1('B')</span></dt>
      // <dd>
      //   <p>Identifies the message as a Bind command.</p>
      // </dd>
      for (const node of fieldList.querySelectorAll("dt, dd")) {
        switch (node.tagName) {
          case "DT": {
            lastField = undefined
            // Named `fieldName` (not `name`) so it cannot shadow the message
            // name that keys `messages`; the braces scope it to this case.
            const fieldName = node.querySelector("span.term")?.textContent ?? ""
            // Copy the shared global regex so matching always starts at index 0.
            for (const match of fieldName.matchAll(new RegExp(fieldTypeRegex))) {
              const groups = match.groups
              if (!groups) continue
              lastField = {
                name: fieldName,
                type: groups.type,
                description: "",
                value: groups.value,
                arraySize: groups.arraySize,
              }
              fields.push(lastField)
            }
            break
          }
          case "DD": {
            if (lastField) {
              // Prefer the first <p>; fall back to the whole <dd> text if there is none.
              lastField.description =
                node.querySelector("p")?.textContent ?? node.textContent ?? ""
            }
            break
          }
        }
      }
    }
  }

  return messages
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment