Skip to content

Instantly share code, notes, and snippets.

@Kreijstal
Last active April 30, 2024 22:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Kreijstal/f0297ee88d71974cc2fb60ff348ad301 to your computer and use it in GitHub Desktop.
Save Kreijstal/f0297ee88d71974cc2fb60ff348ad301 to your computer and use it in GitHub Desktop.
binary parser combinator js
/**
 * Tagged outcome of an operation: tag `"ok"` carries a success payload,
 * tag `"error"` carries a failure description.
 * @param {string} tag - `"ok"` or `"error"`.
 * @param {any} value - Payload associated with the tag.
 */
function Result(tag, value) {
  Object.assign(this, { tag, value });
}

/** Builds a successful Result wrapping `value`. */
Result.ok = (value) => new Result("ok", value);

/** Builds a failed Result wrapping `value`. */
Result.error = (value) => new Result("error", value);

/**
 * Returns the payload of an ok Result.
 * @throws {Error} With a fixed message when the Result is not ok.
 */
Result.prototype.unwrap = function () {
  if (this.tag !== "ok") {
    throw new Error("called `Result.unwrap()` on an `Error` value");
  }
  return this.value;
};

/**
 * Returns the payload of an ok Result.
 * @param {string} message - Message of the Error thrown when not ok.
 * @throws {Error} With `message` when the Result is not ok.
 */
Result.prototype.expect = function (message) {
  if (this.tag !== "ok") {
    throw new Error(message);
  }
  return this.value;
};

/** Applies `fn` to the payload of an ok Result; other Results are returned as-is. */
Result.prototype.okMap = function (fn) {
  return this.tag === "ok" ? Result.ok(fn(this.value)) : this;
};

/** Applies `fn` to the payload of an error Result; other Results are returned as-is. */
Result.prototype.errMap = function (fn) {
  return this.tag === "error" ? Result.error(fn(this.value)) : this;
};

/** True when the tag is `"error"`. */
Result.prototype.isErr = function () {
  const { tag } = this;
  return tag === "error";
};

/** True when the tag is `"ok"`. */
Result.prototype.isOk = function () {
  const { tag } = this;
  return tag === "ok";
};
/**
* @template T
* @typedef ParserOk
* @prop {"ok"} tag
* @prop {ParserNext<T>} value
*/
/**
* @typedef ParserErr
* @prop {"error"} tag
* @prop {any} value
*/
/**
* @template T
* @typedef {ParserOk<T>|ParserErr} ParserResult
*/
/**
* @template T
* @typedef Address
* @prop {number} start
* @prop {number} end
* @prop {T} value
*
*/
/**
 * A `[start, end)` span paired with a `value` accessor property.
 *
 * When no getter/setter is supplied, `value` reads and writes the private
 * `_value` field, which is seeded with `defaultValue` (or null).
 *
 * @param {number} [start] - Span start; stored as null when absent.
 * @param {number} [end] - Span end; stored as null when absent.
 * @param {Function} [valueGetter] - Custom getter for `value`.
 * @param {Function} [valueSetter] - Custom setter for `value`.
 * @param {any} [defaultValue] - Initial content of `_value`.
 */
function ByteSlice(start, end, valueGetter, valueSetter, defaultValue) {
  const slice = this;
  slice.start = start ?? null;
  slice.end = end ?? null;
  slice._value = defaultValue === undefined ? null : defaultValue;

  const readStored = function () {
    return slice._value;
  };
  const writeStored = function (newValue) {
    slice._value = newValue;
  };

  Object.defineProperty(slice, 'value', {
    configurable: true,
    enumerable: true,
    get: valueGetter || readStored,
    set: valueSetter || writeStored,
  });
}

/**
 * Copies the span `[start, end)` out of `bytes` using `bytes.slice`.
 * @param {{ length: number, slice: Function }} bytes
 * @throws {Error} When start/end is null, start is negative, or end exceeds `bytes.length`.
 */
ByteSlice.prototype.getByteSlice = function (bytes) {
  const { start, end } = this;
  const usable =
    start !== null && end !== null && !(start < 0) && !(end > bytes.length);
  if (!usable) {
    throw new Error('Invalid start or end index');
  }
  return bytes.slice(start, end);
};
/**
* @template T
* @typedef ParserEnum
* @prop {string} tag
* @prop {Address<T>} value
*/
/**
 * A tagged parse node: `tag` names the kind of node and `value` is the
 * ByteSlice-like object describing where it came from and what it holds.
 * @param {string} string - The tag.
 * @param {any} value - The slice attached to the tag.
 */
function ParseEnum(string, value) {
  Object.assign(this, { tag: string, value });
}

/** Builds an `"empty"` node with a zero-width slice at position `i` and a null value. */
ParseEnum.empty = function (i) {
  const zeroWidth = new ByteSlice(i, i, null, null, null);
  return new ParseEnum("empty", zeroWidth);
};

/**
 * Joins this node with `a` into a new `"concat"` node spanning from this
 * node's start to `a`'s end. Operands already tagged `"concat"` contribute
 * their members, so the resulting member list stays one level deep.
 * @param {ParseEnum} a - Node to append.
 * @returns {ParseEnum}
 */
ParseEnum.prototype.concat = function (a) {
  const start = this.value.start;
  const end = a.value.end;
  const members = [
    ...(this.tag === "concat" ? this.value.value : [this]),
    ...(a.tag === "concat" ? a.value.value : [a]),
  ];
  return new ParseEnum("concat", new ByteSlice(start, end, null, null, members));
};
/**
* @template T
* @typedef ParserNext
* @prop {number} nextIndex
* @prop {ParserEnum<T>} value
*/
/**
* A parser: reads from `bytes` starting at `index` and returns a ParserResult.
* @template T
* @callback ParserCallback
* @param {number} index
* @param {Uint8Array} bytes
* @returns {ParserResult<T>}
*/
// Goal: every parser result keeps a mapping back to its original address, which is why all values are wrapped in an Address.
/**
 * Parser for a single unsigned byte.
 *
 * On success the node is tagged `"u1"` and its slice covers
 * `[index, index + 1)`. The slice's `value` reads and writes `bytes[index]`
 * directly, so it always reflects the current content of `bytes`.
 *
 * @type {ParserCallback<number>}
 */
function u1(index, bytes) {
  if (index >= bytes.length) {
    const message = `u1: Index out of bounds: Index ${index} exceeds the length of the byte array (${bytes.length})`;
    return Result.error(message);
  }

  const readByte = function () {
    return bytes[index];
  };
  const writeByte = function (newValue) {
    const inRange = newValue >= 0 && newValue <= 255;
    if (!inRange) {
      throw new Error("Invalid value for u1: " + newValue);
    }
    bytes[index] = newValue;
  };

  const slice = new ByteSlice(index, index + 1, readByte, writeByte, bytes[index]);
  return Result.ok({ nextIndex: index + 1, value: new ParseEnum("u1", slice) });
}
/**
 * Builds a parser that runs `parser` exactly `n` times in a row, each run
 * starting where the previous one ended.
 *
 * On success the node is tagged `"repeatEnum"`, spans from the starting
 * index to the index after the last run, and exposes the collected nodes as
 * a read-only `value` (assigning to it throws). If any run fails, the error
 * is wrapped as `[context string, original error, nodes collected so far]`.
 *
 * @param {ParserCallback<any>} parser - Parser to repeat.
 * @param {number} n - Number of repetitions.
 */
function applyNTimes(parser, n) {
  return function (index, bytes) {
    const values = [];
    let cursor = index;
    let iteration = 0;

    while (iteration < n) {
      const outcome = parser(cursor, bytes);
      if (outcome.isErr()) {
        // Propagate the failure together with where it happened and what was parsed before it.
        return outcome.errMap((cause) => [
          `applyNTimes:${n},iteration:${iteration},index:${cursor.toString(16)}`,
          cause,
          values,
        ]);
      }
      const step = outcome.unwrap();
      values.push(step.value);
      cursor = step.nextIndex;
      iteration += 1;
    }

    const readValues = function () {
      return values;
    };
    const rejectWrite = function (newValue) {
      throw new Error("Cannot set value of repeatEnum");
    };
    const slice = new ByteSlice(index, cursor, readValues, rejectWrite, values);
    return Result.ok({ nextIndex: cursor, value: new ParseEnum("repeatEnum", slice) });
  };
}
/**
 * Parser for an unsigned 16-bit big-endian integer.
 *
 * On success the node is tagged `"u2be"` and its slice covers
 * `[index, index + 2)`. The slice's `value` reads from / writes to the
 * underlying buffer on every access.
 *
 * @param {number} index - Offset into `bytes` of the first byte.
 * @param {Uint8Array} bytes - Source bytes; may be a view into a larger buffer.
 * @returns {Result} `Result.ok({ nextIndex, value })`, or
 *   `Result.error('Index out of bounds')` when fewer than 2 bytes remain.
 */
function u2be(index, bytes) {
  if (index + 1 >= bytes.length) {
    return Result.error('Index out of bounds');
  }
  const start = index;
  const end = index + 2;
  // Anchor the DataView at the view's own byteOffset: when `bytes` is a
  // subarray, offset 0 of `bytes.buffer` is not `bytes[0]`.
  const openView = () => new DataView(bytes.buffer, bytes.byteOffset);
  const u2beEnum = new ParseEnum("u2be", new ByteSlice(start, end,
    function () {
      return openView().getUint16(start, false);
    },
    function (newValue) {
      if (newValue >= 0 && newValue <= 65535) {
        openView().setUint16(start, newValue, false);
      } else {
        throw new Error('Invalid value for u2be: ' + newValue);
      }
    }
  ));
  return Result.ok({ nextIndex: end, value: u2beEnum });
}
/**
 * Parser for an unsigned 16-bit little-endian integer.
 *
 * On success the node is tagged `"u2le"` and its slice covers
 * `[index, index + 2)`. The slice's `value` reads from / writes to the
 * underlying buffer on every access.
 *
 * @param {number} index - Offset into `bytes` of the first byte.
 * @param {Uint8Array} bytes - Source bytes; may be a view into a larger buffer.
 * @returns {Result} `Result.ok({ nextIndex, value })`, or
 *   `Result.error('Index out of bounds')` when fewer than 2 bytes remain.
 */
function u2le(index, bytes) {
  if (index + 1 >= bytes.length) {
    return Result.error('Index out of bounds');
  }
  const start = index;
  const end = index + 2;
  // Anchor the DataView at the view's own byteOffset: when `bytes` is a
  // subarray, offset 0 of `bytes.buffer` is not `bytes[0]`.
  const openView = () => new DataView(bytes.buffer, bytes.byteOffset);
  const u2leEnum = new ParseEnum('u2le', new ByteSlice(start, end,
    function () {
      return openView().getUint16(start, true);
    },
    function (newValue) {
      if (newValue >= 0 && newValue <= 65535) {
        openView().setUint16(start, newValue, true);
      } else {
        throw new Error('Invalid value for u2le: ' + newValue);
      }
    }
  ));
  return Result.ok({ nextIndex: end, value: u2leEnum });
}
/**
 * Parser for an unsigned 32-bit big-endian integer.
 *
 * On success the node is tagged `"u4be"` and its slice covers
 * `[index, index + 4)`. The slice's `value` reads from / writes to the
 * underlying buffer on every access.
 *
 * @param {number} index - Offset into `bytes` of the first byte.
 * @param {Uint8Array} bytes - Source bytes; may be a view into a larger buffer.
 * @returns {Result} `Result.ok({ nextIndex, value })`, or
 *   `Result.error('Index out of bounds')` when fewer than 4 bytes remain.
 */
function u4be(index, bytes) {
  if (index + 3 >= bytes.length) {
    return Result.error('Index out of bounds');
  }
  const start = index;
  const end = index + 4;
  // Anchor the DataView at the view's own byteOffset: when `bytes` is a
  // subarray, offset 0 of `bytes.buffer` is not `bytes[0]`.
  const openView = () => new DataView(bytes.buffer, bytes.byteOffset);
  const u4beEnum = new ParseEnum('u4be', new ByteSlice(start, end,
    function () {
      return openView().getUint32(start, false);
    },
    function (newValue) {
      if (newValue >= 0 && newValue <= 4294967295) {
        openView().setUint32(start, newValue, false);
      } else {
        throw new Error('Invalid value for u4be: ' + newValue);
      }
    }
  ));
  return Result.ok({ nextIndex: end, value: u4beEnum });
}
/**
 * Parser for an unsigned 32-bit little-endian integer.
 *
 * On success the node is tagged `"u4le"` and its slice covers
 * `[index, index + 4)`. The slice's `value` reads from / writes to the
 * underlying buffer on every access.
 *
 * @param {number} index - Offset into `bytes` of the first byte.
 * @param {Uint8Array} bytes - Source bytes; may be a view into a larger buffer.
 * @returns {Result} `Result.ok({ nextIndex, value })`, or
 *   `Result.error('Index out of bounds')` when fewer than 4 bytes remain.
 */
function u4le(index, bytes) {
  if (index + 3 >= bytes.length) {
    return Result.error('Index out of bounds');
  }
  const start = index;
  const end = index + 4;
  // Anchor the DataView at the view's own byteOffset: when `bytes` is a
  // subarray, offset 0 of `bytes.buffer` is not `bytes[0]`.
  const openView = () => new DataView(bytes.buffer, bytes.byteOffset);
  const u4leEnum = new ParseEnum('u4le', new ByteSlice(start, end,
    function () {
      return openView().getUint32(start, true);
    },
    function (newValue) {
      if (newValue >= 0 && newValue <= 4294967295) {
        openView().setUint32(start, newValue, true);
      } else {
        throw new Error('Invalid value for u4le: ' + newValue);
      }
    }
  ));
  return Result.ok({ nextIndex: end, value: u4leEnum });
}
/**
 * Parser for an unsigned 64-bit big-endian integer, exposed as a BigInt.
 *
 * On success the node is tagged `"u8be"` and its slice covers
 * `[index, index + 8)`. The slice's `value` reads from / writes to the
 * underlying buffer on every access.
 *
 * @param {number} index - Offset into `bytes` of the first byte.
 * @param {Uint8Array} bytes - Source bytes; may be a view into a larger buffer.
 * @returns {Result} `Result.ok({ nextIndex, value })`, or
 *   `Result.error('Index out of bounds')` when fewer than 8 bytes remain.
 */
function u8be(index, bytes) {
  if (index + 7 >= bytes.length) {
    return Result.error('Index out of bounds');
  }
  const start = index;
  const end = index + 8;
  // Anchor the DataView at the view's own byteOffset: when `bytes` is a
  // subarray, offset 0 of `bytes.buffer` is not `bytes[0]`.
  const openView = () => new DataView(bytes.buffer, bytes.byteOffset);
  const u8beEnum = new ParseEnum('u8be', new ByteSlice(start, end,
    function () {
      return openView().getBigUint64(start, false);
    },
    function (newValue) {
      if (newValue >= 0n && newValue <= 18446744073709551615n) {
        openView().setBigUint64(start, newValue, false);
      } else {
        throw new Error('Invalid value for u8be: ' + newValue);
      }
    }
  ));
  return Result.ok({ nextIndex: end, value: u8beEnum });
}
/**
 * Parser for an unsigned 64-bit little-endian integer, exposed as a BigInt.
 *
 * On success the node is tagged `"u8le"` and its slice covers
 * `[index, index + 8)`. The slice's `value` reads from / writes to the
 * underlying buffer on every access.
 *
 * @param {number} index - Offset into `bytes` of the first byte.
 * @param {Uint8Array} bytes - Source bytes; may be a view into a larger buffer.
 * @returns {Result} `Result.ok({ nextIndex, value })`, or
 *   `Result.error('Index out of bounds')` when fewer than 8 bytes remain.
 */
function u8le(index, bytes) {
  if (index + 7 >= bytes.length) {
    return Result.error('Index out of bounds');
  }
  const start = index;
  const end = index + 8;
  // Anchor the DataView at the view's own byteOffset: when `bytes` is a
  // subarray, offset 0 of `bytes.buffer` is not `bytes[0]`.
  const openView = () => new DataView(bytes.buffer, bytes.byteOffset);
  const u8leEnum = new ParseEnum('u8le', new ByteSlice(start, end,
    function () {
      return openView().getBigUint64(start, true);
    },
    function (newValue) {
      if (newValue >= 0n && newValue <= 18446744073709551615n) {
        openView().setBigUint64(start, newValue, true);
      } else {
        throw new Error('Invalid value for u8le: ' + newValue);
      }
    }
  ));
  return Result.ok({ nextIndex: end, value: u8leEnum });
}
/**
 * Parser for a 32-bit big-endian IEEE-754 float.
 *
 * On success the node is tagged `"f4be"` and its slice covers
 * `[index, index + 4)`. The slice's `value` reads from / writes to the
 * underlying buffer on every access.
 *
 * @param {number} index - Offset into `bytes` of the first byte.
 * @param {Uint8Array} bytes - Source bytes; may be a view into a larger buffer.
 * @returns {Result} `Result.ok({ nextIndex, value })`, or
 *   `Result.error("Index out of bounds")` when fewer than 4 bytes remain.
 */
function f4be(index, bytes) {
  if (index + 3 >= bytes.length) {
    return Result.error("Index out of bounds");
  }
  const start = index;
  const end = index + 4;
  // Anchor the DataView at the view's own byteOffset: when `bytes` is a
  // subarray, offset 0 of `bytes.buffer` is not `bytes[0]`.
  const openView = () => new DataView(bytes.buffer, bytes.byteOffset);
  const f4beEnum = new ParseEnum("f4be", new ByteSlice(start, end,
    function () {
      return openView().getFloat32(start, false);
    },
    function (newValue) {
      openView().setFloat32(start, newValue, false);
    }
  ));
  return Result.ok({ nextIndex: end, value: f4beEnum });
}
/**
 * Parser for a 32-bit little-endian IEEE-754 float.
 *
 * On success the node is tagged `"f4le"` and its slice covers
 * `[index, index + 4)`. The slice's `value` reads from / writes to the
 * underlying buffer on every access.
 *
 * @param {number} index - Offset into `bytes` of the first byte.
 * @param {Uint8Array} bytes - Source bytes; may be a view into a larger buffer.
 * @returns {Result} `Result.ok({ nextIndex, value })`, or
 *   `Result.error("Index out of bounds")` when fewer than 4 bytes remain.
 */
function f4le(index, bytes) {
  if (index + 3 >= bytes.length) {
    return Result.error("Index out of bounds");
  }
  const start = index;
  const end = index + 4;
  // Anchor the DataView at the view's own byteOffset: when `bytes` is a
  // subarray, offset 0 of `bytes.buffer` is not `bytes[0]`.
  const openView = () => new DataView(bytes.buffer, bytes.byteOffset);
  const f4leEnum = new ParseEnum("f4le", new ByteSlice(start, end,
    function () {
      return openView().getFloat32(start, true);
    },
    function (newValue) {
      openView().setFloat32(start, newValue, true);
    }
  ));
  return Result.ok({ nextIndex: end, value: f4leEnum });
}
/**
 * Builds a parser that runs `parsers` one after another, each starting where
 * the previous one ended, and groups their nodes under a `"concat"` node.
 *
 * The resulting slice spans from the first node's start to the last node's
 * end; with an empty `parsers` list it is a zero-width span at `index`. Its
 * `value` returns the array of collected nodes.
 *
 * Failures are wrapped as `["concatP", originalError]`. A parser that
 * succeeds with something other than a `{tag, value}` object yields an error
 * describing the offending value.
 *
 * @param {any[]} parsers - Parsers to run in sequence.
 */
function concatP(parsers) {
  return function (index, bytes) {
    let currentIndex = index;
    const results = [];
    // Default to a zero-width span at `index` so that an empty parser list
    // still produces a valid slice instead of one with -1 bounds.
    let start = index;
    let end = index;
    for (let i = 0; i < parsers.length; i++) {
      const result = parsers[i](currentIndex, bytes);
      // If the parser returns an error, propagate the error.
      if (result.isErr()) {
        return result.errMap((_) => ["concatP", _]);
      }
      const { nextIndex, value } = result.unwrap();
      if (!(Object.keys(value).length === 2 && "value" in value && "tag" in value)) {
        return Result.error([
          "concatP:Parser returns ok, but it must be object {tag,value}, returned: ",
          value
        ]);
      }
      // The span starts where the first parsed node starts...
      if (results.length === 0) {
        start = value.value.start;
      }
      // ...and ends where the most recently parsed node ends.
      end = value.value.end;
      results.push(value);
      currentIndex = nextIndex;
    }
    const concatEnum = new ParseEnum("concat", new ByteSlice(start, end,
      function () {
        // The nodes produced by the individual parsers, in order.
        return results;
      }
    ));
    return Result.ok({ nextIndex: currentIndex, value: concatEnum });
  };
}
/**
 * Builds a parser that consumes exactly `n` bytes.
 *
 * On success the node is tagged `"bufferView"` and spans `[index, index + n)`.
 * Reading its `value` returns `bytes.slice(...)` of that span; assigning an
 * `n`-element sequence copies it into `bytes` element by element, and any
 * other length throws.
 *
 * @param {number} n - Number of bytes to take.
 */
function takeN(n) {
  return function (index, bytes) {
    const end = index + n;
    if (end > bytes.length) {
      const available = bytes.length - index;
      return Result.error(
        `takeN: Index out of bounds: Required ${n} bytes but only ${available} bytes available`
      );
    }

    const readSpan = function () {
      return bytes.slice(index, end);
    };
    const writeSpan = function (newValue) {
      if (newValue.length !== n) {
        throw new Error("Invalid length for buffer view: " + newValue.length);
      }
      for (let offset = 0; offset < n; offset++) {
        bytes[index + offset] = newValue[offset];
      }
    };

    const bufferView = new ParseEnum("bufferView", new ByteSlice(index, end, readSpan, writeSpan));
    return Result.ok({ nextIndex: end, value: bufferView });
  };
}
/**
 * Parser that always succeeds without consuming input: the node is tagged
 * `"empty"`, spans `[index, index)`, and its `value` reads as null.
 * @param {number} index
 * @param {Uint8Array} bytes - Unused.
 */
function acceptAndDoNothing(index, bytes) {
  const alwaysNull = function () {
    return null;
  };
  const emptySlice = new ByteSlice(index, index, alwaysNull);
  const emptyNode = new ParseEnum("empty", emptySlice);
  return Result.ok({ nextIndex: index, value: emptyNode });
}
/**
 * Builds a parser that runs `parser` and, on success, replaces the parsed
 * node with `transformFn(node)` while keeping the same `nextIndex`.
 * A failed result is returned untouched.
 *
 * @template T
 * @template U
 * @param {ParserCallback<T>} parser - Parser whose node is transformed.
 * @param {(arg0: ParserEnum<T>) => ParserEnum<U>} transformFn
 */
function mapParser(parser, transformFn) {
  return function (index, bytes) {
    const outcome = parser(index, bytes);
    if (outcome.isErr()) {
      return outcome;
    }
    const parsed = outcome.unwrap();
    return Result.ok({
      nextIndex: parsed.nextIndex,
      value: transformFn(parsed.value),
    });
  };
}
/**
 * Builds a parser that runs `parser`, feeds the parsed node to `transformFn`
 * to obtain a second parser, and runs that second parser at the position
 * where the first one stopped. The result of the second parser is returned
 * as-is; the first node is not included in it.
 *
 * Errors from the first parser are wrapped as `["flatMapParser", error]`;
 * errors from the second one are wrapped together with the first result.
 *
 * @param {(arg0: any, arg1: any) => any} parser
 * @param {(arg0: ParserEnum<any>) => any} transformFn - Returns the follow-up parser.
 */
function flatMapParser(parser, transformFn) {
  return function (index, bytes) {
    const first = parser(index, bytes);
    if (first.isErr()) {
      return first.errMap((cause) => ["flatMapParser", cause]);
    }

    /** @type {ParserNext<any>} */
    const { value: parsed, nextIndex } = first.unwrap();
    const hasEnumShape =
      Object.keys(parsed).length === 2 && "value" in parsed && "tag" in parsed;
    if (!hasEnumShape) {
      return Result.error([
        "flatMapParser:Parser returns ok, but it must be object {tag,value}, returned: ",
        parsed,
      ]);
    }

    // The first node decides which parser handles the rest of the input.
    const followUp = transformFn(parsed)(nextIndex, bytes);
    if (!followUp.isErr()) {
      return followUp;
    }
    return followUp.errMap((cause) => [
      "flatMapParser: Initial result: ",
      first,
      "Error in subsequent parser: ",
      cause,
    ]);
  };
}
/**
 * Turns a parsed length node into a parser that takes that many bytes,
 * by passing the node's inner value to `takeN`.
 * @param {{ value: { value: any; }; }} lengthEnum - Node whose slice value is the byte count.
 */
function parseBytesBasedOnLength(lengthEnum) {
  const { value: lengthSlice } = lengthEnum;
  return takeN(lengthSlice.value);
}
/**
 * Builds a parser that tries each of `parsers` at the same position and
 * returns the first ok result. Later alternatives are not run once one
 * succeeds. If none succeeds, the result is an error naming the index.
 *
 * @param {Array<Parser>} parsers - Alternatives, tried in order.
 */
function orParser(parsers) {
  return function (index, bytes) {
    for (const candidate of parsers) {
      const attempt = candidate(index, bytes);
      if (!attempt.isOk()) {
        continue;
      }
      return attempt;
    }
    const message = `orParser: None of the parsers matched at index ${index}`;
    return Result.error(message);
  };
}
/**
 * Builds a parser that runs `parser`, feeds the parsed node to `transformFn`
 * to obtain a second parser, runs that one where the first stopped, and
 * groups both nodes under a `"concat"` node spanning from the first node's
 * start to the second node's end.
 *
 * Errors from the first parser are wrapped as `["chainParser", error]`;
 * errors from the second one are wrapped together with the first result.
 *
 * @param {(index: number, bytes: Uint8Array) => Result} parser
 * @param {(node: ParseEnum) => (index: number, bytes: Uint8Array) => Result} transformFn
 */
function chainParser(parser, transformFn) {
  return function (index, bytes) {
    const result = parser(index, bytes);
    if (result.isErr()) {
      return result.errMap((_) => ["chainParser", _]);
    }
    const { value, nextIndex } = result.unwrap();
    if (!(Object.keys(value).length === 2 && "value" in value && "tag" in value)) {
      return Result.error([
        "chainParser:Parser returns ok, but it must be object {tag,value}, returned: ",
        value
      ]);
    }
    const nextParser = transformFn(value);
    const nextParsedResult = nextParser(nextIndex, bytes);
    if (nextParsedResult.isErr()) {
      return nextParsedResult.errMap((_) => ["chainParser: Initial result: ", result, "Error in subsequent parser: ", _]);
    }
    const { value: nextValue, nextIndex: finalIndex } = nextParsedResult.unwrap();
    const concatEnum = new ParseEnum("concat", new ByteSlice(
      value.value.start,
      // `nextValue` is the node; the span end lives on its slice (`.value`).
      nextValue.value.end,
      () => [value, nextValue]
    ));
    return Result.ok({ nextIndex: finalIndex, value: concatEnum });
  };
}
/**
 * Runs two parsers back to back and merges their nodes into one flat
 * `"concat"` node.
 *
 * A node that is itself tagged `"concat"` and holds an array contributes its
 * members rather than itself, so the merged list is not nested. A failure of
 * either parser is returned unchanged.
 *
 * @param {Parser} firstParser - Applied at the starting index.
 * @param {Parser} secondParser - Applied where the first parser stopped.
 * @returns {Parser} Parser producing the flat concatenation of both results.
 */
function flattenConcat(firstParser, secondParser) {
  // Members of a concat node, or the node itself wrapped in a one-element list.
  const membersOf = (node) =>
    node.tag === "concat" && Array.isArray(node.value.value)
      ? node.value.value
      : [node];

  return function (index, bytes) {
    const firstResult = firstParser(index, bytes);
    if (firstResult.isErr()) {
      return firstResult;
    }
    const head = firstResult.unwrap();

    const secondResult = secondParser(head.nextIndex, bytes);
    if (secondResult.isErr()) {
      return secondResult;
    }
    const tail = secondResult.unwrap();

    const combinedValues = [...membersOf(head.value), ...membersOf(tail.value)];
    const readCombined = function () {
      return combinedValues;
    };
    const flatConcatEnum = new ParseEnum(
      "concat",
      new ByteSlice(head.value.value.start, tail.value.value.end, readCombined)
    );
    return Result.ok({ nextIndex: tail.nextIndex, value: flatConcatEnum });
  };
}
/**
 * Wraps `parser` in an assertion builder. `expectP(parser).toBe(val, err)`
 * returns a parser that runs `parser` and then checks the parsed value.
 *
 * @param {(arg0: any, arg1: any) => any} parser - Parser whose value is checked.
 * @returns {{ toBe: (val: any, err?: any) => (index: number, bytes: Uint8Array) => Result }}
 */
function expectP(parser) {
  // Renders a value as upper-case hex; null/undefined have no `toString`,
  // so they are rendered via `String()` instead of throwing.
  function formatValue(candidate) {
    return candidate == null
      ? String(candidate)
      : `0x${candidate.toString(16).toUpperCase()}`;
  }

  /**
   * Default error message built from the expected and the actual value.
   * @param {any} expected - Expected value, or a predicate function.
   * @param {any} actual - Value that was actually parsed.
   */
  function defaultErrorMessageFormatter(expected, actual) {
    const expectedFormatted =
      typeof expected === "function"
        ? "the provided condition"
        : formatValue(expected);
    const actualFormatted = formatValue(actual);
    return `Expected value to be ${expectedFormatted}, but found ${actualFormatted}.`;
  }

  return {
    /**
     * Creates a parser asserting the parsed value.
     * @param {any} val - Expected value (compared with `===`), or a predicate
     *   called with the actual value.
     * @param {any} [err] - Custom error: a function called with the actual
     *   value, or any other truthy value used directly as the error payload.
     *   When omitted or falsy, the default message is used.
     */
    toBe: function (val, err) {
      return function (index, bytes) {
        const result = parser(index, bytes);
        // If the parser failed, propagate the error.
        if (result.isErr()) {
          return result;
        }
        const { value } = result.unwrap();
        const actualValue = value.value.value;
        const checkPassed =
          typeof val === "function" ? val(actualValue) : actualValue === val;
        // If the check passes, return the result unchanged.
        if (checkPassed) {
          return result;
        }
        let errorMessage;
        if (typeof err === "function") {
          errorMessage = err(actualValue);
        } else if (err) {
          // A plain custom message must be used as-is, not invoked.
          errorMessage = err;
        } else {
          errorMessage = defaultErrorMessageFormatter(val, actualValue);
        }
        return Result.error(errorMessage);
      };
    }
  };
}
//
/**
 * Builds a record-style parser: each `[parser, key]` pair in `fields` is run
 * in order, and the slice of each parsed node is stored under `key`.
 *
 * On success the node is tagged `tag`, spans from the starting index to the
 * index after the last field, and holds the keyed object as its value.
 * A failing field is reported as `["createConstantInfoParser", tag, error]`.
 *
 * @param {string} tag - Tag of the produced node.
 * @param {any} fields - Iterable of `[fieldParser, key]` pairs.
 * @returns {(index: number, bytes: Uint8Array) => ({tag:"ok",value:{nextIndex:number,value:{tag:string,value:{start:number,end:number,value:any}}}}|{tag:"error",value:any})}
 */
function createConstantInfoParser(tag, fields) {
  return function (index, bytes) {
    const valueObject = {};
    let cursor = index;

    for (const field of fields) {
      const [fieldParser, key] = field;
      const outcome = fieldParser(cursor, bytes);
      if (outcome.isErr()) {
        // Propagate the error, labelled with this record's tag.
        return outcome.errMap((cause) => ["createConstantInfoParser", tag, cause]);
      }

      const step = outcome.unwrap();
      const node = step.value;
      const hasEnumShape =
        Object.keys(node).length === 2 && "value" in node && "tag" in node;
      if (!hasEnumShape) {
        return Result.error([
          "createConstantInfoParser:Parser returns ok, but it must be object {tag,value}, returned: ",
          node,
        ]);
      }

      valueObject[key] = node.value;
      cursor = step.nextIndex;
    }

    const record = new ParseEnum(tag, new ByteSlice(index, cursor, null, null, valueObject));
    return Result.ok({ nextIndex: cursor, value: record });
  };
}
/**
 * Folds a list of already-computed parse results into a single
 * `"concatEnums"` node.
 *
 * The first error encountered is returned as a fresh error Result carrying
 * the same payload. Otherwise the produced node spans from the start of the
 * first result's slice to the `nextIndex` of the last result and holds the
 * individual nodes, unmodified, as its value. An empty list yields an empty
 * node spanning `[0, 0)`.
 *
 * @param {Result[]} parseResults - Results to combine, in order.
 * @returns {Result}
 */
function reduceParseResults(parseResults) {
  const accumulatedEnums = [];
  let currentIndex = 0;
  let initialIndex = 0;
  for (let i = 0; i < parseResults.length; i++) {
    const result = parseResults[i];
    // Check for failure before touching the payload: an error payload does
    // not have the `{nextIndex, value}` shape.
    if (result.tag === "error") {
      return Result.error(result.value);
    }
    const { nextIndex, value: parsedEnum } = result.value;
    if (i === 0) {
      initialIndex = parsedEnum.value.start;
    }
    currentIndex = nextIndex;
    // Nodes are collected as-is; their own slices already carry correct bounds.
    accumulatedEnums.push(parsedEnum);
  }
  const resultEnum = new ParseEnum("concatEnums", new ByteSlice(initialIndex, currentIndex,
    null, null, accumulatedEnums
  ));
  return Result.ok({
    nextIndex: currentIndex,
    value: resultEnum
  });
}
/**
 * Internal looping helper; not meant to be called from outside this file.
 *
 * While the cursor is inside `bytes`, calls
 * `reducer(previous, parser, cursor, bytes)`, where `previous` is `start` on
 * the first pass and the prior reducer output afterwards. The reducer output
 * must be spreadable and expose the next cursor at `[1][0].value.nextIndex`.
 * The loop stops once `predicate(...output)` is falsy or the input is
 * exhausted. The final reducer output is handed to `accu`, whose return value
 * is the parser's result; `accu` receives `undefined` if the loop never ran.
 *
 * The loop is deliberately iterative rather than built on `flatMapParser`,
 * to avoid unbounded recursion.
 *
 * @param {Function} parser - Passed through to `reducer`.
 * @param {Function} predicate - Decides whether to continue; gets the spread reducer output.
 * @param {Function} reducer - Produces the next accumulated state.
 * @param {any} start - Initial accumulator.
 * @param {Function} accu - Converts the last reducer output into the final result.
 */
function parseWhile(parser, predicate, reducer, start, accu) {
  return function (index, bytes) {
    let cursor = index;
    let carried = start;
    let latest;
    let keepGoing = cursor < bytes.length;
    while (keepGoing) {
      latest = reducer(carried, parser, cursor, bytes);
      carried = latest;
      cursor = latest[1][0].value.nextIndex;
      // Continue only if the predicate holds and input remains.
      keepGoing = predicate(...latest) && cursor < bytes.length;
    }
    return accu(latest);
  };
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment