Skip to content

Instantly share code, notes, and snippets.

@zarutian

zarutian/cbor.js Secret

Last active June 9, 2023 02:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zarutian/83fd395e759c5853885bc3299046021b to your computer and use it in GitHub Desktop.
Save zarutian/83fd395e759c5853885bc3299046021b to your computer and use it in GitHub Desktop.
CBOR and Agoric endo smallcaps, ocapn implementation draft.
// Reads two values from the byte source and combines them big-endian:
// the first value is the high byte, the second the low byte.
// bytesrc is called as bytesrc(1) and each awaited result is used as a number.
// Resolves to a plain Number.
const readUint16 = async (bytesrc) => {
  const highByte = await bytesrc(1);
  const lowByte = await bytesrc(1);
  return (highByte << 8) + lowByte;
};
// Reads four values from the byte source, one bytesrc(1) call at a time,
// and folds them big-endian (first value is most significant).
// Resolves to a BigInt, unlike readUint16 which resolves to a Number.
const readUint32 = async (bytesrc) => {
  let value = 0n;
  for (let remaining = 4; remaining > 0; remaining -= 1) {
    // Shifting the running total left by 8 before adding the next byte gives
    // the same sum as weighting each byte by its final position.
    value = (value << 8n) + BigInt(await bytesrc(1));
  }
  return value;
};
// Reads eight values from the byte source, one bytesrc(1) call at a time,
// and folds them big-endian (first value is most significant).
// Resolves to a BigInt so the full 64-bit range is represented exactly.
const readUint64 = async (bytesrc) => {
  let value = 0n;
  for (let remaining = 8; remaining > 0; remaining -= 1) {
    value = (value << 8n) + BigInt(await bytesrc(1));
  }
  return value;
};
// Collects nrOfBytes values from the byte source into a plain Array,
// calling bytesrc(1) once per value.
// nrOfBytes may be a Number or a BigInt; it is converted with BigInt(), so a
// non-integer Number makes that conversion throw. Zero or negative counts
// yield an empty array.
const readBytes = async (bytesrc, nrOfBytes = 0) => {
  const wanted = BigInt(nrOfBytes);
  const collected = [];
  for (let done = 0n; done < wanted; done += 1n) {
    collected.push(await bytesrc(1));
  }
  return collected;
};
// Records a freshly decoded string in the active stringref namespace, if any.
//
// str  - the decoded value: a Uint8Array (byte string) or a JS string (text).
// opts - decoder options; opts["stringref-namespace"] is the array of strings
//        seen so far in the current namespace. When opts or the namespace is
//        missing, nothing happens.
//
// Per the stringref extension a string only gets an index when a reference to
// it would be shorter than the string itself. The minimum length depends on
// the index the string WOULD receive (the current namespace size):
//   index 0..23 -> 3, 24..255 -> 4, 256..65535 -> 5,
//   65536..4294967295 -> 7, larger -> 11.
// Lengths are measured in encoded bytes, so text is measured as UTF-8.
const stringref_assign = (str, opts) => {
  const ns = opts?.["stringref-namespace"];
  if (ns == null) { return; }

  const byteLength = (typeof str === "string")
    ? new TextEncoder().encode(str).length
    : str.length;

  const nextIndex = ns.length;
  let minimumLength;
  if (nextIndex < 24) {
    minimumLength = 3;
  } else if (nextIndex < 256) {
    minimumLength = 4;
  } else if (nextIndex < 65536) {
    minimumLength = 5;
  } else if (nextIndex < 4294967296) {
    minimumLength = 7;
  } else {
    minimumLength = 11;
  }

  if (byteLength >= minimumLength) {
    ns.push(str);
  }
};
// Reads nrOfBytes bytes from the byte source and returns them as a Uint8Array.
// The result is also offered to stringref_assign so it can be registered in
// the active stringref namespace.
//
// opts defaults to an empty options object: callers that omit it previously
// passed undefined on to stringref_assign, which dereferences opts and threw.
const readBytestr = async (bytesrc, nrOfBytes = 0, opts = {}) => {
  const result = new Uint8Array(await readBytes(bytesrc, nrOfBytes));
  stringref_assign(result, opts);
  return result;
};
// Predicate: true exactly when the specimen is a Uint8Array, the
// representation this decoder uses for CBOR byte strings.
const isBytestr = (specimen) => specimen instanceof Uint8Array;
// Returns a new Uint8Array holding the bytes of a followed by the bytes of b.
// Neither input is modified. Uses TypedArray.prototype.set to copy each input
// in one call instead of writing element by element.
const concatBytestr = (a, b) => {
  const joined = new Uint8Array(a.length + b.length);
  joined.set(a, 0);
  joined.set(b, a.length);
  return joined;
};
// Shared UTF-8 decoder. ignoreBOM: true keeps a leading U+FEFF in the decoded
// text instead of silently dropping it, so the string content is returned as
// it was encoded.
const utf8TextDecoder = new TextDecoder("utf-8", { ignoreBOM: true });
// Reads nrOfBytes bytes from the byte source and decodes them as UTF-8 text.
// The resulting string is also offered to stringref_assign so it can be
// registered in the active stringref namespace (opts is passed through as-is).
const readUTF8str = async (bytesrc, nrOfBytes, opts) => {
  const bs = await readBytes(bytesrc, nrOfBytes);
  // readBytes yields a plain Array, but TextDecoder.decode only accepts an
  // ArrayBuffer or a typed-array/DataView and throws a TypeError otherwise.
  const result = utf8TextDecoder.decode(new Uint8Array(bs));
  stringref_assign(result, opts);
  return result;
};
// Predicate: true exactly when the specimen is a primitive JS string, the
// representation this decoder uses for CBOR text strings.
const isUTF8str = (specimen) => typeof specimen === "string";
// Joins two text chunks by delegating to a's own concat method, so for
// strings the result is a followed by b.
const concatUTF8str = (a, b) => a.concat(b);
// Unique sentinel; only ever compared by identity to recognise the marker
// that ends an indefinite-length item.
const stopper = () => {};
// Reads items with readItem until the stopper sentinel is returned,
// accumulating them into a single value.
//   cons   - called once with no arguments to produce the initial accumulator
//   isa    - predicate every non-stopper item must satisfy
//   concat - (accumulator, item) => next accumulator; its RETURN VALUE
//            replaces the accumulator
// Resolves to the accumulator as it stands when the stopper is seen.
// Throws if an item does not satisfy isa.
const readItemsUntilStopper = async (bytesrc, cons, isa, concat, opts) => {
  let accumulated = cons();
  for (;;) {
    const next = await readItem(bytesrc, opts);
    if (next === stopper) {
      break;
    }
    if (!isa(next)) {
      throw new Error("expected a certain kind of datum");
    }
    accumulated = concat(accumulated, next);
  }
  return accumulated;
};
// Decodes a definite-length array: reads nrOfItems items with readItem, in
// order, and resolves to them as a plain Array.
// nrOfItems may be a Number or a BigInt (it is converted with BigInt()).
const readArray = async (bytesrc, nrOfItems, opts) => {
  const wanted = BigInt(nrOfItems);
  const items = [];
  for (let index = 0n; index < wanted; index += 1n) {
    items.push(await readItem(bytesrc, opts));
  }
  return items;
};
// Decodes a definite-length map: reads nrOfItemPairs key/value pairs with
// readItem (key first, then value) and resolves to a Map holding them in
// reading order. nrOfItemPairs may be a Number or a BigInt.
const readMap = async (bytesrc, nrOfItemPairs, opts) => {
  const wanted = BigInt(nrOfItemPairs);
  const entries = new Map();
  for (let pair = 0n; pair < wanted; pair += 1n) {
    const key = await readItem(bytesrc, opts);
    const value = await readItem(bytesrc, opts);
    entries.set(key, value);
  }
  return entries;
};
// Interprets a byte string as an unsigned big-endian integer and returns it
// as a BigInt. An empty byte string yields 0n.
// Throws if bs is not a byte string according to isBytestr.
const decodeBigInt = (bs) => {
  if (!isBytestr(bs)) {
    throw new Error("expected a bytestring for a bigint");
  }
  let value = 0n;
  for (const octet of bs) {
    value = (value << 8n) + BigInt(octet);
  }
  return value;
};
// Placeholder for turning an [exponent, mantissa] pair into a decimal
// fraction value. Not implemented: every call throws.
const decodeBigDecFract = (twople) => {
  const reason = "not yet implemented";
  throw new Error(reason);
};
// Placeholder for turning an [exponent, mantissa] pair into a bigfloat
// value. Not implemented: every call throws.
const decodeBigFloat = (twople) => {
  const reason = "not yet implemented";
  throw new Error(reason);
};
// Registry of tag handlers, keyed by tag number as a BigInt. Each handler is
// called as handler(tagnr, bytesrc, opts), reads the tagged item itself with
// readItem, and resolves to the decoded value.
const tagHandlers = new Map();

// True for the values accepted as exponent or mantissa of tags 4 and 5:
// a BigInt, or a Number with no fractional part.
const isIntegerOrBigInt = (specimen) =>
  (typeof specimen === "bigint") || Number.isInteger(specimen);

// Tag 0: textual date/time. Resolves to the millisecond timestamp produced by
// Date.parse (a Number, NaN when the text cannot be parsed), not a Date.
tagHandlers.set(0n, async (tagnr, bytesrc, opts) => {
  const item = await readItem(bytesrc, opts);
  if (!isUTF8str(item)) {
    throw new Error("expected a string for textual Date");
  }
  // todo: check if it is rfc-3339 formated.
  return Date.parse(item);
});

// Tag 1: epoch-based date/time. The tagged number counts SECONDS since the
// epoch, while the Date constructor takes milliseconds, hence the * 1000.
// Number() is applied first because 32/64-bit integers arrive as BigInt,
// which the Date constructor rejects.
tagHandlers.set(1n, async (tagnr, bytesrc, opts) => {
  const item = await readItem(bytesrc, opts);
  if ((typeof item !== "number") && (typeof item !== "bigint")) {
    throw new Error("expected an integer or float for epoch-based Date");
  }
  return new Date(Number(item) * 1000);
});

// Tag 2: unsigned bignum; the byte string holds the value n big-endian.
tagHandlers.set(2n, async (tagnr, bytesrc, opts) => {
  const item = await readItem(bytesrc, opts);
  if (!isBytestr(item)) {
    throw new Error("expected a bytestring for a positive bigint");
  }
  return decodeBigInt(item);
});

// Tag 3: negative bignum; the byte string holds n and the value is -1 - n
// (the same offset-by-one convention as CBOR's negative integers).
tagHandlers.set(3n, async (tagnr, bytesrc, opts) => {
  const item = await readItem(bytesrc, opts);
  if (!isBytestr(item)) {
    throw new Error("expected a bytestring for a negative bigint");
  }
  return -1n - decodeBigInt(item);
});

// Tag 4: decimal fraction, an [exponent, mantissa] array of two integers.
// Validates the shape, then hands the pair to decodeBigDecFract.
tagHandlers.set(4n, async (tagnr, bytesrc, opts) => {
  const item = await readItem(bytesrc, opts);
  if (!Array.isArray(item)) {
    throw new Error("expected an array for Big Decimal Fraction");
  }
  if (item.length !== 2) {
    throw new Error("expected an two item array for Big Decimal Fraction");
  }
  const [exponent, mantissa] = item;
  if (!isIntegerOrBigInt(exponent)) {
    throw new Error("expected the first item, the exponent part of a Big Decimal Fraction, to be an integer");
  }
  if (!isIntegerOrBigInt(mantissa)) {
    throw new Error("expected the second item, the mantissa part of a Big Decimal Fraction, to be an integer");
  }
  return decodeBigDecFract(item);
});

// Tag 5: bigfloat, an [exponent, mantissa] array of two integers.
// Validates the shape, then hands the pair to decodeBigFloat.
tagHandlers.set(5n, async (tagnr, bytesrc, opts) => {
  const item = await readItem(bytesrc, opts);
  if (!Array.isArray(item)) {
    throw new Error("expected an array for BigFloat");
  }
  if (item.length !== 2) {
    throw new Error("expected an two item array for BigFloat");
  }
  const [exponent, mantissa] = item;
  if (!isIntegerOrBigInt(exponent)) {
    throw new Error("expected the first item, the exponent part of a BigFloat, to be an integer");
  }
  if (!isIntegerOrBigInt(mantissa)) {
    throw new Error("expected the second item, the mantissa part of a BigFloat, to be an integer");
  }
  return decodeBigFloat(item);
});
// Handler registered for tag numbers this decoder treats as unassigned:
// it always rejects and never reads the tagged item.
const unassignedTagnrHandler = async (tagnr, bytesrc, opts) => {
  throw new Error("CBOR tagnr not yet assigned by IANA");
};
// Tags 6 through 15 all share the rejecting handler above.
for (let unassignedTagnr = 6n; unassignedTagnr <= 15n; unassignedTagnr += 1n) {
  tagHandlers.set(unassignedTagnr, unassignedTagnrHandler);
}
// Handler registered for the tag numbers this decoder attributes to COSE:
// it always rejects and never reads the tagged item.
const COSE_assignedTagnrHandler = async (tagnr, bytesrc, opts) => {
  throw new Error("CBOR tagnr assigned to COSE spec, which is not implemented here");
};
// Tags 16 through 19 share the COSE handler above.
for (let coseTagnr = 16n; coseTagnr <= 19n; coseTagnr += 1n) {
  tagHandlers.set(coseTagnr, COSE_assignedTagnrHandler);
}
// Tag 20 goes back to the generic "unassigned" handler.
tagHandlers.set(20n, unassignedTagnrHandler);
// Tags 21-24 are placeholders: each handler rejects immediately and does not
// read the tagged item. The statements that used to follow each throw could
// never run (and named encoder functions that are not defined in this file),
// so the intended behaviour is kept here as TODO notes instead.
//
// Plan for tags 21-23 (expected conversion; see Section 3.4.5.2 [RFC8949]):
// remember opts["baseDataEncoding"], set it to the encoder for the tag while
// the enclosed item is read, then restore the remembered value and return the
// item.
tagHandlers.set(21n, async (tagnr, bytesrc, opts) => {
  // TODO: expected conversion to base64url encoding.
  throw new Error("not yet fully implemented");
});
tagHandlers.set(22n, async (tagnr, bytesrc, opts) => {
  // TODO: expected conversion to base64 encoding.
  throw new Error("not yet fully implemented");
});
tagHandlers.set(23n, async (tagnr, bytesrc, opts) => {
  // TODO: expected conversion to base16 encoding; note the hex alphabetics
  // is all uppercase.
  throw new Error("not yet implemented");
});
tagHandlers.set(24n, async (tagnr, bytesrc, opts) => {
  // TODO: encoded CBOR data item; see Section 3.4.5.1. Plan: read the enclosed
  // item and reject with "the encoded CBOR data item must be encapsulated in
  // a bytestring" unless it is a byte string.
  throw new Error("not yet fully implemented");
});
// Tag 25, stringref: the tagged item is an index into the active stringref
// namespace, and the handler resolves to the string stored at that index.
// Rejects when there is no active namespace, when the item is not a
// non-negative number, or when the index does not refer to a string that has
// been assigned (previously that case silently produced undefined).
tagHandlers.set(25n, async (tagnr, bytesrc, opts) => {
  if (opts["stringref-namespace"] == undefined) {
    throw new Error("encountered a stringref outside of a stringref-namespace");
  }
  const item = await readItem(bytesrc, opts);
  if ((typeof(item) != "number") && (typeof(item) != "bigint")) {
    throw new Error("stringref tagged datum item must be a number");
  }
  if (item < 0) {
    throw new Error("stringref tagged datum item must be a positive number");
  }
  const namespace = opts["stringref-namespace"];
  // The index may arrive as a BigInt (32/64-bit arguments); normalise it so
  // the range check and the lookup use an ordinary array index.
  const index = Number(item);
  if (!Number.isInteger(index) || index >= namespace.length) {
    throw new Error("stringref index is not assigned in the current stringref-namespace");
  }
  return namespace[index];
});
// Tag 256, stringref-namespace: decodes the tagged item with a fresh, empty
// namespace installed in opts, then puts the previous namespace back.
// The restore sits in a finally block so that a decoding error inside the
// tagged item cannot leave the inner namespace installed on the shared opts.
tagHandlers.set(256n, async (tagnr, bytesrc, opts) => {
  const old_stringref_namespace = opts["stringref-namespace"];
  opts["stringref-namespace"] = [];
  try {
    return await readItem(bytesrc, opts);
  } finally {
    opts["stringref-namespace"] = old_stringref_namespace;
  }
});
// Decodes the item that follows a tag header.
//   tagnr   - the tag number, as a Number or BigInt
//   bytesrc - the byte source, positioned at the tagged item
//   opts    - decoder options, passed through unchanged
// The tag number is normalised to a BigInt (the key type of tagHandlers).
// If a handler is registered it is called as handler(tagnr, bytesrc, opts)
// and its result is returned. For a tag with no registered handler the
// enclosed item is decoded with readItem and returned as-is, i.e. the
// unknown tag is ignored.
const handleTagged = async (tagnr, bytesrc, opts) => {
  const key = BigInt(tagnr);
  const handler = tagHandlers.get(key);
  if (handler !== undefined) {
    return await handler(key, bytesrc, opts);
  }
  return await readItem(bytesrc, opts);
};
// Converts the two bytes of an IEEE 754 half-precision float (big-endian)
// into a Number.
const decodeFloat16 = (highByte, lowByte) => {
  const half = (highByte << 8) | lowByte;
  const sign = (half & 0x8000) ? -1 : 1;
  const exponent = (half >> 10) & 0x1F;
  const fraction = half & 0x03FF;
  if (exponent === 0) {
    return sign * fraction * (2 ** -24); // zero and subnormals
  }
  if (exponent === 0x1F) {
    return (fraction === 0) ? (sign * Infinity) : NaN;
  }
  return sign * (fraction + 0x0400) * (2 ** (exponent - 25));
};

// Decodes one CBOR data item from the byte source.
//
//   bytesrc - called as bytesrc(1); each awaited result is one byte (0..255)
//   opts    - decoder options shared by the whole decode (e.g. the active
//             "stringref-namespace"); defaults to a fresh empty object
//
// The first byte is split into a major type (top 3 bits) and additional
// information (low 5 bits). Resolves to:
//   major 0  unsigned int  Number for immediate/8/16-bit arguments, BigInt
//                          for 32/64-bit arguments
//   major 1  negative int  -1 - argument, same Number/BigInt split
//   major 2  byte string   Uint8Array
//   major 3  text string   string
//   major 4  array         Array
//   major 5  map           Map
//   major 6  tag           whatever handleTagged resolves to
//   major 7  false, true, null, undefined, a Number for 16/32/64-bit floats,
//            or the stopper sentinel for the 0xFF break byte
// Rejects with an Error for reserved or unsupported initial bytes, for an
// indefinite-length map with an odd number of items, and when bytesrc does
// not deliver a byte value.
const readItem = async (bytesrc, opts = {}) => {
  const t1 = await bytesrc(1);
  if (!Number.isInteger(t1) || t1 < 0x00 || t1 > 0xFF) {
    throw new Error("cbor decodeItem expected a byte value (0..255) from bytesrc");
  }
  const majorType = t1 >> 5;
  const additionalInfo = t1 & 0x1F;
  const unknownTypebyte = () =>
    new Error("cbor decodeItem came across unknown typebyte 0x".concat(t1.toString(16)));

  // Major type 7: simple values, floats and the break marker.
  if (majorType === 7) {
    switch (t1) {
      case 0xF4: return false;
      case 0xF5: return true;
      case 0xF6: return null;
      case 0xF7: return undefined;
      case 0xF9: { // half-precision float (two-byte IEEE 754)
        const [highByte, lowByte] = await readBytes(bytesrc, 2);
        return decodeFloat16(highByte, lowByte);
      }
      case 0xFA: { // single-precision float (four-byte IEEE 754)
        const raw = Uint8Array.from(await readBytes(bytesrc, 4));
        return new DataView(raw.buffer).getFloat32(0);
      }
      case 0xFB: { // double-precision float (eight-byte IEEE 754)
        const raw = Uint8Array.from(await readBytes(bytesrc, 8));
        return new DataView(raw.buffer).getFloat64(0);
      }
      case 0xFF: return stopper;
      // Everything else (other simple values, 0xFC-0xFE) is not supported.
      // These bytes must not be mistaken for the break marker.
      default: throw unknownTypebyte();
    }
  }

  // Major types 0-6: work out the argument, or detect indefinite length.
  let argument;
  let indefinite = false;
  if (additionalInfo < 24) {
    argument = additionalInfo;
  } else if (additionalInfo === 24) {
    argument = await bytesrc(1);
  } else if (additionalInfo === 25) {
    argument = await readUint16(bytesrc);
  } else if (additionalInfo === 26) {
    argument = await readUint32(bytesrc);
  } else if (additionalInfo === 27) {
    argument = await readUint64(bytesrc);
  } else if (additionalInfo === 31 && majorType >= 2 && majorType <= 5) {
    indefinite = true;
  } else {
    // 28-30 are reserved; 31 has no meaning for integers and tags.
    throw unknownTypebyte();
  }

  // concat callback for collecting items: must RETURN the accumulator,
  // because readItemsUntilStopper replaces its accumulator with the result.
  const appendItem = (items, item) => {
    items.push(item);
    return items;
  };
  const acceptAny = (specimen) => true;

  switch (majorType) {
    case 0:
      return argument;
    case 1:
      return (typeof argument === "bigint") ? ((-1n) - argument) : ((-1) - argument);
    case 2:
      if (indefinite) {
        return await readItemsUntilStopper(bytesrc, () => new Uint8Array(0), isBytestr, concatBytestr, opts);
      }
      return await readBytestr(bytesrc, argument, opts);
    case 3:
      if (indefinite) {
        return await readItemsUntilStopper(bytesrc, () => "", isUTF8str, concatUTF8str, opts);
      }
      return await readUTF8str(bytesrc, argument, opts);
    case 4:
      if (indefinite) {
        return await readItemsUntilStopper(bytesrc, () => [], acceptAny, appendItem, opts);
      }
      return await readArray(bytesrc, argument, opts);
    case 5: {
      if (!indefinite) {
        return await readMap(bytesrc, argument, opts);
      }
      // Indefinite map: read the flat key, value, key, value, ... sequence
      // and pair it up.
      const flat = await readItemsUntilStopper(bytesrc, () => [], acceptAny, appendItem, opts);
      if (flat.length % 2 !== 0) {
        throw new Error("cbor indefinite-length map ended with a key that has no value");
      }
      const result = new Map();
      for (let i = 0; i < flat.length; i += 2) {
        result.set(flat[i], flat[i + 1]);
      }
      return result;
    }
    default: // major type 6
      return await handleTagged(argument, bytesrc, opts);
  }
};

References: https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml https://www.rfc-editor.org/rfc/rfc8949.html
https://github.com/endojs/endo/blob/master/packages/marshal/src/encodeToSmallcaps.js https://gist.github.com/zarutian/fb21d0a8c910ab255401 þá sérstaklega msgpck_ext_nr trixið en færa það yfir á e-r unassigned cbor tagnr. Hví? Til að minnka bæti sem þarf að fara yfir vírinn.

Sjá hér fyrir sýnidæmi.


  D8        - cbortag, one byte
  1B        - tagnr 27
  86        - array of length 6 items
    6A      - utf-8 string length 10 bytes   (1st item)
      6F    - "o"
      70    - "p"
      3A    - ":"
      64    - "d"
      65    - "e"
      6C    - "l"
      69    - "i"
      76    - "v"
      65    - "e"
      72    - "r"
    D8      - cbortag            ( 2nd item of outermost array )
    1B      - tagnr 27
    82      - array of two items
      6B    - utf-8 string 11 bytes  (1st)
        64  - "d"
        65  - "e"
        73  - "s"
        63  - "c"
        3A  - ":"
        65  - "e"
        78  - "x"
        70  - "p"
        6F  - "o"
        72  - "r"
        74  - "t"
      18    - unsigned int  (2nd)
      2B    - 43
    63      - utf-8 string 3 bytes  (3rd item of outermost array)
      66    - "f"
      6F    - "o"
      6F    - "o"
    80      - array of zero items (4th)
    A0      - empty map           (5th)
    04      - unsigned int 4      (6th)
    
    <10'op:deliver<11'desc:export0+>12"makeSeqBlock[[
        [11"applyMethod
          1+
          [4"@ref
            <13'desc:Promise3
              [28"aliceVat.weasledyne.com:8032]
              12"§aliceVatId§
              41+
              <22'desc:import-new-object10+>
            >
          ]
          7"resolve
          [4"@arr[4"@sba0+]]
        ]
        [11'applyMethod
          2+
          [4"@ref
            <13'desc:Promise3
              [27"bobsVat.vat-locator.is:443]
              10"§bobVatId§
              42+
              <22'desc:import-new-object11+>
            >
          ]
          7"resolve
          [4"@arr[4"@sba0+]]
        ]
        ]]{}5+>
  D8          - cbor tag, one byte
  1B          - tagnr 27
  86          - array of length 6 items
    6A        - utf-8 string length 10 bytes   (1st item)
      6F      - "o"
      70      - "p"
      3A      - ":"
      64      - "d"
      65      - "e"
      6C      - "l"
      69      - "i"
      76      - "v"
      65      - "e"
      72      - "r"
    D8        - cbortag            ( 2nd item of outermost array )
    1B        - tagnr 27
    82        - array of two items
      6B      - utf-8 string 11 bytes  (1st)
        64    - "d"
        65    - "e"
        73    - "s"
        63    - "c"
        3A    - ":"
        65    - "e"
        78    - "x"
        70    - "p"
        6F    - "o"
        72    - "r"
        74    - "t"
      00      - unsigned int 0
    6C        - utf-8 string, 12 bytes
      6D      - "m"
      61      - "a"
      6B      - "k"
      65      - "e"
      53      - "S"
      65      - "e"
      71      - "q"
      42      - "B"
      6C      - "l"
      6F      - "o"
      63      - "c"
      6B      - "k"
    81        - array with single item
      82      - array with two items
        85    - array with five items
          6B  - utf-8 string 11 bytes
            61 - "a"
            70 - "p"
            70 - "p"
            6C - "l"
            79 - "y"
            4D - "M"
            65 - "e"
            74 - "t"
            68 - "h"
            6F - "o"
            64 - "d"
          01   - unsigned int 1
          82   - array with two items
            64 - utf-8 string 4 bytes
              40 - "@"
              72 - "r"
              65 - "e"
              66 - "f"
            D8   - cbortag
            1B   - 27
            85   - array with five items
              6D - utf-8 string 13 bytes
                64 - "d"
                65 - "e"
                73 - "s"
                63 - "c"
                3A - ":"
                50 - "P"
                72 - "r"
                6F - "o"
                6D - "m"
                69 - "i"
                73 - "s"
                65 - "e"
                33 - "3"
              81   - array with single item
                78 - utf-8 string 
                1C - 28 bytes
                  61 - "a"
                  6C - "l"
                  69 - "i"
                  63 - "c"
                  65 - "e"
                  56 - "V"
                  61 - "a"
                  74 - "t"
                  2E - "."
            
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment