Skip to content

Instantly share code, notes, and snippets.

@jspears
Last active March 17, 2022 17:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jspears/7ef9ab04a568de0d85475eb229bfe4b8 to your computer and use it in GitHub Desktop.
Save jspears/7ef9ab04a568de0d85475eb229bfe4b8 to your computer and use it in GitHub Desktop.
Xml (with namespaces) parsed with TypeScript Types

Something dumb to do for dumb reasons. This isn't a good idea, or well implemented. But I learned a few things, and thought I'd share.

//These types parse XML. With some caveats.
//  - depends on what you mean by parsing.
//  - practical applications.
// 
// Bugs:
//     - Will parse malformed documents, particularly root elements.
//     - Serialization is not really namespace aware.
//     - API does not align with DOM API.

/**
 * Maps a namespace prefix to the namespace it is bound to.
 * The key `xmlns` holds the default (un-prefixed) namespace.
 */
interface Namespaces extends Record<string, string> {}

// Character classes used by the tag-name and attribute-name scanners.

/** Lower-case ASCII letters. */
type ALPHA =
    | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm'
    | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z';

/** ASCII letters of either case. */
type Alpha = ALPHA | Uppercase<ALPHA>;

/** Decimal digits. */
type NUMBS =
    | '0' | '1' | '2' | '3' | '4'
    | '5' | '6' | '7' | '8' | '9';

/** ASCII letters of either case plus the underscore. */
type VALID_CHARS = Alpha | '_';

/** Punctuation accepted inside a name after its first character. */
type Special =
    | '_'
    | '-'
    | '.'
    | ':';

/** Characters that `Trim` treats as insignificant whitespace. */
type _TrimChar = ' ' | '\n' | '\t' | '\r';

/**
 * Strips leading and trailing whitespace (space, newline, tab, carriage
 * return) from a string literal type. Anything that does not start or end
 * with whitespace -- including non-string types -- is returned unchanged.
 *
 * Leading newlines/tabs are stripped as well, so an attribute list that is
 * spread over several lines can still be scanned.
 */
type Trim<T> =
    T extends `${_TrimChar}${infer V}` ? Trim<V> :
    T extends `${infer V}${_TrimChar}` ? Trim<V> :
    T;
/**
 * `true` when T is a non-empty string whose every character is a member of
 * `Group`; `false` otherwise (including for the empty string).
 */
type isIn<T, Group> =
    T extends `${infer Head}${infer Tail}`
        ? Head extends Group
            ? (Tail extends '' ? true : isIn<Tail, Group>)
            : false
        : false;
/**
 * `true` when T is a valid tag name: a letter or underscore, followed by any
 * number of letters, digits or `Special` characters. The empty string is not
 * a valid tag name.
 */
type isTagValid<T> =
    T extends `${infer Head}${infer Tail}`
        ? isIn<Head, Alpha | '_'> extends true
            ? (Tail extends '' ? true : isIn<Tail, Alpha | NUMBS | Special>)
            : false
        : false;
/**
 * Consumes the longest valid tag name from the start of T.
 * Returns `[name, rest]`; `name` is `''` when T does not start with a valid
 * tag-name character. `Ret` is the accumulator holding the name read so far.
 */
type ParseTag<T, Ret extends string = ''> =
    T extends `${infer Next}${infer Remaining}`
        ? isTagValid<`${Ret}${Next}`> extends true
            ? ParseTag<Remaining, `${Ret}${Next}`>
            : [Ret, T]
        : [Ret, T];
/**
 * Reads the tag name that follows an opening `<`.
 * Returns `[name, rest]` (see `ParseTag`), or `never` when T does not start
 * with `<`.
 */
type StartTag<T> =
    T extends `<${infer AfterBracket}`
        ? ParseTag<AfterBracket>
        : never;

/**
 * Resolves to `True` when T is `never`, `undefined`, the empty string, or
 * something whose `length` is exactly `0` (e.g. an empty tuple); resolves to
 * `False` otherwise.
 */
type isEmpty<T, True = true, False = false> =
    // Wrapped in a tuple so that `never` is tested instead of distributed over.
    [T] extends [undefined | '']
        ? True
        : 'length' extends keyof T
            ? (T['length'] extends 0 ? True : False)
            : False;

/**
 * Characters allowed in an attribute name after its first character:
 * letters, `Special` punctuation and decimal digits.
 */
type _AttrNameChar =
    | Alpha
    | Special
    | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9';

/**
 * `true` when T is a valid attribute name: a letter or underscore followed by
 * any number of `_AttrNameChar` characters. A one-character name such as `f`
 * is valid; the empty string is not.
 */
type isValidAttrName<T> =
    T extends `${infer First}${infer Rest}`
        ? First extends (Alpha | '_')
            // A single-character name has nothing left to validate.
            ? (Rest extends '' ? true : isIn<Rest, _AttrNameChar>)
            : false
        : false;

/**
 * Continues reading an attribute name whose first characters are already in
 * `Ret`. Returns `[name, rest]`, stopping at the first character that cannot
 * be part of a name.
 */
type _ParseAttrName<T, Ret extends string> =
    T extends `${infer F}${infer Rest}`
        ? F extends _AttrNameChar
            ? _ParseAttrName<Rest, `${Ret}${F}`>
            : [Ret, T]
        : [Ret, T];

/**
 * Reads an attribute name from the start of T.
 * Returns `[name, rest]` on success, or the one-element tuple `[T]` when T
 * does not start with a letter or underscore (callers detect failure by the
 * tuple length).
 */
type ParseAttrName<T> =
    T extends `${infer First}${infer Rest}`
        ? First extends (Alpha | '_')
            ? _ParseAttrName<Rest, First>
            : [T]
        : [T];

/**
 * Resolves to the quote character (`'` or `"`) that T starts with, or to
 * `never` when T does not start with a quote.
 */
type Quote<T> =
    T extends `'${string}` ? "'" :
    T extends `"${string}` ? '"' :
    never;

/**
 * Reads up to the first un-escaped terminator `Q` in T (the opening delimiter
 * has already been removed by the caller).
 *
 * Returns `[value, rest]`, where `value` has every backslash-escaped `Q`
 * replaced by a bare `Q`. When no terminator is found the whole input becomes
 * the value and `rest` is `''`. `Ret` accumulates the value read so far.
 *
 * The terminator is located first and only then checked for a preceding
 * backslash, so an escaped terminator that appears *after* the real closing
 * one (e.g. inside a later attribute) is never mistaken for part of this
 * value.
 */
type _EndQ<T extends string, Q extends string = '"', Ret extends string = ''> =
    T extends `${infer V}${Q}${infer Cont}`
        ? V extends `${infer L}\\`
            // The terminator was escaped: keep it (without the backslash) and go on.
            ? _EndQ<Cont, Q, `${Ret}${L}${Q}`>
            : [`${Ret}${V}`, Cont]
        : [`${Ret}${T}`, ''];

/**
 * Reads a section of T delimited by the opener `Q` and the terminator `E`
 * (which defaults to `Q`). Returns `[body, rest]`, or `['', T]` when T does
 * not start with `Q`.
 */
type _ParseQ<T extends string, Q extends string = '"', E extends string = Q> =
    T extends `${Q}${infer Body}`
        ? _EndQ<Body, E>
        : ['', T];

/**
 * Reads a single- or double-quoted string from the start of T.
 * Returns `[value, rest]`, or `['', T]` when T does not start with a quote.
 */
type ParseQ<T extends string> =
    Quote<T> extends string
        ? _ParseQ<T, Quote<T>>
        : ['', T];

/**
 * Reads an XML comment from the start of T.
 * Returns `[commentBody, rest]`, or `['', T]` when T does not start with `<!--`.
 */
type ParseComment<T extends string> =
    T extends `<!--${infer Body}`
        ? _EndQ<Body, '-->'>
        : ['', T];

/**
 * Consumes the leading run of decimal digits of T.
 * Returns `[digits, rest]`; `digits` is `''` when T does not start with a digit.
 */
type _ParseDigits<T extends string, Ret extends string = ''> =
    T extends `${infer D}${infer R}`
        ? D extends ('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9')
            ? _ParseDigits<R, `${Ret}${D}`>
            : [Ret, T]
        : [Ret, T];

/**
 * Reads an attribute value from the start of T and returns `[value, rest]`.
 *
 *  - An unquoted value is accepted when it starts with a digit; the whole run
 *    of digits is taken (`12 x` gives `['12', ' x']`), not just the first one.
 *  - Anything else is handed to `ParseQ`, i.e. it must be a quoted string.
 */
type ParseAttrValue<T extends string> =
    _ParseDigits<T> extends [infer Digits, infer Rest]
        ? (Digits extends '' ? ParseQ<T> : [Digits, Rest])
        : ParseQ<T>;

/**
 * Parses attributes into an object and returns `[object, rest]`.
 * Wish there was a way to define the types of a type.
 *
 *  - `name=value` stores the value produced by `ParseAttrValue`.
 *  - A bare `name` stores `true`.
 *  - Parsing stops at the first position where no attribute name can be read;
 *    the input at that position is returned as `rest`.
 *
 * `Ret` accumulates the attributes collected so far.
 */
type _ParseAttributes<T, Ret extends {} = {}> =
    ParseAttrName<Trim<T>> extends [infer Name, infer AfterName]
        ? Name extends string
            ? AfterName extends `=${infer RawValue}`
                ? ParseAttrValue<RawValue> extends [infer Value, infer AfterValue]
                    ? _ParseAttributes<AfterValue, AddKey<Ret, Name, Value>>
                    : _ParseAttributes<AfterName, Ret>
                : _ParseAttributes<AfterName, AddKey<Ret, Name, true>>
            : _ParseAttributes<AfterName, Ret>
        : [Ret, T];

/**
 * Parses the attribute list at the start of T and resolves namespaces.
 *
 * Returns `[namespaces, attributes, rest]`:
 *  - `namespaces` is `Ns` extended with the `xmlns` / `xmlns:prefix`
 *    declarations found in the attribute list;
 *  - `attributes` holds the remaining attributes with namespace-qualified
 *    keys (un-prefixed keys use the `xmlns` entry of the incoming `Ns`, or
 *    `''` when it has none);
 *  - `rest` is the unparsed remainder.
 *
 * If any step fails, `Ns` is returned unchanged with an empty attribute object.
 */
type ParseAttributes<T, Ns extends Namespaces = {}> =
    _ParseAttributes<T> extends [infer Raw, infer Remainder]
        ? FilterNS<Raw, Ns> extends [infer Plain, infer Declared]
            ? Declared extends Namespaces
                ? [Declared, AttrToNS<Plain, Declared, Def<Ns, 'xmlns', ''>>, Remainder]
                : [Ns, {}, Remainder]
            : [Ns, {}, Remainder]
        : [Ns, {}, T];

/**
 * Classifies an attribute name as a namespace declaration.
 *  - `xmlns`        resolves to `['xmlns']`
 *  - `xmlns:prefix` resolves to `['prefix']`
 *  - anything else  resolves to `never`
 */
type FilterNsKey<T> =
    T extends 'xmlns' ? [T] :
    T extends `xmlns:${infer Prefix}` ? [Prefix] :
    never;

/**
 * Splits a flat `[key, value, key, value, ...]` tuple into ordinary
 * attributes and namespace declarations.
 *
 * Keys recognised by `FilterNsKey` (`xmlns`, `xmlns:prefix`) are added to
 * `Ns` under their prefix; every other key is added to `Attr` unchanged.
 * Returns `[Attr, Ns]` once the tuple is exhausted.
 *
 * NOTE(review): for an ordinary key `FilterNsKey<K>` is `never`. The code
 * appears to rely on `never extends [infer NsP]` succeeding with a non-string
 * `NsP`, so that the `NsP extends string` test routes the key to `Attr`.
 * Confirm before restructuring these conditionals.
 */
type _FilterNS<T, Attr extends Record<string, string> = {}, Ns extends Namespaces = {}> = T extends [infer K, infer V, ...infer Rest] ?
    K extends PropertyKey ?
    FilterNsKey<K> extends [infer NsP] ?
    NsP extends string ?
    _FilterNS<Rest, Attr, AddKey<Ns, NsP, V>> : _FilterNS<Rest, AddKey<Attr, K, V>, Ns> : [Attr, Ns] : [Attr, Ns] : [Attr, Ns];

/**
 * Separates namespace declarations from ordinary attributes.
 *
 * Takes an attribute object `T` and the namespaces already in scope (`Ns`)
 * and returns `[attributes, namespaces]`: the attributes without the
 * `xmlns...` entries, and `Ns` extended with the declarations that were found.
 */
type FilterNS<T, Ns extends Namespaces> =
    _FilterNS<ToTuple<T>, {}, Ns>;


/**
 * Rewrites the keys of a flat `[key, value, ...]` attribute tuple so that they
 * are namespace-qualified, collecting the result in `Ret`.
 *
 *  - `prefix:name` becomes `<namespace>:name`, where `<namespace>` is
 *    `Ns[prefix]` when the prefix is bound and `XMLNS` otherwise.
 *  - An un-prefixed string key `name` becomes `XMLNS:name`.
 *  - Non-string keys are skipped.
 */
type _AttrToNS<Attr, Ns extends Namespaces, XMLNS extends string, Ret = {}> =
    Attr extends [infer Name, infer Val, ...infer Tail]
        ? Name extends `${infer Prefix}:${infer Local}`
            ? _AttrToNS<
                Tail, Ns, XMLNS,
                AddKey<Ret, `${Prefix extends keyof Ns ? Ns[Prefix] : XMLNS}:${Local}`, Val>
              >
            : _AttrToNS<
                Tail, Ns, XMLNS,
                Name extends string ? AddKey<Ret, `${XMLNS}:${Name}`, Val> : Ret
              >
        : Ret;


/**
 * Returns a copy of the attribute object `Attr` whose keys are
 * namespace-qualified: prefixes are resolved through `Ns`, and un-prefixed
 * keys (or keys with an unbound prefix) are qualified with `XMLNS`.
 */
type AttrToNS<Attr, Ns extends Namespaces, XMLNS extends string> =
    _AttrToNS<ToTuple<Attr>, Ns, XMLNS>;
/**
 * Resolves the namespace of a tag name and returns `[namespace, name]`.
 *
 *  - `prefix:local` with a prefix bound in `Ns` gives `[Ns[prefix], local]`.
 *  - Anything else (no prefix, or an unbound prefix) gives
 *    `[Ns['xmlns'], T]`, i.e. the default namespace and the name as written.
 */
type TagToNS<T, Ns extends Namespaces> =
    T extends `${infer Prefix}:${infer Tag}`
        ? Prefix extends keyof Ns
            ? [Ns[Prefix], Tag]
            : [Ns['xmlns'], T]
        : [Ns['xmlns'], T];

/**
 * Adds the property `K: V` to the object type `T`.
 * When `T` already has a key `K`, the existing entry is replaced.
 */
type AddKey<T extends {}, K extends PropertyKey, V> =
    { [P in K]: V }
    & (K extends keyof T ? Omit<T, K> : T);
/**
 * Property lookup with a default: resolves to `T[K]` when `K` is a key of
 * `T`, and to the fallback `V` otherwise.
 */
type Def<T, K, V> =
    K extends keyof T
        ? T[K]
        : V;

// Node model produced by the parser and consumed by the serializer.

/** A `<![CDATA[...]]>` section; `cdata` is the text between the markers. */
type XmlCData = {
    cdata: string;
};

/** A `<!-- ... -->` comment; `comment` is the text between the markers. */
type XmlComment = {
    comment: string;
};

/** An element node. */
type XmlElement = {
    /** Tag name as written in the source, including any prefix. */
    tagName: string;
    /** Namespace the tag was resolved to. */
    xmlns: string;
    /** Namespaces in scope for this element. */
    namespaces?: Namespaces;
    /** Attributes of the element. */
    attributes?: {};
    /** Child nodes, in document order. */
    children?: XmlNode[];
};

/** Any node of the tree; plain strings are text nodes. */
type XmlNode =
    | XmlElement
    | XmlComment
    | XmlCData
    | string;

/**
 * Removes every element that is `''`, `undefined`, `null` or `never` from a
 * tuple, keeping the remaining elements in order.
 */
type FilterEmpty<T extends unknown[]> =
    T extends [infer Head, ...infer Tail]
        // Wrapped in a tuple so that `never` is tested instead of distributed over.
        ? [Head] extends ['' | undefined | null]
            ? FilterEmpty<Tail>
            : [Head, ...FilterEmpty<Tail>]
        : T;


/**
 * This does the heavy lifting: parses the string T into a tuple of nodes.
 *
 * `NS` holds the namespaces in scope at the current position. `XmlNodes` is
 * the value returned when parsing cannot continue (empty by default).
 *
 * Namespace scoping: declarations found on an element apply to that
 * element's children only. The siblings that follow it are parsed with the
 * namespaces that were in scope *before* the element (`NS`), so a prefix
 * declared on one element does not leak into the next one.
 *
 * Text nodes are always plain strings, both inside elements and at the top
 * level, so that every produced node is an `XmlNode` and can be serialized.
 */
type _Xml<T extends string, NS extends Namespaces = { xmlns: '' }, XmlNodes extends XmlNode[] = []> =
    // Empty string short circuits.
    T extends '' ? XmlNodes :
    // Parse comments. A comment with an empty body produces no node.
    T extends `<!--${string}` ?
        ParseComment<T> extends [infer Comment, infer CommentRest] ?
            CommentRest extends string ?
                Comment extends '' ? Xml<CommentRest, NS> : [{ comment: Comment }, ...Xml<CommentRest, NS>]
            : XmlNodes
        : XmlNodes :
    // Parse CData
    T extends `<![CDATA[${infer CData}]]>${infer CDataRest}` ? [{ cdata: CData }, ...Xml<CDataRest, NS>] :
    // Parse tags
    T extends `<${string}` ?
        StartTag<T> extends [infer STag, infer Rest] ?
            TagToNS<STag, NS> extends [infer XMLNS, infer Tag] ?
                STag extends string ?
                    Tag extends string ?
                        ParseAttributes<Rest, NS> extends [infer ANS, infer Attr, infer ARest] ?
                            ANS extends Namespaces ?
                                // Self-closing element: <tag ... />
                                Trim<ARest> extends `/>${infer Continue}` ?
                                    [{ tagName: STag, xmlns: XMLNS, namespaces: ANS, attributes: Attr }, ...Xml<Continue, NS>] :
                                // Element with content: <tag ...>content</tag>
                                Trim<ARest> extends `>${infer Content}</${STag}>${infer Continue}` ?
                                    Content extends `${infer ContentText}<${infer ContentXml}` ?
                                        // Leading text (if any) followed by markup; children see the element's namespaces.
                                        [{ tagName: STag, xmlns: XMLNS, attributes: Attr, namespaces: ANS, children: isEmpty<ContentText, Xml<`<${ContentXml}`, ANS>, [ContentText, ...Xml<`<${ContentXml}`, ANS>]> }, ...Xml<Continue, NS>] :
                                        // Text-only content.
                                        [{ tagName: STag, xmlns: XMLNS, attributes: Attr, namespaces: ANS, children: [Content] }, ...Xml<Continue, NS>] :
                                // Neither self-closed nor closed: drop the tag and keep parsing what follows it.
                                ARest extends string ? Xml<ARest, ANS> : XmlNodes
                            : XmlNodes
                        : XmlNodes
                    : XmlNodes
                : XmlNodes
            : XmlNodes
        : XmlNodes :
    // Text followed by markup. T cannot start with '<' here (handled above), so
    // Content is never empty. The '<' consumed by the pattern is put back so the
    // markup that follows is parsed as markup.
    T extends `${infer Content}<${infer Rest}` ? [Content, ...Xml<`<${Rest}`, NS>] :
    // Here we handle just text.
    T extends string ? [T] : XmlNodes;
    
/**
 * Entry point: parses the XML string `T` into a tuple of nodes, with empty
 * entries (`''`, `undefined`, `null`) removed.
 *
 * `NS` supplies the namespaces in scope at the start of the document; when
 * omitted, the default namespace is `'xml'`.
 */
type Xml<T extends string, NS extends Namespaces = { xmlns: 'xml' }> =
    FilterEmpty<
        _Xml<T, NS>
    >;


//From here down is serialization code -- funny, it's longer than the parsing code. I really wish there was a better way.
//ToTuple was swiped from https://stackoverflow.com/questions/53058150/convert-an-interface-to-a-tuple-in-typescript and https://stackoverflow.com/questions/52855145/typescript-object-type-to-array-type-tuple
/**
 * Turns a union `A | B` into the intersection `A & B` by placing each member
 * in a function-parameter position and inferring a single parameter type.
 */
type UnionToIntersection<U> =
    (U extends any ? (k: U) => void : never) extends ((k: infer I) => void)
        ? I
        : never;

/**
 * Extracts a single member of the union `T`; `never` for an empty union.
 * NOTE(review): which member is picked depends on how the compiler orders the
 * union -- see the Stack Overflow answers this was taken from.
 */
type LastOf<T> =
    UnionToIntersection<T extends any ? () => T : never> extends () => (infer R)
        ? R
        : never;

// TS4.1+
/**
 * Flattens the object type `R` into a tuple `[key, value, key, value, ...]`.
 * `T` is the set of keys still to be emitted and `L` the key emitted by the
 * current step; callers normally pass `R` only.
 */
type ToTuple<R, T extends keyof R = keyof R, L = LastOf<T>> =
    [T] extends [never]
        ? []
        : [
            ...ToTuple<R, Exclude<T, L>>,
            L,
            R[L & keyof R]
          ];

//end swipe

/** `true` when T is a non-empty string made only of decimal digits. */
type _IsDigits<T> =
    T extends `${infer D}${infer R}`
        ? D extends NUMBS
            ? (R extends '' ? true : _IsDigits<R>)
            : false
        : false;

/**
 * Serializes a flat `[key, value, key, value, ...]` tuple into an attribute
 * string. Every emitted attribute is preceded by exactly one space.
 *
 *  - string made only of digits: ` key=123` (unquoted)
 *  - any other string:           ` key="value"`
 *  - `true`:                     ` key`
 *  - `false`:                    nothing (the attribute is omitted)
 *  - number:                     ` key=1`
 *
 * Only all-digit strings are written unquoted: a value such as `1abc` is
 * quoted so that it survives being parsed again. A `false` attribute emits
 * nothing at all, not even a separating space.
 *
 * Serialization stops (resolves to `''`) at the first key that is not a
 * string or value of any other type.
 */
type _AttrToStr<T extends readonly any[]> =
    T extends [infer Key, infer Value, ...infer Rest]
        ? Key extends string
            ? Value extends string
                ? _IsDigits<Value> extends true
                    ? ` ${Key}=${Value}${_AttrToStr<Rest>}`
                    : ` ${Key}="${Value}"${_AttrToStr<Rest>}`
                : Value extends true ? ` ${Key}${_AttrToStr<Rest>}`
                : Value extends false ? _AttrToStr<Rest>
                : Value extends number ? ` ${Key}=${Value}${_AttrToStr<Rest>}`
                : ''
            : ''
        : '';

/**
 * Serializes the attribute object `T` into a string such as ` a="1" b`.
 * Resolves to `''` when the serialization yields `never`.
 */
type AttrToStr<T> =
    _AttrToStr<ToTuple<T>> extends never
        ? ''
        : _AttrToStr<ToTuple<T>>;

/**
 * Produces the end of an element whose opening `<tag attrs` has already been
 * written: `/>` when there is no content, `>content</tag>` otherwise.
 */
type CloseTag<Tag extends string, Content extends string> =
    Content extends ''
        ? '/>'
        : `>${Content}</${Tag}>`;

//Fix Serialization so that it outputs namespaces correctly.
/**
 * Serializes a tuple of nodes back into a string.
 *
 *  - comments become `<!--...-->`
 *  - CDATA sections become `<![CDATA[...]]>`
 *  - elements become `<tag attrs/>` or `<tag attrs>children</tag>`
 *  - strings are emitted as they are
 *
 * Anything that is not a non-empty tuple resolves to `''`; so does a tuple
 * whose first entry is none of the node kinds above.
 */
type Serialize<T> =
    T extends [infer Node, ...infer Others]
        ? Node extends XmlComment
            ? `<!--${Node['comment']}-->${Serialize<Others>}`
        : Node extends XmlCData
            ? `<![CDATA[${Node['cdata']}]]>${Serialize<Others>}`
        : Node extends XmlElement
            ? `<${Node['tagName']}${AttrToStr<Node['attributes']>}${CloseTag<Node['tagName'], Serialize<Node['children']>>}${Serialize<Others>}`
        : Node extends string
            ? `${Node}${Serialize<Others>}`
        : ''
        : '';


// Tests: each alias below instantiates the parser (`Xml`) or the serializer
// (`Serialize`) so that the resulting type can be inspected.

// Namespace declarations, comments, CDATA and a caller-supplied namespace map.
type PpNs0 = Xml<`<p xmlns:b="namespace-b" xmlns='new-ns' attr="1"><b:stuff b:what="1">s</b:stuff></p>`, { xmlns: 'stuff' }>;
type CM0 = Xml<'<!-- comment -->'>;
type Pp0 = Xml<`<br/>`, { xmlns: 'stuff' }>;
type PpS = Serialize<Pp0>;
type PC0 = Xml<`<br><![CDATA[hello]]>blag</br>`, { xmlns: 'stuff' }>;
type PC0ST = Serialize<PC0>;
type XmlNS0 = Xml<`<a:br/>`, { xmlns: 'stuff', a: 'aspace' }>;
type XmlNS1 = Xml<`<a:br xmlns='def' brattr=2><p attr=1>is def</p></a:br>`, { xmlns: 'stuff', a: 'aspace' }>;
type SNS1 = Serialize<XmlNS1>;

// Self-closing tag with a space before `/>`.
type Pp0_ = Xml<`<br />`>;

// Comment as element content, parsed and serialized again.
type Pp0C = Xml<`<br><!-- hello --></br>`>;
type Pp0CS = Serialize<Pp0C>;

// Attribute on a self-closing tag, parsed and serialized again.
type Pp01 = Xml<`<br class="foo" />`>;
type Pp01_ = Serialize<Pp01>;

// Tag name containing `-` and a digit.
type P2p01 = Xml<`<br-1 class="foo" />`>;

// Elements with text content, trailing text, nesting and mixed content.
type Pp1 = Xml<`<hello>what</hello>Rest`>;
type Pp1_ = Serialize<Pp1>;
type Pp2 = Xml<`<hello></hello>Rest`>;
type Pp3 = Xml<`<hello>what</hello>`>;
type Pp4 = Xml<`<hello class='name'>what</hello>`>;
type Pp5 = Xml<`<hello>what<br/></hello>`>;
type PPTT = `<hello>what<p>deep</p>more</hello>`;
type Pp6 = Xml<PPTT>;
type Pp6_ = Serialize<Pp6>;
type BR = Xml<'<br/>'>;
type BR0 = Xml<'<br class="what"/>'>;
type BR1 = Xml<'<br></br>'>;
type BR2 = Xml<'<br class="stuff"></br>'>;

type DIV1 = Xml<'<div>hello</div>'>;
type DIV2 = Xml<'<div>hello<br/>foo</div>'>;

type XX1 = Xml<'<div>hello<br/></div>'>;

type P1 = Xml<`<div class='stuff'>hello</div>`>;

// Several child elements.
type P2 = Xml<`<div><span>he</span><why></why></div>`>;


// Sibling elements and multiple attributes with both quote styles.
type X0 = Xml<`<div/><span/>`>;
type X1 = Xml<`<div class='super' value="1"/>`>;
type X2 = Xml<`<div class='super' value="1"></div>`>;

type X3 = Xml<`<div class='super' value="1"><span>hello<br/></span></div>`>;

// Other tests: the individual building blocks of the parser.

// isTagValid: tag-name validation.
type T1 = isTagValid<'tag'>;
type T2 = isTagValid<'1tag'>;//false
type T3 = isTagValid<'tag1'>;
type T4 = isTagValid<'T'>;
type T5 = isTagValid<'_T_'>;
type T6 = isTagValid<''>;
type T7 = isTagValid<'T%'>;//false
type T8 = isTagValid<'T.1'>;
type T9 = isTagValid<'hello:world'>;

// ParseAttributes / _ParseAttributes / ParseAttrValue: attribute lists and values.
type TPA1 = ParseAttributes<'hello="world"'>;
type TPA1_ = _ParseAttributes<'hello="world"'>;
type TPA2 = ParseAttributes<`hello="world" goodbye='lo"nliness'`>;
type TPA3 = ParseAttributes<'hello="world" goodbye'>;
type TPA4 = ParseAttributes<'value=1 more>'>;
type TPA5 = ParseAttributes<'more>'>;
type TPA6 = ParseAttributes<'>helo'>;
type T_PA1 = ParseAttrValue<'1 more'>;
type T_PA2 = ParseAttrValue<'"what" more do you want'>;
type T_PA3 = ParseAttrValue<'what more do you want'>;

// StartTag: reading the tag name after `<`.
type PT1 = StartTag<'<hello/>world'>;
type PT1_1 = StartTag<'<hello />world'>;
type PT2 = StartTag<'<foo.bar rest/>'>;
type PT3 = StartTag<'foo.bar rest'>;
// ParseQ / Quote: quoted strings, including backslash-escaped quotes.
type TParseQ1 = ParseQ<`"hello"world`>;
type TParseQ2 = ParseQ<`'hello'world`>;
type TParseQ3 = ParseQ<`'hel\\'lo\\'wo'rld`>;
type TQuote1 = Quote<`"hello`>;
type TQuote2 = Quote<`'hello`>;
type TQuote3 = Quote<'hello'>;
// ParseAttrName: reading an attribute name.
type PA0 = ParseAttrName<'foo'>;
type PA1 = ParseAttrName<'foo=bar'>;
type PA2 = ParseAttrName<'f'>;
type PA3 = ParseAttrName<'f-v=1'>;
type PA4 = ParseAttrName<'f._-v=1'>;
// isValidAttrName: attribute-name validation.
type IVA1 = isValidAttrName<'hello'>;
type IVA2 = isValidAttrName<'hello='>;
// AttrToNS: qualifying attribute keys with their namespace.
type A2NS1 = AttrToNS<{ a: '1', 'b:attr': 'b 2' }, { xmlns: 'not-default', 'b': 'namespace-b' }, 'default'>;
// isEmpty: empty string, empty tuple, never, undefined and custom results.
type iSe1 = isEmpty<''>;
type iSe2 = isEmpty<[]>;
type iSe2_ = isEmpty<never>;
type iSe3 = isEmpty<undefined, 'nope'>;
type iSe4 = isEmpty<[1], true, 'one'>;
type iSe5 = isEmpty<[], true, 'one'>;
// ParseComment, FilterNsKey, FilterNS and AddKey.
type TP1 = ParseComment<`<!-- comment -->there`>;
type FNs1 = FilterNsKey<'xmlns:stuff'>;
type FilterNSA1 = FilterNS<{ xmlns: 'xml-ns', stuff: 'ss', 'xmlns:a': 'a' }, { 'c': 'c-namespace' }>;
type AddKey1 = AddKey<{ a: 1, b: 2 }, 'b', 3>;


// AttrToStr: serializing an attribute object (and an empty one).
type A2S1 = AttrToStr<{ b: `1`, c: true, d: 'what' }>;
type A2S2 = AttrToStr<{}>;

Playground Link

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment