Skip to content

Instantly share code, notes, and snippets.

@jspears
Last active February 18, 2022 04:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jspears/b44babfeb0f853c2a7e6ebab52e9e723 to your computer and use it in GitHub Desktop.
Save jspears/b44babfeb0f853c2a7e6ebab52e9e723 to your computer and use it in GitHub Desktop.
Xml in TypeScript Types Take 2

I was really unhappy with my first attempt at an XML parser in TypeScript types; it really didn't work. So I started over, and this one pretty much works, as long as you don't count being super strict as part of "working". It'd be pretty easy to fix its laxness. Also, handling namespaces would be a bit of work.

/** Lowercase ASCII letters `a`–`z`. */
type ALPHA =
  | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm'
  | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z';

/** Any ASCII letter: the lowercase set plus its `Uppercase<>` image. */
type Alpha = Uppercase<ALPHA> | ALPHA;

/** Decimal digit characters. */
type NUMBS =
  | '0' | '1' | '2' | '3' | '4'
  | '5' | '6' | '7' | '8' | '9';

/** Punctuation characters accepted inside names by the validators in this file. */
type Special = '.' | '-' | '_';

/**
 * Strips leading and trailing whitespace (space, tab, carriage return, line
 * feed) from a string literal type.
 *
 * The leading and trailing sides are handled by separate branches so that
 * each `infer V` has a single candidate. Anything that does not start or end
 * with whitespace (including non-string types) is returned unchanged.
 */
type Trim<T> =
  T extends `${' ' | '\n' | '\t' | '\r'}${infer V}` ? Trim<V> :
  T extends `${infer V}${' ' | '\n' | '\t' | '\r'}` ? Trim<V> :
  T;
/**
 * `true` when every character of the string literal `T` is a member of
 * `Group`; `false` otherwise. The empty string yields `false` because it has
 * no first character to match.
 */
type isIn<T, Group> =
  T extends `${infer Head}${infer Tail}`
    ? Head extends Group
      ? Tail extends ''
        ? true
        : isIn<Tail, Group>
      : false
    : false;
/**
 * `true` when `T` is an acceptable tag name: the first character must be a
 * letter or `_`, and every following character must be a letter, digit, or
 * one of the `Special` punctuation characters. The empty string is `false`.
 */
type isTagValid<T> =
  T extends `${infer Head}${infer Tail}`
    ? isIn<Head, Alpha | '_'> extends true
      ? Tail extends ''
        ? true
        : isIn<Tail, Alpha | NUMBS | Special>
      : false
    : false;
/**
 * Consumes the longest prefix of `T` that is still a valid tag name, one
 * character at a time, accumulating it in `Ret`.
 *
 * Resolves to `[name, remainder]`; `name` is `''` when the first character
 * cannot start a tag name.
 */
type ParseTag<T, Ret extends string = ''> =
  T extends `${infer Next}${infer Remaining}`
    ? isTagValid<`${Ret}${Next}`> extends true
      ? ParseTag<Remaining, `${Ret}${Next}`>
      : [Ret, T]
    : [Ret, T];
/**
 * Reads an opening `<name` from the front of `T`.
 *
 * Resolves to the `[name, remainder]` pair produced by `ParseTag` for the text
 * after `<`, or to `never` when `T` does not begin with `<`.
 */
type StartTag<T> =
  T extends `<${infer AfterBracket}`
    ? ParseTag<AfterBracket>
    : never;

/**
 * `true` when `T` is an acceptable attribute name: a letter followed by zero
 * or more letters or `Special` punctuation characters.
 *
 * A one-character name is accepted explicitly: `isIn` resolves to `false` for
 * an empty remainder, so without the `Rest extends ''` guard a name such as
 * `'f'` would be reported invalid even though `ParseAttrName` accepts it.
 */
type isValidAttrName<T> =
  T extends `${infer First}${infer Rest}`
    ? isIn<First, Alpha> extends true
      ? Rest extends ''
        ? true
        : isIn<Rest, Alpha | Special>
      : false
    : false;

/**
 * Continuation of `ParseAttrName`: keeps appending characters of `T` to `Ret`
 * while they are letters or `Special` punctuation.
 *
 * Resolves to `[name, remainder]`, stopping at the first character outside
 * that set or at the end of the input.
 */
type _ParseAttrName<T, Ret extends string> =
  T extends `${infer Ch}${infer Tail}`
    ? Ch extends Alpha | Special
      ? _ParseAttrName<Tail, `${Ret}${Ch}`>
      : [Ret, T]
    : [Ret, T];

/**
 * Reads an attribute name from the front of `T`.
 *
 * When `T` starts with a letter, resolves to the `[name, remainder]` pair from
 * `_ParseAttrName`. Otherwise resolves to the one-element tuple `[T]`, which
 * callers can tell apart from a successful two-element result.
 */
type ParseAttrName<T> =
  T extends `${infer Lead}${infer Tail}`
    ? Lead extends Alpha
      ? _ParseAttrName<Tail, Lead>
      : [T]
    : [T];

/**
 * Resolves to the quote character (`'` or `"`) that `T` starts with, or to
 * `never` when `T` does not start with a quote.
 */
type Quote<T> =
  T extends `'${string}`
    ? "'"
    : T extends `"${string}`
      ? '"'
      : never;

/**
 * Scans `T` (the text after an opening quote) for the closing quote `Q`.
 *
 * Resolves to `[value, remainder]`, where `value` is the text up to the first
 * unescaped `Q` and `remainder` is everything after it. A backslash directly
 * before `Q` escapes it: the backslash is dropped, the quote is kept in the
 * value, and scanning continues. If no closing quote is found, the whole
 * input becomes the value and the remainder is `''`.
 *
 * The scan always stops at the first occurrence of `Q` and only then checks
 * whether it was escaped, so an escaped quote that appears after the real
 * closing quote (for example in a later attribute) is left untouched in the
 * remainder.
 *
 * @typeParam T   Text following the opening quote.
 * @typeParam Q   The quote character that terminates the value.
 * @typeParam Ret Accumulator for the value collected so far.
 */
type _EndQ<T extends string, Q extends string = '"', Ret extends string = ''> =
  T extends `${infer Before}${Q}${infer After}`
    ? Before extends `${infer Unescaped}\\`
      // The quote we found is preceded by a backslash: keep it and go on.
      ? _EndQ<After, Q, `${Ret}${Unescaped}${Q}`>
      : [`${Ret}${Before}`, After]
    : [`${Ret}${T}`, ''];

/**
 * Parses a value delimited by the quote character `Q`.
 *
 * When `T` starts with `Q`, resolves to the `[value, remainder]` pair from
 * `_EndQ` for the text after the opening quote; otherwise resolves to
 * `['', T]`.
 */
type _ParseQ<T extends string, Q extends string = '"'> =
  T extends `${Q}${infer AfterOpen}`
    ? _EndQ<AfterOpen, Q>
    : ['', T];

/**
 * Parses a single- or double-quoted value from the front of `T`.
 *
 * Resolves to `[value, remainder]` when `T` starts with a quote, and to
 * `['', T]` when it does not.
 *
 * The unquoted case is detected with a tuple-wrapped `never` check. A plain
 * `Quote<T> extends string` test is always satisfied (`never` is assignable to
 * everything), which would leave the fallback branch unreachable and make the
 * result depend on `_ParseQ` being instantiated with `never`.
 */
type ParseQ<T extends string> =
  [Quote<T>] extends [never]
    ? ['', T]
    : _ParseQ<T, Quote<T>>;

/**
 * Collects an unquoted attribute value character by character.
 *
 * Stops (without consuming) at a tag terminator (`>` or `/>`), or stops and
 * consumes a single separating space. Resolves to `[value, remainder]`.
 */
type _ParseBareValue<T extends string, Ret extends string = ''> =
  T extends `>${string}` | `/>${string}` ? [Ret, T] :
  T extends ` ${infer After}` ? [Ret, After] :
  T extends `${infer Ch}${infer After}` ? _ParseBareValue<After, `${Ret}${Ch}`> :
  [Ret, ''];

/**
 * Parses an attribute value from the front of `T` and resolves to
 * `[value, remainder]`.
 *
 * - Input that starts with a number is read as an unquoted value that ends at
 *   the first space (which is consumed) or at the tag terminator `>` / `/>`
 *   (which is left in the remainder so the enclosing tag can still be closed,
 *   e.g. `value=1/>`).
 * - Anything else is delegated to `ParseQ` for quoted values.
 */
type ParseAttrValue<T extends string> =
  T extends `${number}${string}`
    ? _ParseBareValue<T>
    : ParseQ<T>;

/**
 * Parses a run of attributes from the front of `T` into an object type.
 *
 * Resolves to `[attributes, remainder]`:
 * - `name=value` adds `{ name: value }`, with the value taken from `ParseAttrValue`;
 * - a bare `name` (not followed directly by `=`) adds `{ name: true }`;
 * - parsing stops as soon as `ParseAttrName` cannot read a name from the
 *   trimmed input, returning what has been collected in `Ret` together with
 *   the untrimmed `T`.
 */
type ParseAttributes<T, Ret extends {} = {}> =
  ParseAttrName<Trim<T>> extends [infer Name, infer AfterName]
    ? Name extends string
      ? AfterName extends `=${infer RawValue}`
        ? ParseAttrValue<RawValue> extends [infer Value, infer AfterValue]
          ? ParseAttributes<AfterValue, Ret & { [K in Name]: Value }>
          : ParseAttributes<AfterName, Ret>
        : ParseAttributes<AfterName, { [K in Name]: true } & Ret>
      : ParseAttributes<AfterName, Ret>
    : [Ret, T];

/** A parsed node: either an element or a run of text. */
type XmlNode = string | XmlElement;

/**
 * Shape of a parsed element: its tag name plus optional attributes and
 * optional child nodes.
 */
type XmlElement = {
  type: string;
  attributes?: {};
  children?: Array<XmlNode>;
};

/**
 * Parses the string literal `T` into a tuple of nodes, appended to `Nodes`.
 *
 * Steps, in order:
 * 1. `StartTag<T>` reads `<name`, then `ParseAttributes` reads the attributes.
 * 2. If the trimmed remainder starts with `/>`, a `{type, attributes}` element
 *    is emitted and parsing continues with the text after `/>`.
 * 3. Otherwise, if the trimmed remainder matches `>content</name>rest`, an
 *    element with `children` is emitted and parsing continues with `rest`.
 *    Content containing a `<` is split there: the text before it becomes the
 *    first child and the part from `<` onwards is parsed recursively with
 *    `Xml`. Content without `<` becomes the single child.
 * 4. Otherwise the attribute remainder itself is parsed again with `Xml`.
 *
 * NOTE(review): the closing tag is found through template-literal inference,
 * which presumably binds `Content` up to the first `</name>`; nested elements
 * with the same tag name would then be paired incorrectly. Confirm.
 *
 * NOTE(review): when `T` does not start with `<`, `StartTag<T>` is `never`.
 * The `[...Nodes, T ]` branch looks like the path that emits such trailing
 * text as a string node (which would include `''` at end of input). Confirm in
 * the playground.
 *
 * @typeParam T     Source text to parse.
 * @typeParam Nodes Nodes already collected; defaults to the empty tuple.
 */
type Xml<T extends string, Nodes extends readonly XmlNode[] = []> =  StartTag<T> extends [infer Tag, infer Rest] ? Tag extends string ?
  ParseAttributes<Rest> extends [infer Attr, infer ARest] ?
    // Self-closing element: `<tag ... />`.
    Trim<ARest> extends `/>${infer Continue}` ?  [...Nodes, {type:Tag, attributes:Attr}, ...Xml<Continue>] : 
      // Paired element: `<tag ...>content</tag>`.
      Trim<ARest> extends `>${infer Content}</${Tag}>${infer Continue}` ? 
        // Content with nested markup: leading text, then the recursively parsed rest.
        Content extends `${infer ContentText}<${infer ContentXml}` ? 
          [...Nodes, {type:Tag, attributes:Attr, children:[ContentText,...Xml<`<${ContentXml}`> ] }, ...Xml<Continue>] :
          [...Nodes, {type:Tag, attributes:Attr, children:[Content]},...Xml<Continue>]
    // Neither form matched: retry on whatever followed the attributes.
    : ARest extends string ? [...Nodes, ...Xml<ARest>] : Nodes 
        : Nodes : [...Nodes, T ] : Nodes;

// Sample instantiations of `Xml`; hover each alias in an editor or the
// playground to inspect the resulting node tuple.

// Self-closing tag, without and with a space before `/>`.
type Pp0 = Xml<`<br/>`>;
type Pp0_= Xml<`<br />`>;

// Self-closing tags carrying an attribute; the second tag name contains `-` and a digit.
type Pp01 = Xml<`<br class="foo" />`>;
type P2p01 = Xml<`<br-1 class="foo" />`>;

// Paired tags: trailing text, empty content, attributes, and nested elements.
type Pp1 = Xml<`<hello>what</hello>Rest`>;
type Pp2 = Xml<`<hello></hello>Rest`>;
type Pp3 = Xml<`<hello>what</hello>`>;
type Pp4 = Xml<`<hello class='name'>what</hello>`>;
type Pp5 = Xml<`<hello>what<br/></hello>`>;
type Pp6 = Xml<`<hello>what<p>deep</p>more</hello>`>;

// `<br>` written self-closing and as an explicit open/close pair.
type BR = Xml<'<br/>'>;
type BR0 = Xml<'<br class="what"/>'>;
type BR1 = Xml<'<br></br>'>;
type BR2 = Xml<'<br class="stuff"></br>'>;

// Text content mixed with a self-closing child.
type DIV1 = Xml<'<div>hello</div>'>;
type DIV2 = Xml<'<div>hello<br/>foo</div>'>;

type XX1=Xml<'<div>hello<br/></div>'>;

// Single-quoted attribute value.
type P1 = Xml<`<div class='stuff'>hello</div>`>;

// Two sibling child elements with no text between them.
type P2 = Xml<`<div><span>he</span><why></why></div>`>;


// Sibling top-level elements and multiple attributes (quoted with `'` and `"`).
type X0 = Xml<`<div/><span/>`>;        
type X1 = Xml<`<div class='super' value="1"/>`>      ;  
type X2 = Xml<`<div class='super' value="1"></div>`>;        
 
type X3 = Xml<`<div class='super' value="1"><span>hello<br/></span></div>`>;  


// Type tests: instantiations of the individual helper types. Nothing is
// asserted here; the aliases exist so the results can be inspected on hover.

// isTagValid: tag-name validation.
type T1 = isTagValid<'tag'>;
type T2 = isTagValid<'1tag'>;//false
type T3 = isTagValid<'tag1'>;
type T4 = isTagValid<'T'>;
type T5 = isTagValid<'_T_'>;
type T6 = isTagValid<''>;
type T7 = isTagValid<'T%'>;//false
type T8 = isTagValid<'T.1'>;
// ParseAttributes: quoted values, bare names, unquoted numbers, and input with no attributes.
type TPA1 = ParseAttributes<'hello="world"'>;
type TPA2 = ParseAttributes<`hello="world" goodbye='lo"nliness'`>;
type TPA3 = ParseAttributes<'hello="world" goodbye'>;
type TPA4 = ParseAttributes<'value=1 more>'>;
type TPA5 = ParseAttributes<'more>'>;
type TPA6 = ParseAttributes<'>helo'>;
// ParseAttrValue: numeric, quoted, and unquoted non-numeric input.
type T_PA1 = ParseAttrValue<'1 more'>;
type T_PA2 = ParseAttrValue<'"what" more do you want'>;
type T_PA3 = ParseAttrValue<'what more do you want'>;

// StartTag: input with and without a leading `<`.
type PT1 = StartTag<'<hello/>world'>;
type PT1_1 = StartTag<'<hello />world'>;
type PT2 = StartTag<'<foo.bar rest/>'>;
type PT3 = StartTag<'foo.bar rest'>;
// ParseQ: double-quoted, single-quoted, and backslash-escaped quotes.
type TParseQ1= ParseQ<`"hello"world`>;
type TParseQ2= ParseQ<`'hello'world`>;
type TParseQ3= ParseQ<`'hel\\'lo\\'wo'rld`>;
// Quote: detection of the leading quote character.
type TQuote1 = Quote<`"hello`>;
type TQuote2 = Quote<`'hello`>;
type TQuote3 = Quote<'hello'>;
// ParseAttrName: names with and without a following `=value`.
type PA0 = ParseAttrName<'foo'>;
type PA1 = ParseAttrName<'foo=bar'>;
type PA2 = ParseAttrName<'f'>;
type PA3 = ParseAttrName<'f-v=1'>;
type PA4 = ParseAttrName<'f._-v=1'>;

// isValidAttrName: a plain name, and a name with a trailing `=`.
type IVA1 = isValidAttrName<'hello'>;
type IVA2 = isValidAttrName<'hello='>;

playground

@jspears
Copy link
Author

jspears commented Feb 18, 2022

Updated to add support for XML comments.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment