nchanged/tokenizer.ts

## tokenizer.ts
import { Context } from "./Context";
import { CharTypes, CharType } from "./chars";
import { OneCharToken } from "./OneCharToken";
import { CharCategory } from "./CharCategory";
import { TokenType } from "./TokenType";

/**
 * Options accepted by `tokenize`.
 */
export interface ITokenizeProps {
  /** Source text to tokenize. */
  str: string;
  /** Copied onto the tokenizer context; none of the code visible in this file reads it. */
  locations?: boolean;
  /**
   * Callback invoked with each token object as it is recognized. The visible
   * emitters pass `{ type }`, `{ type, value }` or `{ type, value, unit }`.
   */
  onToken?: (token: any) => void;
}

/**
 * Skips a comment that starts at `c.index`, which the caller has already
 * identified as a forward slash.
 *
 * - Block comment (slash followed by an asterisk): on success `c.index` is left
 *   just past the closing asterisk-slash pair.
 * - Line comment (two slashes): `c.index` is left just past the end-of-line
 *   character, or at the end of the input when the comment sits on the last
 *   line without a trailing newline.
 *
 * @param c tokenizer state; only `c.index` is modified
 * @returns `true` when a comment was consumed; `false` when the slash does not
 *   open a comment or a block comment is never closed (`c.index` is untouched)
 */
function captureComment(c: Context): boolean {
  const next = CharTypes[c.str.charCodeAt(c.index + 1)];
  if (next & CharType.ASTERISK) {
    // Begin after the two-character opener so the opener's own asterisk cannot
    // pair with a directly following slash (input such as "/*/").
    for (let i = c.index + 2; i < c.length; i++) {
      if (
        CharTypes[c.str.charCodeAt(i)] & CharType.ASTERISK &&
        CharTypes[c.str.charCodeAt(i + 1)] & CharType.FORWARD_SLASH
      ) {
        // Step over BOTH closing characters; stopping on the slash would let it
        // be re-read as the start of another comment.
        c.index = i + 2;
        return true;
      }
    }
  } else if (next & CharType.FORWARD_SLASH) {
    for (let i = c.index + 2; i < c.length; i++) {
      if (CharTypes[c.str.charCodeAt(i)] & CharType.END_OF_LINE) {
        c.index = i + 1;
        return true;
      }
    }
    // No end-of-line before the end of the input: the comment runs to the end.
    c.index = c.length;
    return true;
  }
  return false;
}
/**
 * Continues a token whose leading text "http" has already been read by the
 * caller. Characters from `c.index` onwards are appended until one matches
 * `CharCategory.HTTP_TOKEN_END` or the input ends, then a single
 * `TokenType.VALID` token is emitted.
 *
 * @param c tokenizer state; `c.index` is left on the terminating character, or
 *   at `c.length` when the input ended first
 * @returns always `true` (a token is always emitted)
 */
function captureHTTPToken(c: Context): boolean {
  let token = "http";
  // Stop before c.length: reading c.str[c.length] yields undefined, which would
  // be concatenated as the text "undefined".
  for (let i = c.index; i < c.length; i++) {
    if (CharTypes[c.str.charCodeAt(i)] & CharCategory.HTTP_TOKEN_END) {
      c.onToken({ type: TokenType.VALID, value: token });
      c.index = i;
      return true;
    }
    token += c.str[i];
  }
  // The input ended without a terminator: still emit what was collected.
  c.onToken({ type: TokenType.VALID, value: token });
  c.index = c.length;
  return true;
}

/**
 * Reads a word starting at `c.index` and emits it as a `TokenType.VALID` token.
 * The word ends at the first character that is in `CharCategory.TOKEN_END` or
 * has no (or a zero) entry in `CharTypes`; the end of the input counts as such
 * a character.
 *
 * A word that is exactly "http" is not emitted here: `c.index` is moved to the
 * terminating character and the rest is delegated to `captureHTTPToken`.
 *
 * @param c tokenizer state; `c.index` is left on the terminating character
 * @returns `true` after emitting, or whatever `captureHTTPToken` returns
 */
function captureToken(c: Context): boolean {
  const start = c.index;
  for (let pos = start; pos <= c.length; pos++) {
    const flags = CharTypes[c.str.charCodeAt(pos)];
    // Still inside the word: known character that is not a terminator.
    if (flags && !(flags & CharCategory.TOKEN_END)) continue;

    const word = c.str.slice(start, pos);
    // special treatment for a stupid http token
    if (word === "http") {
      c.index = pos;
      return captureHTTPToken(c);
    }
    c.onToken({ type: TokenType.VALID, value: word });
    c.index = pos;
    return true;
  }
}

/**
 * Reads a numeric token starting at `c.index`.
 *
 * - Characters flagged `CharType.DIGIT` are appended to the token value.
 * - Characters flagged `CharType.ALPHABET` are appended to the unit and switch
 *   the token type to `TokenType.UNIT`.
 * - A `CharType.PERCENT` character ends the token: a `TokenType.PERCENT` token
 *   is emitted and `c.index` is moved past the percent sign.
 * - A `CharCategory.TOKEN_END` character, a character without a `CharTypes`
 *   entry, or the end of the input ends the token and leaves `c.index` on it.
 * - Any other character is skipped without being recorded.
 *
 * @param c tokenizer state
 * @returns `true` once a token has been emitted
 */
function captureDigit(c: Context): boolean {
  let kind = TokenType.DIGIT;
  let digits = "";
  let suffix = "";
  let pos = c.index;
  while (pos <= c.length) {
    const flags = CharTypes[c.str.charCodeAt(pos)];
    const ch = c.str[pos];
    // Digits are collected before the terminator check, as in the original order.
    if (flags & CharType.DIGIT) digits += ch;

    if (!flags || flags & CharCategory.TOKEN_END) {
      c.onToken(
        suffix
          ? { type: kind, value: digits, unit: suffix }
          : { type: kind, value: digits }
      );
      c.index = pos;
      return true;
    }
    if (flags & CharType.PERCENT) {
      c.index = pos + 1;
      c.onToken({ type: TokenType.PERCENT, value: digits });
      return true;
    }
    if (flags & CharType.ALPHABET) {
      kind = TokenType.UNIT;
      suffix += ch;
    }
    pos++;
  }
}

/**
 * Dispatches on the character at `c.index` to the matching capture routine.
 * Checks run in this order: forward slash (comment), single-character token,
 * token start, digit. The chosen routine is responsible for advancing
 * `c.index`.
 * @param c : Context
 * @returns the capture routine's result, `true` for a single-character token,
 *   and nothing (undefined) when no rule matches
 */
function parseChar(c: Context): boolean {
  const code = c.str.charCodeAt(c.index);
  const flags = CharTypes[code];

  if (flags & CharType.FORWARD_SLASH) return captureComment(c);

  const single = OneCharToken[code];
  if (single) {
    c.onToken({ type: single });
    c.index++;
    return true;
  }

  if (flags & CharCategory.TOKEN_START) return captureToken(c);
  if (flags & CharType.DIGIT) return captureDigit(c);
}

/**
 * Tokenizes `props.str`, reporting every recognized token through
 * `props.onToken`.
 *
 * Builds the shared `Context` and repeatedly calls `parseChar`; whenever no
 * rule consumes the current character, the position is advanced by one.
 *
 * @param props input string plus optional `onToken` callback and `locations` flag
 */
export function tokenize(props: ITokenizeProps) {
  // `onToken` is optional on the props, yet the capture routines invoke
  // `c.onToken` unconditionally; fall back to a no-op so omitting it is safe.
  const noop = (): void => {};
  const c: Context = {
    index: 0,
    str: props.str,
    length: props.str.length,
    onToken: props.onToken || noop,
    locations: props.locations
  };
  while (c.index <= c.str.length) {
    if (!parseChar(c)) c.index++;
  }
}
	import { Context } from "./Context";
	import { CharTypes, CharType } from "./chars";
	import { OneCharToken } from "./OneCharToken";
	import { CharCategory } from "./CharCategory";
	import { TokenType } from "./TokenType";

	/**
	 * Options accepted by `tokenize`.
	 */
	export interface ITokenizeProps {
	/** Source text to tokenize. */
	str: string;
	/** Copied onto the tokenizer context; none of the code visible in this file reads it. */
	locations?: boolean;
	/** Callback invoked with each token object as it is recognized. */
	onToken?: (token: any) => void;
	}

	/**
	 * Skips a comment that starts at `c.index`, which the caller has already
	 * identified as a forward slash.
	 *
	 * - Block comment (slash followed by an asterisk): on success `c.index` is
	 *   left just past the closing asterisk-slash pair.
	 * - Line comment (two slashes): `c.index` is left just past the end-of-line
	 *   character, or at the end of the input when no end-of-line follows.
	 *
	 * @param c tokenizer state; only `c.index` is modified
	 * @returns `true` when a comment was consumed; `false` when the slash does
	 *   not open a comment or a block comment is never closed
	 */
	function captureComment(c: Context): boolean {
	  const next = CharTypes[c.str.charCodeAt(c.index + 1)];
	  if (next & CharType.ASTERISK) {
	    // Begin after the two-character opener so the opener's own asterisk
	    // cannot pair with a directly following slash (input such as "/*/").
	    for (let i = c.index + 2; i < c.length; i++) {
	      if (
	        CharTypes[c.str.charCodeAt(i)] & CharType.ASTERISK &&
	        CharTypes[c.str.charCodeAt(i + 1)] & CharType.FORWARD_SLASH
	      ) {
	        // Step over BOTH closing characters; stopping on the slash would
	        // let it be re-read as the start of another comment.
	        c.index = i + 2;
	        return true;
	      }
	    }
	  } else if (next & CharType.FORWARD_SLASH) {
	    for (let i = c.index + 2; i < c.length; i++) {
	      if (CharTypes[c.str.charCodeAt(i)] & CharType.END_OF_LINE) {
	        c.index = i + 1;
	        return true;
	      }
	    }
	    // No end-of-line before the end of the input: the comment runs to the end.
	    c.index = c.length;
	    return true;
	  }
	  return false;
	}
	/**
	 * Continues a token whose leading text "http" has already been read by the
	 * caller. Characters from `c.index` onwards are appended until one matches
	 * `CharCategory.HTTP_TOKEN_END` or the input ends, then a single
	 * `TokenType.VALID` token is emitted.
	 *
	 * @param c tokenizer state; `c.index` is left on the terminating character,
	 *   or at `c.length` when the input ended first
	 * @returns always `true` (a token is always emitted)
	 */
	function captureHTTPToken(c: Context): boolean {
	  let token = "http";
	  // Stop before c.length: reading c.str[c.length] yields undefined, which
	  // would be concatenated as the text "undefined".
	  for (let i = c.index; i < c.length; i++) {
	    if (CharTypes[c.str.charCodeAt(i)] & CharCategory.HTTP_TOKEN_END) {
	      c.onToken({ type: TokenType.VALID, value: token });
	      c.index = i;
	      return true;
	    }
	    token += c.str[i];
	  }
	  // The input ended without a terminator: still emit what was collected.
	  c.onToken({ type: TokenType.VALID, value: token });
	  c.index = c.length;
	  return true;
	}

	/**
	 * Reads a word starting at `c.index` and emits it as a `TokenType.VALID`
	 * token. The word ends at the first character that is in
	 * `CharCategory.TOKEN_END` or has no (or a zero) entry in `CharTypes`; the
	 * end of the input counts as such a character.
	 *
	 * A word that is exactly "http" is not emitted here: `c.index` is moved to
	 * the terminating character and the rest is delegated to `captureHTTPToken`.
	 *
	 * @param c tokenizer state; `c.index` is left on the terminating character
	 * @returns `true` after emitting, or whatever `captureHTTPToken` returns
	 */
	function captureToken(c: Context): boolean {
	  let token = "";
	  for (let i = c.index; i <= c.length; i++) {
	    const SubType = CharTypes[c.str.charCodeAt(i)];
	    // Plain logical OR; the backslash-escaped form is not valid TypeScript.
	    if (SubType & CharCategory.TOKEN_END || !SubType) {
	      // special treatment for a stupid http token
	      if (token === "http") {
	        c.index = i;
	        return captureHTTPToken(c);
	      }
	      c.onToken({ type: TokenType.VALID, value: token });
	      c.index = i;
	      return true;
	    } else token += c.str[i];
	  }
	  // Only reachable if c.length is smaller than the real string length.
	  return false;
	}

	/**
	 * Reads a numeric token starting at `c.index`.
	 *
	 * - Characters flagged `CharType.DIGIT` are appended to the token value.
	 * - Characters flagged `CharType.ALPHABET` are appended to the unit and
	 *   switch the token type to `TokenType.UNIT`.
	 * - A `CharType.PERCENT` character ends the token: a `TokenType.PERCENT`
	 *   token is emitted and `c.index` is moved past the percent sign.
	 * - A `CharCategory.TOKEN_END` character, a character without a `CharTypes`
	 *   entry, or the end of the input ends the token and leaves `c.index` on it.
	 * - Any other character is skipped without being recorded.
	 *
	 * @param c tokenizer state
	 * @returns `true` once a token has been emitted
	 */
	function captureDigit(c: Context): boolean {
	  let type = TokenType.DIGIT;
	  let token = "";
	  let unit = "";
	  for (let i = c.index; i <= c.length; i++) {
	    const SubType = CharTypes[c.str.charCodeAt(i)];
	    if (SubType & CharType.DIGIT) token += c.str[i];

	    // Plain logical OR; the backslash-escaped form is not valid TypeScript.
	    if (SubType & CharCategory.TOKEN_END || !SubType) {
	      if (unit) c.onToken({ type: type, value: token, unit: unit });
	      else c.onToken({ type: type, value: token });

	      c.index = i;
	      return true;
	    } else {
	      if (SubType & CharType.PERCENT) {
	        c.index = i + 1;
	        c.onToken({ type: TokenType.PERCENT, value: token });
	        return true;
	      } else if (SubType & CharType.ALPHABET) {
	        type = TokenType.UNIT;
	        unit += c.str[i];
	      }
	    }
	  }
	  // Only reachable if c.length is smaller than the real string length.
	  return false;
	}

	/**
	 * Dispatches on the character at `c.index` to the matching capture routine.
	 * Checks run in this order: forward slash (comment), single-character token,
	 * token start, digit. The chosen routine is responsible for advancing
	 * `c.index`.
	 * @param c : Context
	 * @returns the capture routine's result, `true` for a single-character
	 *   token, and nothing (undefined) when no rule matches
	 */
	function parseChar(c: Context): boolean {
	  const code = c.str.charCodeAt(c.index);
	  const flags = CharTypes[code];

	  if (flags & CharType.FORWARD_SLASH) return captureComment(c);

	  const single = OneCharToken[code];
	  if (single) {
	    c.onToken({ type: single });
	    c.index++;
	    return true;
	  }

	  if (flags & CharCategory.TOKEN_START) return captureToken(c);
	  if (flags & CharType.DIGIT) return captureDigit(c);
	}

	/**
	 * Tokenizes `props.str`, reporting every recognized token through
	 * `props.onToken`.
	 *
	 * Builds the shared `Context` and repeatedly calls `parseChar`; whenever no
	 * rule consumes the current character, the position is advanced by one.
	 *
	 * @param props input string plus optional `onToken` callback and `locations` flag
	 */
	export function tokenize(props: ITokenizeProps) {
	  // `onToken` is optional on the props, yet the capture routines invoke
	  // `c.onToken` unconditionally; fall back to a no-op so omitting it is safe.
	  const noop = (): void => {};
	  const c: Context = {
	    index: 0,
	    str: props.str,
	    length: props.str.length,
	    onToken: props.onToken || noop,
	    locations: props.locations
	  };
	  while (c.index <= c.str.length) {
	    if (!parseChar(c)) c.index++;
	  }
	}