Skip to content

Instantly share code, notes, and snippets.

@renoirb
Last active March 9, 2020 00:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save renoirb/e7d344cb88524800c247c6842e4eb550 to your computer and use it in GitHub Desktop.
Save renoirb/e7d344cb88524800c247c6842e4eb550 to your computer and use it in GitHub Desktop.
/* eslint-disable @typescript-eslint/ban-ts-ignore */
/**
* File: src/tokenizer.ts
*/
import co from 'co'
// Sentinel handed to `sink.return()` by the tokenizer to signal the end of the stream.
const TOKENIZER_END = Symbol('TOKENIZER_END')
/**
 * Coroutine Generator Sink.
 *
 * A generator object that yields values of type `T`, accepts values of
 * type `T` through `next()`, and finishes with a `symbol` return value.
 *
 * Bookmarks:
 * - https://twitter.com/renoirb/status/1236386606266953731
 * - https://github.com/DefinitelyTyped/DefinitelyTyped/blob/master/types/co/index.d.ts
 * - https://gist.github.com/OrionNebula/bd2d4339497a2c05e599d7d24038d290
 * - https://github.com/danoctavian/node-coroutine-utils
 * - https://github.com/wowts/coroutine
 * - http://calculist.org/blog/2011/12/14/why-coroutines-wont-work-on-the-web/
 * - https://www.bennadel.com/blog/3264-thoughts-on-defining-coroutines-as-class-methods-in-node-js-and-typescript.htm
 */
export type CoroutineGeneratorSink<T> = Generator<T, symbol, T>
/**
* Follows the tokenizer pattern illustrated by Dr. Axel Rauschmayer.
*
* See [Exploring ES6, chapter 22 about Generators][exploringes6-chapter-22]
*
* [exploringes6-chapter-22]: https://exploringjs.com/es6/ch_generators.html#_step-1--tokenizing "Tokenizing"
*
* Bookmarks:
* - https://github.com/rauschma/generator-examples/blob/06b5a59/node/readlines.js
* - https://www.npmjs.com/package/@wowts/coroutine
*/
/**
 * Type guard: is the value exactly one ASCII letter or digit?
 *
 * Non-string values are rejected, and so is any string that is empty,
 * longer than one character, or outside `A-Z`, `a-z`, `0-9` (whitespace
 * and punctuation included).
 *
 * @param ch - Value to examine, typically one character of the input
 * @returns `true` when `ch` is a single alphanumeric character
 */
function isWordChar(ch: unknown): ch is string {
  if (typeof ch !== 'string') {
    return false
  }
  const singleAlphanumeric = /^[A-Za-z0-9]$/
  return singleAlphanumeric.test(ch)
}
/**
 * Wraps a generator function so that every generator object it creates is
 * already advanced to its first `yield`.
 *
 * A freshly created generator has no pending `yield`, so the argument of its
 * very first `next(value)` call is discarded. Priming the generator here lets
 * callers push data from their first `next()` call onwards.
 *
 * @param generatorFunction - Generator function acting as an observer (data is pushed in through `next()`)
 * @returns A function taking the same arguments and returning the primed generator object
 */
function coroutine<TArgs extends unknown[], TNext>(
  generatorFunction: (...args: TArgs) => Generator<undefined, void, TNext>,
): (...args: TArgs) => Generator<undefined, void, TNext> {
  return (...args: TArgs): Generator<undefined, void, TNext> => {
    const generatorObject = generatorFunction(...args)
    // Run up to the first `yield`; the value it produces is irrelevant.
    generatorObject.next()
    return generatorObject
  }
}

/**
 * Receives a sequence of characters (via the generator object
 * method `next()`), groups them into words and pushes them
 * into the generator `sink`.
 *
 * Calling `return()` on the returned generator flushes the word in progress
 * (if any) to `sink` and then forwards `return(TOKENIZER_END)` to `sink`.
 *
 * `sink` must already be paused at a `yield` (i.e. primed the same way);
 * otherwise the first word sent to it through `next()` is dropped.
 *
 * Implementation note: the `co` package is deliberately not used here.
 * `co(fn)` runs the generator immediately and returns a Promise, and it
 * rejects on a bare `yield`, so it cannot produce this kind of coroutine.
 *
 * The following is a free-form copy from Dr. Rauschmayer’s Exploring ES6 book
 * from Generators chapter.
 *
 * Bookmarks:
 * - https://exploringjs.com/es6/ch_generators.html#_step-1--tokenize
 *
 * @public
 * @author Axel Rauschmayer
 * @param sink - Primed generator that receives each completed word
 * @returns A primed generator; push one character per `next(ch)` call
 */
export const tokenizer = coroutine(function*(
  sink: CoroutineGeneratorSink<string>,
): Generator<undefined, void, string> {
  try {
    while (true) {
      // (A)
      let ch = yield // (B)
      if (isWordChar(ch)) {
        // A word has started
        let word = ''
        try {
          do {
            word += ch
            ch = yield // (C)
          } while (isWordChar(ch))
        } finally {
          // The word is finished.
          // We get here if
          // - the loop terminates normally
          // - the loop is terminated via `return()` (or `throw()`) in line C
          sink.next(word) // (D)
        }
      }
      // Ignore all other characters
    }
  } finally {
    // We only get here if the infinite loop is terminated
    // via `return()` or `throw()` (in line B or C).
    // Forward `return()` to `sink` so that it is also
    // aware of the end of stream.
    sink.return(TOKENIZER_END)
  }
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment