loilo/regex-tag.md

## regex-tag.md

      
    Raw
  

              regex-tag.md
            
          
    Regular Expression Template Tag in TypeScript


I liked this gist enough that I made it a full-blown npm package. Check it out on npm as @loilo/rx.

After having used regular expression escape functions for years, it just crossed my mind that this might be way easier to achieve with a tagged template literal.
This is how to use this gist:
import rx from 'https://unpkg.com/@loilo/rx/dist/rx.mjs'

const disallowedWindowsFilenameChars = rx`[${'<>:"/\\|?*'}]`

if (disallowedWindowsFilenameChars.test(someFilename)) {
  console.error('Invalid characters in filename')
} else {
  console.log("You're probably fine")
}

Disclaimer: Please keep in mind that all code examples in this gist are exclusively for demonstration purposes. Most of them can be solved more efficiently and elegantly without any use of regular expressions.

Flags

To use flags in our regular expression, we can pass them to the rx() function and use the result as a template:
function matchCaseInsensitive(string) {
  return rx('i')`${string}`
}

const pattern = matchCaseInsensitive('foo')
pattern.test('foo') // true
pattern.test('fOO') // true
Raw Strings

From time to time, we may want to conditionally include regex control characters in a pattern. Since rx escapes every interpolated string, passing them as a plain string makes them match literally instead:
function naiveNumberMatcher(allowFloat) {
  return rx`^-?[0-9]+${allowFloat ? '(\\.[0-9]+)?' : ''}$`
}

const pattern = naiveNumberMatcher(true)
// pattern = /^-?[0-9]+\(\\\.\[0\-9\]\+\)\?$/
// Snap! This won't match floating point numbers.
Luckily, there's an easy solution: just return the control characters as a regular expression:
function naiveNumberMatcher(allowFloat) {
  return rx`^-?[0-9]+${allowFloat ? /(\.[0-9]+)?/ : ''}$`
}

const intPattern = naiveNumberMatcher(false) // intPattern = /^-?[0-9]+$/
intPattern.test('abc') // false
intPattern.test('0') // true
intPattern.test('-1') // true
intPattern.test('1.5') // false

const floatPattern = naiveNumberMatcher(true) // floatPattern = /^-?[0-9]+(\.[0-9]+)?$/
floatPattern.test('abc') // false
floatPattern.test('0') // true
floatPattern.test('-1') // true
floatPattern.test('1.5') // true
Alternatively, we could have wrapped the control characters in an rx.raw() call which will exclude them from being escaped:
function naiveNumberMatcher(allowFloat) {
  // rx.raw also works as a template tag: rx.raw`(\\.[0-9]+)?`
  return rx`^-?[0-9]+${allowFloat ? rx.raw('(\\.[0-9]+)?') : ''}$`
}
This can be necessary when the wrapped control characters are quantifiers which cannot form a regular expression of their own, e.g. /?/.
Arrays

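If an array is passed as a placeholder, its entries will be escaped and joined by a vertical bar — this way, we can easily express enumerations. Note that the joined alternatives are not wrapped in a group, so put parentheses around the placeholder whenever the alternation should not extend to the neighboring parts of the pattern: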
function oneOf(...strings) {
  return rx('i')`^${strings}$`
}

const pattern = oneOf('a', 'b') // pattern = /^a|b$/i
pattern.test('a') // true
pattern.test('B') // true
pattern.test('d') // false
Note that arrays may also contain regular expressions or rx.raw strings which stay unescaped as described above:
function oneOfTheseOrInteger(...strings) {
  return rx('i')`^(${[...strings, /[0-9]+/]})$`
}

const pattern = oneOfTheseOrInteger('a', 'b') // pattern = /^(a|b|[0-9]+)$/i
pattern.test('A') // true
pattern.test('d') // false
pattern.test('42') // true
Arrays can even be nested and are flattened automatically:
const naivePluralize = value => value + 's'

function oneOrMultipleOf(...strings) {
  return rx`^${strings.map(string => [string, naivePluralize(string)])}$`
}

oneOrMultipleOf('cat', 'dog') // /^cat|cats|dog|dogs$/
Credit

In the world of programming, you're basically never the first person to come up with a clever trick. I googled my idea and it turned out that Lea Verou published the very same thing in 2018 — and this gist is loosely based on her implementation.
Key differences are that I added type hints (therefore this gist is written in TypeScript) and the aforementioned capability to merge in arrays and raw strings.

  
## regex-tag.ts
/**
 * Escapes every character that has a special meaning in a regular expression
 * by prefixing it with a backslash, so the result matches the input literally.
 * -> Creates a literal part of a RegExp
 * So you can do this without worrying about special chars:
 * new RegExp(sanitizeRegex(anyString))
 *
 * @param value Text to escape; non-string values are converted with `String()` first
 * @returns The text with each special character escaped
 */
export function sanitizeRegex(value: string): string {
  // Coerce before replacing: `rx` forwards untyped placeholders to this function,
  // and calling `.replace()` on e.g. a number would throw a TypeError.
  return String(value).replace(/[-[\]/{}()*+?.\\^$|]/g, '\\$&')
}

/**
 * Call signature of a tag function for ES2015 tagged template literals
 *
 * @typeParam T Type returned by the tag; defaults to `string`
 */
export interface TemplateTag<T = string> {
  // Receives the template's string partials first, followed by the embedded values
  (string: TemplateStringsArray, ...values: any[]): T
}

/**
 * Wrapper around a string of regular expression source.
 *
 * `rx` inserts instances of this class via `String()` without escaping them.
 */
export class RxLiteral {
  /** The wrapped source text */
  private value: string

  public constructor(value: string) {
    this.value = value
  }

  /** Returns the wrapped source text unchanged. */
  public toString() {
    const { value } = this
    return value
  }
}

/**
 * Factory function for a template tag which creates regular expressions
 *
 * @param flags Flags to use in the regular expression
 * @returns A template tag that builds a `RegExp` with the given flags
 */
export function rx(flags: string): TemplateTag<RegExp>

/**
 * A template tag for creating regular expressions; embedded values will be escaped
 *
 * Embedded regular expressions contribute their `source` and `RxLiteral`
 * instances their string value, both unescaped. Arrays are resolved entry by
 * entry (recursively) and joined with `|`. Every other value is converted to
 * a string and escaped.
 *
 * @param strings String partials of the regular expression
 * @param values  Values to escape
 * @returns The assembled regular expression, created without flags
 */
export function rx(strings: TemplateStringsArray, ...values: any[]): RegExp
export function rx(...args: any[]) {
  /** Converts one embedded value into regular expression source text. */
  function handlePlaceholder(placeholder: unknown): string {
    if (placeholder instanceof RegExp) {
      // Only the source is taken over from an embedded regular expression
      return placeholder.source
    }

    if (placeholder instanceof RxLiteral) {
      return String(placeholder)
    }

    if (Array.isArray(placeholder)) {
      // Nested arrays recurse, which flattens them into one alternation
      return placeholder.map(entry => handlePlaceholder(entry)).join('|')
    }

    // Placeholders are untyped, so stringify before escaping: passing a number
    // or boolean straight to sanitizeRegex would fail on the missing `.replace()`.
    return sanitizeRegex(String(placeholder))
  }

  /** Interleaves the string partials with the resolved values and compiles the result. */
  function replacer(
    flags: string,
    strings: TemplateStringsArray,
    ...values: any[]
  ) {
    // values[index] sits between strings[index] and strings[index + 1]
    const source = strings.slice(1).reduce((carry, string, index) => {
      return carry.concat(handlePlaceholder(values[index]), string)
    }, strings[0])

    return RegExp(source, flags)
  }

  // If the first argument is an array, use this function as template tag
  if (Array.isArray(args[0])) {
    return replacer('', ...(args as Parameters<TemplateTag>))
  }

  // Otherwise, use it as template tag factory
  return replacer.bind(undefined, args[0])
}

/**
 * Wraps a string in an RxLiteral.
 *
 * @param value Regular expression source to wrap
 */
function createRxLiteral(value: string): RxLiteral
/**
 * Template tag variant: concatenates the string partials (read from `strings`,
 * not `strings.raw`) with the embedded values and wraps the result in an RxLiteral.
 *
 * @param strings String partials of the template
 * @param values  Values placed between the partials, converted to strings
 */
function createRxLiteral(
  strings: TemplateStringsArray,
  ...values: any[]
): RxLiteral
function createRxLiteral(...args: any[]) {
  const [head, ...substitutions] = args

  // Plain function call: wrap the argument as it is
  if (!Array.isArray(head)) {
    return new RxLiteral(head)
  }

  // Template tag call: weave partials and embedded values back together
  let source: string = head[0]
  for (let position = 1; position < head.length; position++) {
    source = source.concat(substitutions[position - 1], head[position])
  }

  return new RxLiteral(source)
}
// Expose the literal factory as `rx.raw`
rx.raw = createRxLiteral
	/**
	 * Escapes every character that has a special meaning in a regular expression
	 * by prefixing it with a backslash, so the result matches the input literally.
	 * -> Creates a literal part of a RegExp
	 * So you can do this without worrying about special chars:
	 * new RegExp(sanitizeRegex(anyString))
	 *
	 * @param value Text to escape; non-string values are converted with `String()` first
	 * @returns The text with each special character escaped
	 */
	export function sanitizeRegex(value: string): string {
		// Coerce before replacing: `rx` forwards untyped placeholders to this function,
		// and calling `.replace()` on e.g. a number would throw a TypeError.
		return String(value).replace(/[-[\]/{}()*+?.\\^$\|]/g, '\\$&')
	}

	/**
	* Call signature of a tag function for ES2015 tagged template literals
	*
	* @typeParam T Type returned by the tag; defaults to `string`
	*/
	export interface TemplateTag<T = string> {
	// Receives the template's string partials first, followed by the embedded values
	(string: TemplateStringsArray, ...values: any[]): T
	}

	/**
	 * Wrapper around a string of regular expression source.
	 *
	 * `rx` inserts instances of this class via `String()` without escaping them.
	 */
	export class RxLiteral {
		/** The wrapped source text */
		private value: string

		public constructor(value: string) {
			this.value = value
		}

		/** Returns the wrapped source text unchanged. */
		public toString() {
			const { value } = this
			return value
		}
	}

	/**
	 * Factory function for a template tag which creates regular expressions
	 *
	 * @param flags Flags to use in the regular expression
	 * @returns A template tag that builds a `RegExp` with the given flags
	 */
	export function rx(flags: string): TemplateTag<RegExp>

	/**
	 * A template tag for creating regular expressions; embedded values will be escaped
	 *
	 * Embedded regular expressions contribute their `source` and `RxLiteral`
	 * instances their string value, both unescaped. Arrays are resolved entry by
	 * entry (recursively) and joined with a vertical bar. Every other value is
	 * converted to a string and escaped.
	 *
	 * @param strings String partials of the regular expression
	 * @param values Values to escape
	 * @returns The assembled regular expression, created without flags
	 */
	export function rx(strings: TemplateStringsArray, ...values: any[]): RegExp
	export function rx(...args: any[]) {
		/** Converts one embedded value into regular expression source text. */
		function handlePlaceholder(placeholder: unknown): string {
			if (placeholder instanceof RegExp) {
				// Only the source is taken over from an embedded regular expression
				return placeholder.source
			}

			if (placeholder instanceof RxLiteral) {
				return String(placeholder)
			}

			if (Array.isArray(placeholder)) {
				// Nested arrays recurse, which flattens them into one alternation
				return placeholder.map(entry => handlePlaceholder(entry)).join('\|')
			}

			// Placeholders are untyped, so stringify before escaping: passing a number
			// or boolean straight to sanitizeRegex would fail on the missing `.replace()`.
			return sanitizeRegex(String(placeholder))
		}

		/** Interleaves the string partials with the resolved values and compiles the result. */
		function replacer(
			flags: string,
			strings: TemplateStringsArray,
			...values: any[]
		) {
			// values[index] sits between strings[index] and strings[index + 1]
			const source = strings.slice(1).reduce((carry, string, index) => {
				return carry.concat(handlePlaceholder(values[index]), string)
			}, strings[0])

			return RegExp(source, flags)
		}

		// If the first argument is an array, use this function as template tag
		if (Array.isArray(args[0])) {
			return replacer('', ...(args as Parameters<TemplateTag>))
		}

		// Otherwise, use it as template tag factory
		return replacer.bind(undefined, args[0])
	}

	/**
	 * Wraps a string in an RxLiteral.
	 *
	 * @param value Regular expression source to wrap
	 */
	function createRxLiteral(value: string): RxLiteral
	/**
	 * Template tag variant: concatenates the string partials (read from `strings`,
	 * not `strings.raw`) with the embedded values and wraps the result in an RxLiteral.
	 *
	 * @param strings String partials of the template
	 * @param values Values placed between the partials, converted to strings
	 */
	function createRxLiteral(
		strings: TemplateStringsArray,
		...values: any[]
	): RxLiteral
	function createRxLiteral(...args: any[]) {
		const [head, ...substitutions] = args

		// Plain function call: wrap the argument as it is
		if (!Array.isArray(head)) {
			return new RxLiteral(head)
		}

		// Template tag call: weave partials and embedded values back together
		let source: string = head[0]
		for (let position = 1; position < head.length; position++) {
			source = source.concat(substitutions[position - 1], head[position])
		}

		return new RxLiteral(source)
	}
	// Expose the literal factory as `rx.raw`
	rx.raw = createRxLiteral