Steven Levithan slevithan

## gist:4222600
When browsers implement ES6 template strings, add tags XRegExp.r (regex as raw string) and
XRegExp.rx (regex with implicit free-spacing, as raw string). Don't need tag XRegExp.raw (raw
string), because you should be able to use, e.g., XRegExp(String.raw`\w`). Don't need to support
flags /gy (which XRegExp doesn't allow in mode modifiers) with XRegExp.r/rx, since XRegExp methods
provide alternate mechanisms for /gy (scope 'all', the sticky option, lack of need for lastIndex
updating, and the XRegExp.globalize method if you really need it). All other flags (e.g., /im and
custom flags /snxA) can be applied via a leading mode modifier (e.g., XRegExp.r`(?s).`).

If the above sounds confusing, keep in mind that you can simply maintain the status quo but still
gain the benefits of raw multiline template strings (no more double escaping!) via, e.g.,

## regexp-iterate.js
// Using native JavaScript...

function getMatches(str, regex) {
    var matches = [];
    var match;

    if (regex.global) {
        regex.lastIndex = 0;
    } else {
        regex = new RegExp(regex.source, 'g' +

## dot-codepoint.js
// Updated for XRegExp 3.0.0

// Redefine the unescaped dot (outside of character classes) so that it consumes
// one whole code point instead of one UTF-16 code unit: either a high+low
// surrogate pair or any single code unit. XRegExp's flag s (aka dotall or
// singleline) is honored: without it, the single-unit alternative skips
// \x0a, \x0d, \u2028, and \u2029.
XRegExp.addToken(/\./, function(match, scope, flags) {
    var anyCodePoint = "(?:[\ud800-\udbff][\udc00-\udfff]|[\0-\uffff])";
    var anyCodePointExceptNewline = "(?:[\ud800-\udbff][\udc00-\udfff]|[\0-\x09\x0b\x0c\x0e-\u2027\u202a-\uffff])";

    // Flag s absent: the dot must not match line terminators.
    if (flags.indexOf("s") === -1) {
        return anyCodePointExceptNewline;
    }
    return anyCodePoint;
});

## xregexp-unicode-codepoints.js
// Allow syntax extensions by turning on XRegExp's optional "extensibility" feature.
// NOTE(review): presumably required before custom tokens can be registered in the
// XRegExp version this snippet targets -- confirm against that version's docs.
XRegExp.install("extensibility");

/* Adds Unicode code point syntax to XRegExp: \u{n..}
 * `n..` is any 1-6 digit hexadecimal number from 0-10FFFF. Comes from ES6 proposals. Code points
 * above U+FFFF are converted to surrogate pairs, so e.g. `\u{20B20}` is simply an alternate syntax
 * for `\uD842\uDF20`. This can lead to broken behavior if you follow a `\u{n..}` token that
 * references a code point above U+FFFF with a quantifier, or if you use the same in a character
 * class. Using `\u{n..}` with code points above U+FFFF is therefore not recommended, unless you
 * know exactly what you're doing. XRegExp's handling follows ES6 proposals for `\u{n..}`, since

## xregexp-lookbehind2.js
// Simulating infinite-length leading lookbehind in JavaScript. Uses XRegExp.
// Captures within lookbehind are not included in match results. Lazy
// repetition in lookbehind may lead to unexpected results.

(function (XRegExp) {

    function prepareLb(lb) {
        // Allow mode modifier before lookbehind
        var parts = /^((?:\(\?[\w$]+\))?)\(\?<([=!])([\s\S]*)\)$/.exec(lb);
        return {

## xregexp-lookbehind.js
// Simulating infinite-length leading lookbehind in JavaScript. Uses XRegExp
// and XRegExp.matchRecursive. Any regex pattern can be used within lookbehind,
// including nested groups. Captures within lookbehind are not included in
// match results. Lazy repetition in lookbehind may lead to unexpected results.

(function (XRegExp) {

    function preparePattern(pattern, flags) {
        var lbOpen, lbEndPos, lbInner;
        flags = flags || "";

## real-xregexp.js
// Creating a grammatical pattern for real numbers using XRegExp.build

/*
 * Approach 1: Make all of the subpatterns reusable
 */

var lib = {
    digit:             /[0-9]/,
    exponentIndicator: /[Ee]/,
    digitSeparator:    /[_,]/,

## es6-unicode-shims.js
/*!
 * ES6 Unicode Shims 0.1
 * (c) 2012 Steven Levithan <http://slevithan.com/>
 * MIT License
 */

/**
 * Returns a string created using the specified sequence of Unicode code points. Accepts integers
 * between 0 and 0x10FFFF. Code points above 0xFFFF are converted to surrogate pairs. If a provided
 * integer is in the surrogate range, it produces an unpaired surrogate. Comes from accepted ES6

## split.js
/*!
 * Cross-Browser Split 1.1.1
 * Copyright 2007-2012 Steven Levithan <stevenlevithan.com>
 * Available under the MIT License
 * ECMAScript compliant, uniform cross-browser split method
 */

/**
 * Splits a string into an array of strings using a regex or string separator. Matches of the
 * separator are not included in the result array. However, if `separator` is a regex that contains
	When browsers implement ES6 template strings, add tags XRegExp.r (regex as raw string) and
	XRegExp.rx (regex with implicit free-spacing, as raw string). Don't need tag XRegExp.raw (raw
	string), because you should be able to use, e.g., XRegExp(String.raw`\w`). Don't need to support
	flags /gy (which XRegExp doesn't allow in mode modifiers) with XRegExp.r/rx, since XRegExp methods
	provide alternate mechanisms for /gy (scope 'all', the sticky option, lack of need for lastIndex
	updating, and the XRegExp.globalize method if you really need it). All other flags (e.g., /im and
	custom flags /snxA) can be applied via a leading mode modifier (e.g., XRegExp.r`(?s).`).

	If the above sounds confusing, keep in mind that you can simply maintain the status quo but still
	gain the benefits of raw multiline template strings (no more double escaping!) via, e.g.,
	// Using native JavaScript...

	function getMatches(str, regex) {
	var matches = [];
	var match;

	if (regex.global) {
	regex.lastIndex = 0;
	} else {
	regex = new RegExp(regex.source, 'g' +
	// Updated for XRegExp 3.0.0

	// Make unescaped dots outside of character classes match any code point rather
	// than code unit. Accounts for XRegExp's flag s (aka dotall or singleline).
	//
	// The handler returns the replacement pattern source for the dot:
	//   - with flag s: a surrogate pair, or any single code unit;
	//   - without flag s: a surrogate pair, or any single code unit except
	//     \x0a, \x0d, \u2028, and \u2029.
	// The alternation pipes are written unescaped; a backslash before `|` inside
	// a string literal is a no-op escape and only adds noise.
	XRegExp.addToken(/\./, function(match, scope, flags) {
	    return flags.indexOf("s") > -1 ?
	        "(?:[\ud800-\udbff][\udc00-\udfff]|[\0-\uffff])" :
	        "(?:[\ud800-\udbff][\udc00-\udfff]|[\0-\x09\x0b\x0c\x0e-\u2027\u202a-\uffff])";
	});
	// Allow syntax extensions by turning on XRegExp's optional "extensibility" feature.
	// NOTE(review): presumably required before custom tokens can be registered in the
	// XRegExp version this snippet targets -- confirm against that version's docs.
	XRegExp.install("extensibility");

	/* Adds Unicode code point syntax to XRegExp: \u{n..}
	* `n..` is any 1-6 digit hexadecimal number from 0-10FFFF. Comes from ES6 proposals. Code points
	* above U+FFFF are converted to surrogate pairs, so e.g. `\u{20B20}` is simply an alternate syntax
	* for `\uD842\uDF20`. This can lead to broken behavior if you follow a `\u{n..}` token that
	* references a code point above U+FFFF with a quantifier, or if you use the same in a character
	* class. Using `\u{n..}` with code points above U+FFFF is therefore not recommended, unless you
	* know exactly what you're doing. XRegExp's handling follows ES6 proposals for `\u{n..}`, since
	// Simulating infinite-length leading lookbehind in JavaScript. Uses XRegExp.
	// Captures within lookbehind are not included in match results. Lazy
	// repetition in lookbehind may lead to unexpected results.

	(function (XRegExp) {

	function prepareLb(lb) {
	// Allow mode modifier before lookbehind
	var parts = /^((?:\(\?[\w$]+\))?)\(\?<([=!])([\s\S]*)\)$/.exec(lb);
	return {
	// Simulating infinite-length leading lookbehind in JavaScript. Uses XRegExp
	// and XRegExp.matchRecursive. Any regex pattern can be used within lookbehind,
	// including nested groups. Captures within lookbehind are not included in
	// match results. Lazy repetition in lookbehind may lead to unexpected results.

	(function (XRegExp) {

	function preparePattern(pattern, flags) {
	var lbOpen, lbEndPos, lbInner;
	flags = flags \|\| "";
	// Creating a grammatical pattern for real numbers using XRegExp.build

	/*
	* Approach 1: Make all of the subpatterns reusable
	*/

	var lib = {
	digit: /[0-9]/,
	exponentIndicator: /[Ee]/,
	digitSeparator: /[_,]/,
	/*!
	* ES6 Unicode Shims 0.1
	* (c) 2012 Steven Levithan <http://slevithan.com/>
	* MIT License
	*/

	/**
	* Returns a string created using the specified sequence of Unicode code points. Accepts integers
	* between 0 and 0x10FFFF. Code points above 0xFFFF are converted to surrogate pairs. If a provided
	* integer is in the surrogate range, it produces an unpaired surrogate. Comes from accepted ES6
	/*!
	* Cross-Browser Split 1.1.1
	* Copyright 2007-2012 Steven Levithan <stevenlevithan.com>
	* Available under the MIT License
	* ECMAScript compliant, uniform cross-browser split method
	*/

	/**
	* Splits a string into an array of strings using a regex or string separator. Matches of the
	* separator are not included in the result array. However, if `separator` is a regex that contains