diegomanuel/regex.js

## regex.js
// Regular expressions: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
// Cheatsheet: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet


// Most languages have a "native/direct" (literal) way to construct regular expressions
const re_native = /abc/;
// And they also have functional/modular constructors (handy when the pattern is built at runtime)
const re_obj = new RegExp("abc");

// Elixir :: https://hexdocs.pm/elixir/Regex.html
// regexp = ~r/abc/
// regexp = Regex.compile("abc")

// Python :: https://docs.python.org/3/howto/regex.html
// regexp = re.compile("abc")


// Regular expressions have "modifiers" (flags) that affect the matching behaviour
// https://www.regular-expressions.info/modifiers.html

// Flag "g" (global): match or replace every occurrence instead of only the first one
for (const pattern of [
  /e/, // Output: "th A letter"
  /e/g, // Output: "th A lttr"
]) {
  console.log("the A letter".replace(pattern, ""));
}
// Flag "i" (ignore case): letters match regardless of upper/lower case
for (const pattern of [
  /a/, // Output: null
  /a/i, // Output: [ 'A', index: 4, input: 'the A' ]
  /a/gi, // Output: [ 'A' ]
]) {
  console.log("the A".match(pattern));
}
// Flag "m" (multiline): for strings with new lines, e.g. a whole text file; it only changes the behavior of ^ and $
// Flag "u" (unicode): for strings containing unicode characters
// Flags can be combined
{
  const shouted = "A NICE STRING IS nicer NOW 👌👌👌!";
  console.log(shouted.replace(/nice[r]?|👌|!/igu, "*")); // Output: "A * STRING IS * NOW ****"
}


// Most commonly used character classes
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes

// Each row is [subject, pattern, replacement]; the loop logs
// subject.replace(pattern, replacement), one line of output per row.
for (const [subject, pattern, replacement] of [
  // "\" : escapes the next character so it is matched literally
  ["$1234", /\$/g, "U\$D"], // Output: "U$D1234"
  ["the [match] (here)!", /the \[match\] \(here\)/g, "-"], // Output: "-!"
  // "\d" : matches a digit
  ["the number is $1234", /\d/g, "*"], // Output: "the number is $****"
  // "\D" : matches anything that is NOT a digit
  ["the number is $1234", /\D/g, "*"], // Output: "***************1234"
  // "\w" : matches an alphanumeric character
  ["the number is $1234", /\w+ is \$\w/g, "*"], // Output: "the *234"
  // "\W" : matches anything that is NOT an alphanumeric character
  ["the number is $1234! right???", /\W/g, "*"], // Output: "the*number*is**1234**right***"
  // "\s" : matches a whitespace character
  ["the number is $1234", /\s/g, "-"], // Output: "the-number-is-$1234"
  // "\S" : matches anything that is NOT a whitespace character
  ["the number is $1234", /\S/g, "-"], // Output: "--- ------ -- -----"
]) {
  console.log(subject.replace(pattern, replacement));
}


// Most commonly used patterns
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Assertions
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Groups_and_Ranges
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Quantifiers

// Each row is [subject, pattern, replacement]; the loop logs
// subject.replace(pattern, replacement), one line of output per row.
for (const [subject, pattern, replacement] of [
  // "^" and "$" : anchor the match to the start and to the end of the string, respectively
  ["the start of the art", /art/g, "*"], // Output: "the st* of the *"
  ["the start of the art", /^the/g, "*"], // Output: "* start of the art"
  ["the start of the art", /art$/g, "*"], // Output: "the start of the *"

  // "." : matches any single character except line terminators
  ["no last letters", /.s/g, "*s"], // Output: "no l*st lette*s"

  // "+" : the previous item must appear one or more times
  ["ac abc aabbcc aaabbbccc", /ab+c/g, "*"], // Output: "ac * a*c aa*cc" (note the last result "aa*cc")

  // "*" : the previous item may appear zero or more times
  ["ac abc aabbcc aaabbbccc", /ab*c/g, "*"], // Output: "* * a*c aa*cc"

  // "?" : the previous character or group may or may not be present
  ["ac abc abbc", /ab?c/g, "*"], // Output: "* * abbc"

  // "x{n}", "x{n,}", "x{n,m}" : exactly "n", at least "n", between "n" and "m" repetitions
  ["aaabbbccc", /ab{3}c/g, "*"], // Output: "aa*cc"
  ["aaabbbccc", /ab{2}c/g, "no matches"], // Output: "aaabbbccc" (no matches)
  ["aaabbbccc", /ab{2,}c/g, "*"], // Output: "aa*cc"
  ["aaabccc", /ab{1,3}c/g, "*"], // Output: "aa*cc"
  ["aaabccc", /ab{2,3}c/g, "no matches"], // Output: "aaabccc" (no matches)
]) {
  console.log(subject.replace(pattern, replacement));
}

// Each row is [subject, pattern, replacement]; the loop logs
// subject.replace(pattern, replacement), one line of output per row.
for (const [subject, pattern, replacement] of [
  // "x|y" : matches either "x" or "y"
  ["green apples and red ones", /green|red/g, "*"], // Output: "* apples and * ones"

  // "[]" : matches any ONE of the enclosed characters ("[^...]" negates the set)
  ["abcdefghi", /[fed]/g, "*"], // Output: "abc***ghi"
  ["abcdefghi", /[^fed]/g, "*"], // Output: "***def***"
  // Inside a set, "-" defines a range (for letters and numbers)
  ["abcdefghi", /[b-h]/g, "*"], // Output: "a*******i"
  ["ahgcedbfi", /[b-h]/g, "*"], // Output: "a*******i"
  ["a", /a[bcd]?/g, "*"], // Output: "*"
  ["abcd", /a[bcd]?/g, "*"], // Output: "*cd" (note that it is NOT "a*")
  ["abcd", /a[bc]?d/g, "no matches"], // Output: "abcd" (no matches)
]) {
  console.log(subject.replace(pattern, replacement));
}

// Each row is [subject, pattern, replacement]; the loop logs
// subject.replace(pattern, replacement), one line of output per row.
for (const [subject, pattern, replacement] of [
  // "()" : groups part of the pattern and captures what it matched
  // ("$1", "$2"... in the replacement; "\1", "\2"... inside the pattern itself)
  ["Total: $500", /\w+:\s.(\d+)/g, "You owe me U\$D$1!"], // Output: "You owe me U$D500!"
  ["12ab34cd56", /(\D)/g, "-$1-"], // Output: "12-a--b-34-c--d-56"
  ["12ab34cd56", /(\D+)/g, "-$1-"], // Output: "12-ab-34-cd-56"
  ["111 xxx 111 and xxxyy", /(\d+) (x+) \1 and \2/g, "-$1-"], // Output: "-111-yy"
  ["111 xxx 222", /(\d+) x+ \1/g, "no matches"], // Output: "111 xxx 222" (no matches)
  // "(?:)" : groups WITHOUT capturing, so it does not take a "$n" slot
  ["aaa bbb ccc", /(a+) (?:b+) (c+)/g, "1: $1\n2: $2"], // Output: "1: aaa\n2: ccc" ($2 is NOT "bbb")
]) {
  console.log(subject.replace(pattern, replacement));
}


// Example: Match the site <title> from https://fiqus.coop
// Downloads the page, accumulates the body and prints the text found between
// <title> and </title>. A missing title or a failed request is reported instead
// of crashing the script.
const https = require("https");
https.get("https://fiqus.coop", (resp) => {
  // Decode as UTF-8 at the stream level: converting each chunk to a string on its
  // own would corrupt a multi-byte character that is split across two chunks.
  resp.setEncoding("utf8");
  let data = "";
  resp.on("data", (chunk) => {
    data += chunk;
  });
  resp.on("end", () => {
    // "[\s\S]*?" instead of ".*": "." never matches line terminators, so a title
    // spanning several lines would be missed, and the lazy "*?" stops at the FIRST
    // </title> instead of the last one on the line. The "m" flag was dropped
    // because it only changes ^ and $, which this pattern does not use.
    const found = data.match(/<title>([\s\S]*?)<\/title>/i);
    // match() returns null when nothing matches; indexing it would throw.
    if (found === null) {
      console.log("No <title> found at Fiqus site");
      return;
    }
    console.log(`The title at Fiqus site is: ${found[1]}`);
  });
}).on("error", (err) => {
  // Without this listener a DNS/connection failure is an unhandled 'error' event.
  console.error(`Could not fetch the Fiqus site: ${err.message}`);
});


// For detailed information about finite state machines:
// https://brilliant.org/wiki/regular-languages/
	// Regular expressions: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
	// Cheatsheet: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet


	// Most languages have a "native/direct" (literal) way to construct regular expressions
	// NOTE(review): re_native and re_obj are also declared earlier in this file; if both
	// copies share one scope the duplicate declarations are a SyntaxError, so one copy
	// has to be removed or renamed before the file can run.
	const re_native = /abc/;
	// And they also have functional/modular constructors (handy when the pattern is built at runtime)
	const re_obj = new RegExp("abc");

	// Elixir :: https://hexdocs.pm/elixir/Regex.html
	// regexp = ~r/abc/
	// regexp = Regex.compile("abc")

	// Python :: https://docs.python.org/3/howto/regex.html
	// regexp = re.compile("abc")


	// Regular expressions have "modifiers" (flags) that affect the matching behaviour
	// https://www.regular-expressions.info/modifiers.html

	// g: global search (match or replace all occurrences)
	console.log("the A letter".replace(/e/, "")); // Output: "th A letter"
	console.log("the A letter".replace(/e/g, "")); // Output: "th A lttr"
	// i: case insensitive
	console.log("the A".match(/a/)); // Output: null
	console.log("the A".match(/a/i)); // Output: [ 'A', index: 4, input: 'the A' ]
	console.log("the A".match(/a/gi)); // Output: [ 'A' ]
	// m: multiline strings (ie: matching an entire text file string with new lines [only affects the behavior of ^ and $])
	// u: unicode strings (matching strings containing unicode characters)
	// And you can mix them ("|" must stay unescaped here: "\|" would match a literal pipe character)
	console.log("A NICE STRING IS nicer NOW 👌👌👌!".replace(/nice[r]?|👌|!/igu, "*")); // Output: "A * STRING IS * NOW ****"


	// Most commonly used character classes
	// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes

	// Character: "\" (escape next character)
	console.log("$1234".replace(/\$/g, "U\$D")); // Output: "U$D1234"
	console.log("the [match] (here)!".replace(/the \[match\] \(here\)/g, "-")); // Output: "-!"
	// Character: "\d" (matches numbers)
	console.log("the number is $1234".replace(/\d/g, "*")); // Output: "the number is $****"
	// Character: "\D" (matches NOT numbers)
	console.log("the number is $1234".replace(/\D/g, "*")); // Output: "***************1234"
	// Character: "\w" (matches any alphanumeric character)
	console.log("the number is $1234".replace(/\w+ is \$\w/g, "*")); // Output: "the *234"
	// Character: "\W" (matches any NOT alphanumeric character)
	console.log("the number is $1234! right???".replace(/\W/g, "*")); // Output: "the*number*is**1234**right***"
	// Character: "\s" (matches whitespace character)
	console.log("the number is $1234".replace(/\s/g, "-")); // Output: "the-number-is-$1234"
	// Character: "\S" (matches any NOT whitespace character)
	console.log("the number is $1234".replace(/\S/g, "-")); // Output: "--- ------ -- -----"


	// Most commonly used patterns
	// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Assertions
	// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Groups_and_Ranges
	// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Quantifiers

	// Pattern: "^" and "$" (matches the start and the end of a string, respectively)
	console.log("the start of the art".replace(/art/g, "*")); // Output: "the st* of the *"
	console.log("the start of the art".replace(/^the/g, "*")); // Output: "* start of the art"
	console.log("the start of the art".replace(/art$/g, "*")); // Output: "the start of the *"

	// Pattern: "." (matches any single character except line terminators)
	console.log("no last letters".replace(/.s/g, "*s")); // Output: "no l*st lette*s"

	// Pattern: "+" (matches the previous item one or more times)
	console.log("ac abc aabbcc aaabbbccc".replace(/ab+c/g, "*")); // Output: "ac * a*c aa*cc" (note the last result "aa*cc")

	// Pattern: "*" (matches the previous item zero or more times)
	console.log("ac abc aabbcc aaabbbccc".replace(/ab*c/g, "*")); // Output: "* * a*c aa*cc"

	// Pattern: "?" (the previous character or group may or may not be present)
	console.log("ac abc abbc".replace(/ab?c/g, "*")); // Output: "* * abbc"

	// Pattern: "x{n}", "x{n,}", "x{n,m}" (matches exactly "n", at least "n", or "n" to "m" times)
	console.log("aaabbbccc".replace(/ab{3}c/g, "*")); // Output: "aa*cc"
	console.log("aaabbbccc".replace(/ab{2}c/g, "no matches")); // Output: "aaabbbccc" (no matches)
	console.log("aaabbbccc".replace(/ab{2,}c/g, "*")); // Output: "aa*cc"
	console.log("aaabccc".replace(/ab{1,3}c/g, "*")); // Output: "aa*cc"
	console.log("aaabccc".replace(/ab{2,3}c/g, "no matches")); // Output: "aaabccc" (no matches)

	// Pattern: "x|y" (matches "x" or "y"; the "|" must be unescaped, "\|" would match a literal pipe)
	console.log("green apples and red ones".replace(/green|red/g, "*")); // Output: "* apples and * ones"

	// Pattern: "[]" (matches any one of the enclosed characters)
	console.log("abcdefghi".replace(/[fed]/g, "*")); // Output: "abc***ghi"
	console.log("abcdefghi".replace(/[^fed]/g, "*")); // Output: "***def***"
	// The "-" sign acts as a range (for letters and numbers)
	console.log("abcdefghi".replace(/[b-h]/g, "*")); // Output: "a*******i"
	console.log("ahgcedbfi".replace(/[b-h]/g, "*")); // Output: "a*******i"
	console.log("a".replace(/a[bcd]?/g, "*")); // Output: "*"
	console.log("abcd".replace(/a[bcd]?/g, "*")); // Output: "*cd" (note that it is NOT "a*")
	console.log("abcd".replace(/a[bc]?d/g, "no matches")); // Output: "abcd" (no matches)

	// Each row is [subject, pattern, replacement]; the loop logs
	// subject.replace(pattern, replacement), one line of output per row.
	for (const [subject, pattern, replacement] of [
	  // "()" : groups part of the pattern and captures what it matched
	  // ("$1", "$2"... in the replacement; "\1", "\2"... inside the pattern itself)
	  ["Total: $500", /\w+:\s.(\d+)/g, "You owe me U\$D$1!"], // Output: "You owe me U$D500!"
	  ["12ab34cd56", /(\D)/g, "-$1-"], // Output: "12-a--b-34-c--d-56"
	  ["12ab34cd56", /(\D+)/g, "-$1-"], // Output: "12-ab-34-cd-56"
	  ["111 xxx 111 and xxxyy", /(\d+) (x+) \1 and \2/g, "-$1-"], // Output: "-111-yy"
	  ["111 xxx 222", /(\d+) x+ \1/g, "no matches"], // Output: "111 xxx 222" (no matches)
	  // "(?:)" : groups WITHOUT capturing, so it does not take a "$n" slot
	  ["aaa bbb ccc", /(a+) (?:b+) (c+)/g, "1: $1\n2: $2"], // Output: "1: aaa\n2: ccc" ($2 is NOT "bbb")
	]) {
	  console.log(subject.replace(pattern, replacement));
	}


	// Example: Match the site <title> from https://fiqus.coop
	// Downloads the page, accumulates the body and prints the text found between
	// <title> and </title>. A missing title or a failed request is reported instead
	// of crashing the script.
	// NOTE(review): "https" is also declared earlier in this file; if both copies share
	// one scope the duplicate const is a SyntaxError, so one copy has to go.
	const https = require("https");
	https.get("https://fiqus.coop", (resp) => {
	  // Decode as UTF-8 at the stream level: converting each chunk to a string on its
	  // own would corrupt a multi-byte character that is split across two chunks.
	  resp.setEncoding("utf8");
	  let data = "";
	  resp.on("data", (chunk) => {
	    data += chunk;
	  });
	  resp.on("end", () => {
	    // "[\s\S]*?" instead of ".*": "." never matches line terminators, so a title
	    // spanning several lines would be missed, and the lazy "*?" stops at the FIRST
	    // </title> instead of the last one on the line. The "m" flag was dropped
	    // because it only changes ^ and $, which this pattern does not use.
	    const found = data.match(/<title>([\s\S]*?)<\/title>/i);
	    // match() returns null when nothing matches; indexing it would throw.
	    if (found === null) {
	      console.log("No <title> found at Fiqus site");
	      return;
	    }
	    console.log(`The title at Fiqus site is: ${found[1]}`);
	  });
	}).on("error", (err) => {
	  // Without this listener a DNS/connection failure is an unhandled 'error' event.
	  console.error(`Could not fetch the Fiqus site: ${err.message}`);
	});


	// For detailed information about finite state machines:
	// https://brilliant.org/wiki/regular-languages/