Skip to content

Instantly share code, notes, and snippets.

@diegomanuel
Last active February 4, 2020 15:56
Show Gist options
  • Save diegomanuel/8dd3ebf0b1c2de2a48e70e3f5e5eda91 to your computer and use it in GitHub Desktop.
Save diegomanuel/8dd3ebf0b1c2de2a48e70e3f5e5eda91 to your computer and use it in GitHub Desktop.
Quick introduction to regular expressions to make a short class =]
// Regular expressions: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
// Cheatsheet: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet
// Most languages offer a "native/direct" literal syntax to construct regular expressions
const re_native = /abc/;
// They also provide a functional/modular constructor that builds the pattern from a string
const re_obj = new RegExp("abc");
// Elixir :: https://hexdocs.pm/elixir/Regex.html
// regexp = ~r/abc/
// regexp = Regex.compile("abc")
// Python :: https://docs.python.org/3/howto/regex.html
// regexp = re.compile("abc")
// Regular expressions accept "modifiers" (flags) that affect the matching behaviour
// https://www.regular-expressions.info/modifiers.html
const letterText = "the A letter";
const shortText = "the A";
// g: global search (match or replace every occurrence instead of only the first one)
const firstOnly = letterText.replace(/e/, "");
const everyOccurrence = letterText.replace(/e/g, "");
console.log(firstOnly); // Output: "th A letter"
console.log(everyOccurrence); // Output: "th A lttr"
// i: case insensitive
const caseSensitive = shortText.match(/a/);
const caseInsensitive = shortText.match(/a/i);
const globalInsensitive = shortText.match(/a/gi);
console.log(caseSensitive); // Output: null
console.log(caseInsensitive); // Output: [ 'A', index: 4, input: 'the A' ] (recent Node.js versions also print "groups: undefined")
console.log(globalInsensitive); // Output: [ 'A' ] (with "g" only the matched strings are returned, no index/input)
// m: multiline strings (e.g. matching a whole text file with new lines; it only affects the behavior of ^ and $)
// u: unicode strings (matching strings containing unicode characters)
// Flags can be combined
const shoutedText = "A NICE STRING IS nicer NOW 👌👌👌!";
console.log(shoutedText.replace(/nice[r]?|👌|!/igu, "*")); // Output: "A * STRING IS * NOW ****"
// Most commonly used character classes
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Character_Classes
// Character: "\" (escapes the next character of the PATTERN so it is matched literally)
// Note: a backslash does nothing inside the replacement string; there a literal "$" is written as "$$"
console.log("$1234".replace(/\$/g, "U$$D")); // Output: "U$D1234"
console.log("the [match] (here)!".replace(/the \[match\] \(here\)/g, "-")); // Output: "-!"
// Character: "\d" (matches any digit)
console.log("the number is $1234".replace(/\d/g, "*")); // Output: "the number is $****"
// Character: "\D" (matches any character that is NOT a digit)
console.log("the number is $1234".replace(/\D/g, "*")); // Output: "***************1234"
// Character: "\w" (matches any "word" character: letters, digits and the underscore)
console.log("the number is $1234".replace(/\w+ is \$\w/g, "*")); // Output: "the *234"
// Character: "\W" (matches any character that is NOT a "word" character)
console.log("the number is $1234! right???".replace(/\W/g, "*")); // Output: "the*number*is**1234**right***"
// Character: "\s" (matches a whitespace character)
console.log("the number is $1234".replace(/\s/g, "-")); // Output: "the-number-is-$1234"
// Character: "\S" (the opposite: matches any character that is NOT whitespace)
console.log("the number is $1234".replace(/\S/g, "-")); // Output: "--- ------ -- -----"
// Most commonly used patterns
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Assertions
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Groups_and_Ranges
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Quantifiers
// Anchors: "^" matches at the start of the string and "$" at its end
const artText = "the start of the art";
const anyArt = artText.replace(/art/g, "*");
const leadingThe = artText.replace(/^the/g, "*");
const trailingArt = artText.replace(/art$/g, "*");
console.log(anyArt); // Output: "the st* of the *"
console.log(leadingThe); // Output: "* start of the art"
console.log(trailingArt); // Output: "the start of the *"
// Wildcard: "." matches any single character except line terminators
const lettersText = "no last letters";
console.log(lettersText.replace(/.s/g, "*s")); // Output: "no l*st lette*s"
// Quantifier: "+" (the previous item must appear one or more times)
const repeatedText = "ac abc aabbcc aaabbbccc";
console.log(repeatedText.replace(/ab+c/g, "*")); // Output: "ac * a*c aa*cc" (note the last result "aa*cc")
// Quantifier: "*" (the previous item may appear zero or more times)
console.log(repeatedText.replace(/ab*c/g, "*")); // Output: "* * a*c aa*cc"
// Quantifier: "?" (the previous character or group is optional)
const optionalText = "ac abc abbc";
console.log(optionalText.replace(/ab?c/g, "*")); // Output: "* * abbc"
// Quantifiers: "x{n}" (exactly n times), "x{n,}" (n or more times), "x{n,m}" (between n and m times)
const tripleB = "aaabbbccc";
console.log(tripleB.replace(/ab{3}c/g, "*")); // Output: "aa*cc"
console.log(tripleB.replace(/ab{2}c/g, "no matches")); // Output: "aaabbbccc" (no matches)
console.log(tripleB.replace(/ab{2,}c/g, "*")); // Output: "aa*cc"
const singleB = "aaabccc";
console.log(singleB.replace(/ab{1,3}c/g, "*")); // Output: "aa*cc"
console.log(singleB.replace(/ab{2,3}c/g, "no matches")); // Output: "aaabccc" (no matches)
// Alternation: "x|y" (matches either "x" or "y")
const fruitText = "green apples and red ones";
console.log(fruitText.replace(/green|red/g, "*")); // Output: "* apples and * ones"
// Character set: "[]" (matches any ONE of the enclosed characters; a leading "^" negates the set)
const alphabetText = "abcdefghi";
console.log(alphabetText.replace(/[fed]/g, "*")); // Output: "abc***ghi"
console.log(alphabetText.replace(/[^fed]/g, "*")); // Output: "***def***"
// Inside a set, "-" defines a range (for letters and numbers); the order of the input does not matter
const shuffledText = "ahgcedbfi";
console.log(alphabetText.replace(/[b-h]/g, "*")); // Output: "a*******i"
console.log(shuffledText.replace(/[b-h]/g, "*")); // Output: "a*******i"
// A set combined with "?" matches at most one of its characters
const abcdText = "abcd";
console.log("a".replace(/a[bcd]?/g, "*")); // Output: "*"
console.log(abcdText.replace(/a[bcd]?/g, "*")); // Output: "*cd" (note that it is NOT "a*")
console.log(abcdText.replace(/a[bc]?d/g, "no matches")); // Output: "abcd" (no matches)
// Pattern: "()" (groups a sub-pattern and captures the text it matched)
// In the replacement string "$1", "$2", ... insert the captured texts and "$$" inserts a literal "$"
console.log("Total: $500".replace(/\w+:\s.(\d+)/g, "You owe me U$$D$1!")); // Output: "You owe me U$D500!"
console.log("12ab34cd56".replace(/(\D)/g, "-$1-")); // Output: "12-a--b-34-c--d-56"
console.log("12ab34cd56".replace(/(\D+)/g, "-$1-")); // Output: "12-ab-34-cd-56"
// Inside the pattern, "\1", "\2", ... must match the same text that the corresponding group already captured
console.log("111 xxx 111 and xxxyy".replace(/(\d+) (x+) \1 and \2/g, "-$1-")); // Output: "-111-yy"
console.log("111 xxx 222".replace(/(\d+) x+ \1/g, "no matches")); // Output: "111 xxx 222" (no matches)
// Non-capturing groups: "(?:...)" groups without capturing, so it does not get a number
console.log("aaa bbb ccc".replace(/(a+) (?:b+) (c+)/g, "1: $1\n2: $2")); // Output: "1: aaa\n2: ccc" ($2 is NOT "bbb")
// Example: Match the site <title> from https://fiqus.coop
const https = require("https");

/**
 * Extracts the text of the first <title> element found in an HTML string.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string|null} The title text with surrounding whitespace trimmed,
 *   or null when the markup has no <title> element.
 */
function extractTitle(html) {
  // "[\s\S]" also matches line terminators ("." does not, and the "m" flag only
  // changes "^" and "$"), so a title spanning several lines is still found.
  // The lazy "*?" stops at the first closing tag instead of the last one.
  const found = /<title\b[^>]*>([\s\S]*?)<\/title>/i.exec(html);
  return found === null ? null : found[1].trim();
}

https
  .get("https://fiqus.coop", (resp) => {
    // Decode while streaming so a multi-byte character split across two chunks is not corrupted
    resp.setEncoding("utf8");
    let data = "";
    resp.on("data", (chunk) => {
      data += chunk;
    });
    resp.on("end", () => {
      const title = extractTitle(data);
      if (title === null) {
        // Report the missing element instead of throwing a TypeError on "null[1]"
        console.error("No <title> found at Fiqus site");
        return;
      }
      console.log(`The title at Fiqus site is: ${title}`);
    });
  })
  .on("error", (err) => {
    // Without an "error" listener a DNS/connection failure is thrown as an uncaught exception
    console.error(`Could not fetch the Fiqus site: ${err.message}`);
  });
// For detailed information about finite state machines:
// https://brilliant.org/wiki/regular-languages/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment