Last active
August 22, 2023 19:45
-
-
Save westc/a921e805036707d7c5f818a32124789a to your computer and use it in GitHub Desktop.
A wrapper class for making it easier to work with regular expressions and also for accessing named capture groups (which wasn't supported when this solution was written).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Author: Chris West
 * Source: https://gist.github.com/westc/a921e805036707d7c5f818a32124789a#file-regexp-cls
 * Description:
 *   A wrapper class for making it easier to work with regular expressions and
 *   also for accessing named capture groups (which wasn't supported when this
 *   solution was written).
 */
public class RegExp {
// Compiled form of `source`; assigned once by the constructor.
private Pattern pat;
// Maps each named capture group to its 1-based group number.
private Map<String,Integer> groupNameToIndex;
// The regular expression text exactly as it was given to the constructor.
public String source { get; private set; }
// Number of capture groups (named and unnamed) found while parsing `source`.
public Integer groupCount { get; private set; }
// Names of the named capture groups found while parsing `source`.
public String[] groupNames { get; private set; }
/**
 * Scanner used by the constructor to walk a regular expression's source and
 * locate its capture groups.  Capture group 1 of this scanner holds the name
 * of a named capture group; capture group 2 is set when an unnamed (indexed)
 * capture group was opened.  The remaining alternatives capture nothing: they
 * only swallow text in which a parenthesis must NOT be counted.
 */
private final static Pattern PAT_PARSE = Pattern.compile(String.join(
    new String[]{
        // Named capture group.  The name is limited to a letter followed by
        // letters/digits so that lookbehinds such as `(?<=x)` and `(?<!x)`
        // are no longer mistaken for the start of a named group.
        '\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>',
        // Indexed capture group: an opening paren not followed by `?`.
        '(\\((?!\\?))',
        // Character class (to avoid false positives).
        '\\[(?:\\\\.|[^\\]\\\\]+)*\\]',
        // Quoted run `\Q...\E` (to avoid false positives); a `\Q` without a
        // closing `\E` is treated as quoting through the end of the source.
        '\\\\Q[\\s\\S]*?(?:\\\\E|\\z)',
        // Any other escaped character, e.g. `\(` or `\[`, so that an escaped
        // paren is never counted as a capture group.  Must come after the
        // `\Q...\E` alternative, which also starts with a backslash.
        '\\\\[\\s\\S]'
    },
    '|'
));
/**
 * Creates a RegExp from the given regular expression source.
 *
 * The source is compiled with `Pattern.compile()` (which also validates it)
 * and is then scanned with `PAT_PARSE` to count the capture groups and to
 * record the group number of every named capture group.
 *
 * @param strRegExp  The regular expression source.
 */
public RegExp(String strRegExp) {
    // Compiling doubles as validation of the source.
    this.pat = Pattern.compile(strRegExp);
    this.source = strRegExp;

    // Walk the source, numbering capture groups in the order they open.
    Map<String,Integer> nameToIndex = new Map<String,Integer>();
    Integer captureTotal = 0;
    System.Matcher scanner = PAT_PARSE.matcher(strRegExp);
    while (scanner.find()) {
        String capturedName = scanner.group(1);
        Boolean isNamed = capturedName != null;
        if (isNamed || scanner.group(2) != null) {
            captureTotal++;
            if (isNamed) {
                nameToIndex.put(capturedName, captureTotal);
            }
        }
    }

    this.groupNameToIndex = nameToIndex;
    this.groupCount = captureTotal;
    this.groupNames = new List<String>(nameToIndex.keySet());
}
/**
 * Indicates whether the ENTIRE string matches this regular expression.
 *
 * @param toTest  The string to test.
 * @return  The result of `Matcher.matches()` for this pattern and `toTest`.
 */
public Boolean matches(String toTest) {
    System.Matcher wholeMatcher = this.pat.matcher(toTest);
    return wholeMatcher.matches();
}
/**
 * Static form: delegates to `Pattern.matches()` to test the ENTIRE string
 * against the given regular expression source.
 *
 * @param strRegExp  The regular expression source.
 * @param toTest  The string to test.
 */
public static Boolean matches(String strRegExp, String toTest) {
    Boolean isWholeMatch = Pattern.matches(strRegExp, toTest);
    return isWholeMatch;
}
/**
 * Indicates whether this regular expression matches anywhere within the
 * given string.
 *
 * Uses the pattern that was already compiled by the constructor rather than
 * recompiling `source` on every call.
 *
 * @param toTest  The string to look in.
 * @return  The result of `Matcher.find()` for this pattern and `toTest`.
 */
public Boolean isIn(String toTest) {
    return this.pat.matcher(toTest).find();
}
/**
 * Static form: compiles the given regular expression source and indicates
 * whether it matches anywhere within the given string.
 *
 * @param strRegExp  The regular expression source.
 * @param toTest  The string to look in.
 */
public static Boolean isIn(String strRegExp, String toTest) {
    return Pattern.compile(strRegExp).matcher(toTest).find();
}
/**
 * Starts a search of the given string using this regular expression.
 *
 * @param toSearch  The string to search.
 * @return  A new Searcher built from this RegExp and `toSearch`.
 */
public Searcher search(String toSearch) {
    Searcher result = new Searcher(this, toSearch);
    return result;
}
/**
 * Static form: starts a search of the given string using the given regular
 * expression source.
 *
 * @param strRegExp  The regular expression source.
 * @param toSearch  The string to search.
 * @return  A new Searcher built from `strRegExp` and `toSearch`.
 */
public static Searcher search(String strRegExp, String toSearch) {
    Searcher result = new Searcher(strRegExp, toSearch);
    return result;
}
/**
 * Gets the first match of this regular expression within the given string.
 *
 * @param toSearch  The string to search.
 * @return  The `match` of a fresh Searcher over `toSearch`.
 */
public Match getFirstMatch(String toSearch) {
    Searcher finder = new Searcher(this, toSearch);
    return finder.match;
}
/**
 * Static form: gets the first match of the given regular expression source
 * within the given string.
 *
 * @param strRegExp  The regular expression source.
 * @param toSearch  The string to search.
 * @return  The `match` of a fresh Searcher over `toSearch`.
 */
public static Match getFirstMatch(String strRegExp, String toSearch) {
    Searcher finder = new Searcher(strRegExp, toSearch);
    return finder.match;
}
/**
 * Gets every match of this regular expression within the given string.
 *
 * @param toSearch  The string to search.
 * @return  The `allMatches` list of a fresh Searcher over `toSearch`.
 */
public Match[] getAllMatches(String toSearch) {
    Searcher finder = new Searcher(this, toSearch);
    return finder.allMatches;
}
/**
 * Static form: gets every match of the given regular expression source
 * within the given string.
 *
 * @param strRegExp  The regular expression source.
 * @param toSearch  The string to search.
 * @return  The `allMatches` list of a fresh Searcher over `toSearch`.
 */
public static Match[] getAllMatches(String strRegExp, String toSearch) {
    Searcher finder = new Searcher(strRegExp, toSearch);
    return finder.allMatches;
}
/**
 * Replaces every match of this regular expression within the given string.
 *
 * @param toUpdate  The string in which to make replacements.
 * @param replacement  The replacement passed to `Matcher.replaceAll()`.
 * @return  The updated string.
 */
public String replaceAll(String toUpdate, String replacement) {
    System.Matcher replacer = this.pat.matcher(toUpdate);
    return replacer.replaceAll(replacement);
}
/**
 * Static form: compiles the given regular expression source and replaces
 * every match of it within the given string.
 *
 * @param strRegExp  The regular expression source.
 * @param toUpdate  The string in which to make replacements.
 * @param replacement  The replacement passed to `Matcher.replaceAll()`.
 * @return  The updated string.
 */
public static String replaceAll(String strRegExp, String toUpdate, String replacement) {
    Pattern compiled = Pattern.compile(strRegExp);
    System.Matcher replacer = compiled.matcher(toUpdate);
    return replacer.replaceAll(replacement);
}
/**
 * Replaces the first match of this regular expression within the given
 * string.
 *
 * @param toUpdate  The string in which to make the replacement.
 * @param replacement  The replacement passed to `Matcher.replaceFirst()`.
 * @return  The updated string.
 */
public String replaceFirst(String toUpdate, String replacement) {
    System.Matcher replacer = this.pat.matcher(toUpdate);
    return replacer.replaceFirst(replacement);
}
/**
 * Static form: compiles the given regular expression source and replaces
 * the first match of it within the given string.
 *
 * @param strRegExp  The regular expression source.
 * @param toUpdate  The string in which to make the replacement.
 * @param replacement  The replacement passed to `Matcher.replaceFirst()`.
 * @return  The updated string.
 */
public static String replaceFirst(String strRegExp, String toUpdate, String replacement) {
    Pattern compiled = Pattern.compile(strRegExp);
    System.Matcher replacer = compiled.matcher(toUpdate);
    return replacer.replaceFirst(replacement);
}
/**
 * Looks up the group number recorded for a named capture group.
 *
 * @param groupName  The name of the capture group.
 * @return  The value stored for `groupName` in the name-to-index map (the
 *   map's `get()` result, so `null` when the name was never recorded).
 */
public Integer getGroupIndex(String groupName) {
    Integer groupIndex = this.groupNameToIndex.get(groupName);
    return groupIndex;
}
/**
 * Splits the given string around matches of this regular expression.
 *
 * @param toSplit  The string to split.
 * @return  The result of `Pattern.split(toSplit)`.
 */
public String[] split(String toSplit) {
    String[] pieces = this.pat.split(toSplit);
    return pieces;
}
/**
 * Splits the given string around matches of this regular expression,
 * passing the given limit through to `Pattern.split()`.
 *
 * @param toSplit  The string to split.
 * @param intLimit  The limit argument for `Pattern.split()`.
 * @return  The result of `Pattern.split(toSplit, intLimit)`.
 */
public String[] split(String toSplit, Integer intLimit) {
    String[] pieces = this.pat.split(toSplit, intLimit);
    return pieces;
}
/**
 * Static form: compiles the given regular expression source and splits the
 * given string around matches of it.
 *
 * @param strRegExp  The regular expression source.
 * @param toSplit  The string to split.
 */
public static String[] split(String strRegExp, String toSplit) {
    Pattern compiled = Pattern.compile(strRegExp);
    return compiled.split(toSplit);
}
/**
 * Static form: compiles the given regular expression source and splits the
 * given string around matches of it, passing the given limit through to
 * `Pattern.split()`.
 *
 * @param strRegExp  The regular expression source.
 * @param toSplit  The string to split.
 * @param intLimit  The limit argument for `Pattern.split()`.
 */
public static String[] split(String strRegExp, String toSplit, Integer intLimit) {
    Pattern compiled = Pattern.compile(strRegExp);
    return compiled.split(toSplit, intLimit);
}
/**
 * Wraps `Pattern.quote()`.
 *
 * @param toQuote  The string to quote.
 * @return  Whatever `Pattern.quote(toQuote)` returns.
 */
public static String quote(String toQuote) {
    String quoted = Pattern.quote(toQuote);
    return quoted;
}
/**
 * Wraps `Matcher.quoteReplacement()`.
 *
 * @param toQuote  The replacement string to quote.
 * @return  Whatever `Matcher.quoteReplacement(toQuote)` returns.
 */
public static String quoteReplacement(String toQuote) {
    String quoted = System.Matcher.quoteReplacement(toQuote);
    return quoted;
}
/**
 * Steps through the matches of a RegExp within one input string.  A new
 * Searcher is immediately advanced to the first match (if there is one).
 */
public class Searcher {
    // The underlying matcher that actually performs the search.
    private System.Matcher sysMatcher;
    // The regular expression being searched for.
    public RegExp regex { get; private set; }
    // The string being searched.
    public String input { get; private set; }
    // Whether the most recent call to next() found a match.
    public Boolean hasMatch { get; private set; }
    // Zero-based index of the most recently found match (-1 if none yet).
    public Integer findIndex { get; private set; }
    // One more than findIndex.
    public Integer findCount { get { return this.findIndex + 1; } }
    // Passes through the underlying matcher's hitEnd().
    public Boolean isAtEnd { get { return this.sysMatcher.hitEnd(); } }
    // The most recently found match, or null when next() found nothing.
    public Match match { get; private set; }
    // Every match in the input.  Collected with a separate, fresh Searcher so
    // that the position of this Searcher is left untouched.
    public Match[] allMatches {
        get {
            List<Match> collected = new List<Match>();
            for (Searcher walker = new Searcher(this.regex, this.input); walker.hasMatch; walker.next()) {
                collected.add(walker.match);
            }
            return collected;
        }
    }

    /**
     * Creates a Searcher from a regular expression source and the string to
     * search.
     */
    public Searcher(String strRegExp, String toSearch) {
        this(new RegExp(strRegExp), toSearch);
    }

    /**
     * Creates a Searcher from a RegExp and the string to search, then
     * advances to the first match.
     */
    public Searcher(RegExp regex, String toSearch) {
        this.regex = regex;
        this.input = toSearch;
        this.findIndex = -1;
        this.sysMatcher = regex.pat.matcher(toSearch);
        this.next();
    }

    /**
     * Advances to the next match.  On success `findIndex` is incremented and
     * `match` is replaced with the text, start and end of every group
     * (group 0 included); otherwise `match` becomes null and `findIndex` is
     * left as it was.
     *
     * @return  Whether another match was found.
     */
    public Boolean next() {
        this.hasMatch = this.sysMatcher.find();
        if (!this.hasMatch) {
            this.match = null;
            return this.hasMatch;
        }

        this.findIndex++;
        List<String> texts = new List<String>();
        List<Integer> begins = new List<Integer>();
        List<Integer> finishes = new List<Integer>();
        Integer lastGroup = this.sysMatcher.groupCount();
        for (Integer g = 0; g <= lastGroup; g++) {
            texts.add(this.sysMatcher.group(g));
            begins.add(this.sysMatcher.start(g));
            finishes.add(this.sysMatcher.end(g));
        }
        this.match = new Match(texts, begins, finishes, this);
        return this.hasMatch;
    }

    /**
     * Resets the underlying matcher and advances to the first match again.
     */
    public void reset() {
        this.sysMatcher.reset();
        this.findIndex = -1;
        this.next();
    }
}
/**
 * One match found by a Searcher: the text, start offset and end offset of
 * the whole match (group 0) and of each capture group.
 *
 * The group data is kept in private backing lists.  The public list-valued
 * properties and the getGroupStarts()/getGroupEnds() methods hand out a
 * single copy, while the single-value accessors read the backing lists
 * directly instead of cloning a whole list for every lookup.
 */
public class Match {
    // Name -> group number lookup, taken from the regex behind the searcher.
    private Map<String,Integer> groupNameToIndex { get { return searcher.regex.groupNameToIndex; } }
    // The Searcher that produced this match.
    public Searcher searcher { get; private set; }
    // Backing lists, indexed by group number (0 is the whole match).
    private String[] realStringValues;
    private Integer[] realStringStarts;
    private Integer[] realStringEnds;
    // Copies of the backing lists, so callers cannot alter this match.
    public String[] stringValues { get { return realStringValues.clone(); } }
    public Integer[] stringStarts { get { return realStringStarts.clone(); } }
    public Integer[] stringEnds { get { return realStringEnds.clone(); } }
    // Pass-throughs to the searcher.
    public RegExp regex { get { return searcher.regex; } }
    public String input { get { return searcher.input; } }
    // Number of capture groups, not counting group 0.
    public Integer groupCount { get { return realStringValues.size() - 1; } }
    // Text, start and end of the whole match (group 0).
    public String stringValue { get { return realStringValues[0]; } }
    public Integer stringStart { get { return realStringStarts[0]; } }
    public Integer stringEnd { get { return realStringEnds[0]; } }

    /**
     * @param stringValues  Text of each group, indexed by group number.
     * @param stringStarts  Start offset of each group, indexed the same way.
     * @param stringEnds  End offset of each group, indexed the same way.
     * @param searcher  The Searcher that produced this match.
     */
    public Match(String[] stringValues, Integer[] stringStarts, Integer[] stringEnds, Searcher searcher) {
        this.realStringValues = stringValues;
        this.realStringStarts = stringStarts;
        this.realStringEnds = stringEnds;
        this.searcher = searcher;
    }

    /** Gets the text of the group with the given number. */
    public String getGroupString(Integer i) {
        return this.realStringValues[i];
    }

    /** Gets the text of the named group; `name` must be a group name of the regex. */
    public String getGroupString(String name) {
        return this.realStringValues[this.groupNameToIndex.get(name)];
    }

    /** Gets a copy of the start offsets of all groups. */
    public Integer[] getGroupStarts() {
        return this.realStringStarts.clone();
    }

    /** Gets the start offset of the group with the given number. */
    public Integer getGroupStart(Integer i) {
        return this.realStringStarts[i];
    }

    /** Gets the start offset of the named group; `name` must be a group name of the regex. */
    public Integer getGroupStart(String name) {
        return this.realStringStarts[this.groupNameToIndex.get(name)];
    }

    /** Gets a copy of the end offsets of all groups. */
    public Integer[] getGroupEnds() {
        return this.realStringEnds.clone();
    }

    /** Gets the end offset of the group with the given number. */
    public Integer getGroupEnd(Integer i) {
        return this.realStringEnds[i];
    }

    /** Gets the end offset of the named group; `name` must be a group name of the regex. */
    public Integer getGroupEnd(String name) {
        return this.realStringEnds[this.groupNameToIndex.get(name)];
    }

    /**
     * Fills in a template using this match's groups.  `{name}` placeholders
     * are rewritten to the corresponding `{number}` placeholders, `{{` and
     * `}}` stand for literal braces, and the result is produced by
     * `String.format()` with the group texts as arguments.
     *
     * @param format  The template.
     * @return  The formatted string.
     */
    public String format(String format) {
        // Work on a copy of the group texts; two extra arguments are appended
        // to carry the literal brace characters.
        String[] args = this.stringValues;
        format = format.replace('{{', '{' + args.size() + '}');
        args.add('{');
        format = format.replace('}}', '{' + args.size() + '}');
        args.add('}');
        for (String groupName : this.groupNameToIndex.keySet()) {
            format = format.replace('{' + groupName + '}', '{' + this.groupNameToIndex.get(groupName) + '}');
        }
        return String.format(format, args);
    }
}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
* Author: Chris West | |
* Source: https://gist.github.com/westc/a921e805036707d7c5f818a32124789a#file-regexp_tests-cls | |
* Description: | |
* Used to test the different features provided in the RegExp class. | |
*/ | |
@isTest | |
public class RegExp_Tests { | |
/**
 * Test out
 * - `RegExp.getAllMatches()`
 * - `RegExp#.getAllMatches()`
 * - `RegExp.Searcher#.allMatches`
 *
 * Each of the three ways of collecting matches is compared, match by match,
 * against a plain `Matcher` over the same pattern and input.
 */
@isTest
public static void testGetAllMatches() {
    String strRegExp = '(\\d)(?<extraDigits>\\d*)';
    String input = '1 + 23 != 456';
    RegExp.Match[] instanceMatches = new RegExp(strRegExp).getAllMatches(input);
    RegExp.Match[] staticMatches = RegExp.getAllMatches(strRegExp, input);
    RegExp.Match[] allMatches = new RegExp.Searcher(strRegExp, input).allMatches;
    // Pin the number of matches ("1", "23" and "456") up front so that the
    // comparison loop below cannot pass vacuously on an empty list.
    Assert.areEqual(3, instanceMatches.size());
    Assert.areEqual(3, staticMatches.size());
    Assert.areEqual(3, allMatches.size());
    Matcher s = Pattern.compile(strRegExp).matcher(input);
    for (Integer i = 0, l = allMatches.size(); i < l; i++) {
        Assert.isTrue(s.find());
        for (RegExp.Match m : new RegExp.Match[]{instanceMatches[i], staticMatches[i], allMatches[i]}) {
            // The plain Matcher is the reference, so it supplies the
            // expected (first) argument.
            Assert.areEqual(s.group(), m.stringValue);
            Assert.areEqual(s.group(1), m.getGroupString(1));
            Assert.areEqual(s.group(2), m.getGroupString('extraDigits'));
        }
    }
    // The reference matcher must be exhausted too.
    Assert.isFalse(s.find());
}
/**
 * Test out
 * - `RegExp.getFirstMatch()`
 * - `RegExp#.getFirstMatch()`
 * - `RegExp.Searcher#.match`
 *
 * Each of the three ways of getting the first match is compared against a
 * plain `Matcher` over the same pattern and input.
 */
@isTest
public static void testGetFirstMatch() {
    String strRegExp = '(\\d)(?<extraDigits>\\d*)';
    String input = '1 + 23 != 456';
    RegExp.Match instanceMatch = new RegExp(strRegExp).getFirstMatch(input);
    RegExp.Match staticMatch = RegExp.getFirstMatch(strRegExp, input);
    RegExp.Match match = new RegExp.Searcher(strRegExp, input).match;
    Matcher s = Pattern.compile(strRegExp).matcher(input);
    // The reference matcher has to find something for the comparisons below
    // to be meaningful.
    Assert.isTrue(s.find());
    for (RegExp.Match m : new RegExp.Match[]{instanceMatch, staticMatch, match}) {
        // The plain Matcher is the reference, so it supplies the expected
        // (first) argument.
        Assert.areEqual(s.group(), m.stringValue);
        Assert.areEqual(s.group(1), m.getGroupString(1));
        Assert.areEqual(s.group(2), m.getGroupString('extraDigits'));
    }
}
/**
 * Test out `RegExp.Match#.format()`: named placeholders are filled in from
 * the match while `{{` and `}}` come out as literal braces.
 */
@isTest
public static void testFormat() {
    String template = 'The "{relation}" ({{relation}}) is "{lastName}, {firstName}".';
    String relationPattern = '(?m)^(?<relation>\\w+):\\s+(?<firstName>\\w+)\\s+(?<lastName>\\w+)';
    List<String> relatives = new List<String>{
        'Father: John Smith',
        'Mother: Jane Smith',
        'Brother: Gary Peyton'
    };
    RegExp.Match[] found = RegExp.getAllMatches(relationPattern, String.join(relatives, '\n'));
    // One expected sentence per input line, in the same order.
    List<String> expectedSentences = new List<String>{
        'The "Father" ({relation}) is "Smith, John".',
        'The "Mother" ({relation}) is "Smith, Jane".',
        'The "Brother" ({relation}) is "Peyton, Gary".'
    };
    for (Integer i = 0; i < expectedSentences.size(); i++) {
        Assert.areEqual(expectedSentences[i], found[i].format(template));
    }
}
/**
 * Test out
 * - `RegExp.Match#.getGroupEnd()`
 * - `RegExp.Match#.getGroupEnds()`
 * - `RegExp.Match#.getGroupStart()`
 * - `RegExp.Match#.getGroupStarts()`
 */
@isTest
public static void testMatchGetGroupRanges() {
    RegExp nameRegex = new RegExp('^(?<lastName>\\w+),\\s+(?<firstName>\\w+)$');
    RegExp.Match found = nameRegex.search('Frank, Anne').match;
    // Offsets for group 0 (whole match), group 1 (lastName) and group 2
    // (firstName) within 'Frank, Anne'.
    Integer[] expectedStarts = new Integer[]{0,0,7};
    Integer[] expectedEnds = new Integer[]{11,5,11};

    // Starts: as a list, by group number, then by group name.
    Assert.areEqual(expectedStarts, found.getGroupStarts());
    for (Integer g = 0; g < expectedStarts.size(); g++) {
        Assert.areEqual(expectedStarts[g], found.getGroupStart(g));
    }
    Assert.areEqual(0, found.getGroupStart('lastName'));
    Assert.areEqual(7, found.getGroupStart('firstName'));

    // Ends: as a list, by group number, then by group name.
    Assert.areEqual(expectedEnds, found.getGroupEnds());
    for (Integer g = 0; g < expectedEnds.size(); g++) {
        Assert.areEqual(expectedEnds[g], found.getGroupEnd(g));
    }
    Assert.areEqual(5, found.getGroupEnd('lastName'));
    Assert.areEqual(11, found.getGroupEnd('firstName'));
}
/**
 * Test out
 * - `RegExp.Match#.regex`
 * - `RegExp.Match#.searcher`
 * - `RegExp.Match#.input`
 * - `RegExp.Match#.groupCount`
 * - `RegExp.Match#.stringStart`
 * - `RegExp.Match#.stringEnd`
 */
@isTest
public static void testMatchProperties() {
    RegExp nameRegex = new RegExp('(?<lastName>\\w+),\\s+(?<firstName>\\w+)');
    RegExp.Searcher finder = nameRegex.search('Name:\tFrank, Anne');
    RegExp.Match found = finder.match;

    // Back-references to the objects that produced the match.
    Assert.areEqual(finder, found.searcher);
    Assert.areEqual(nameRegex, found.regex);
    Assert.areEqual(finder.input, found.input);

    // Shape and position of the match within the input.
    Assert.areEqual(2, found.groupCount);
    Assert.areEqual(6, found.stringStart);
    Assert.areEqual(17, found.stringEnd);
}
/**
 * Test out
 * - `RegExp.Searcher#.next()`
 * - `RegExp.Searcher#.reset()`
 */
@isTest
public static void testMatcherReset() {
    String relationPattern = '(?m)^(?<relation>\\w+):\\s+(?<firstName>\\w+)\\s+(?<lastName>\\w+)';
    List<String> relatives = new List<String>{
        'Father: John Smith',
        'Mother: Jane Smith',
        'Brother: Gary Peyton'
    };
    RegExp.Searcher finder = RegExp.search(relationPattern, String.join(relatives, '\n'));

    // A fresh searcher is already on the first match.
    Assert.areEqual('Father', finder.match.getGroupString('relation'));
    Assert.isTrue(finder.next());
    Assert.areEqual('Mother', finder.match.getGroupString('relation'));

    // Resetting part-way through goes back to the first match.
    finder.reset();
    Assert.areEqual('Father', finder.match.getGroupString('relation'));
    Assert.isTrue(finder.next());
    Assert.areEqual('Mother', finder.match.getGroupString('relation'));
    Assert.isTrue(finder.next());
    Assert.areEqual('Brother', finder.match.getGroupString('relation'));

    // Running off the end clears the match...
    Assert.isFalse(finder.next());
    Assert.isNull(finder.match);

    // ...and resetting after that still goes back to the first match.
    finder.reset();
    Assert.areEqual('Father', finder.match.getGroupString('relation'));
}
/**
 * Test out
 * - `RegExp.Searcher#.findIndex`
 * - `RegExp.Searcher#.findCount`
 * - `RegExp.Searcher#.isAtEnd`
 */
@isTest
public static void testMatcherProperties() {
    String digitsPattern = '\\d+';
    String text = '1, 23, 456 and 7890';
    RegExp.Searcher finder = RegExp.search(digitsPattern, text);
    Matcher reference = Pattern.compile(digitsPattern).matcher(text);

    Integer expectedCount = 1;
    while (finder.hasMatch) {
        // isAtEnd must only be true once the match reaches the end of the
        // string (the 4th match), even though hasMatch stays true until
        // next() is called again.
        Boolean expectAtEnd = (expectedCount == 4);
        Assert.areEqual(expectAtEnd, finder.isAtEnd);

        // The searcher must stay in step with a plain Matcher.
        Assert.isTrue(reference.find());
        Assert.areEqual(reference.hitEnd(), finder.isAtEnd);
        Assert.areEqual(expectedCount - 1, finder.findIndex);
        Assert.areEqual(expectedCount, finder.findCount);

        finder.next();
        expectedCount++;
    }

    // Once exhausted, the counters stay on the last match that was found.
    Assert.isFalse(reference.find());
    Assert.areEqual(reference.hitEnd(), finder.isAtEnd);
    Assert.areEqual(3, finder.findIndex);
    Assert.areEqual(4, finder.findCount);

    // Resetting puts the searcher back on the first match.
    finder.reset();
    Assert.areEqual(0, finder.findIndex);
    Assert.areEqual(1, finder.findCount);
}
/**
 * Test out
 * - `RegExp.isIn()`
 * - `RegExp#.isIn()`
 * - `RegExp.matches()`
 * - `RegExp#.matches()`
 */
@isTest
public static void testRegExpTesters() {
    String withNumber = 'You are not 50 yet.';
    String withoutNumber = 'You are not here.';

    // isIn(): a match anywhere in the string is enough.
    RegExp digitAhead = new RegExp('(?=\\d)');
    Assert.isTrue(digitAhead.isIn(withNumber));
    Assert.isTrue(RegExp.isIn('\\d{2,}', withNumber));
    Assert.isFalse(RegExp.isIn('\\d{2,}', withoutNumber));

    // matches(): the whole string has to match.
    RegExp twoOrMoreDigits = new RegExp('\\d{2,}');
    Assert.isTrue(twoOrMoreDigits.matches('12'));
    Assert.isTrue(RegExp.matches('\\d{2,}', '12345'));
    Assert.isTrue(RegExp.matches('\\d{2,5}', '12345'));
    Assert.isFalse(RegExp.matches('\\d{2,5}', '123456'));
}
/**
 * Test out
 * - `RegExp.replaceAll()`
 * - `RegExp#.replaceAll()`
 * - `RegExp.replaceFirst()`
 * - `RegExp#.replaceFirst()`
 *
 * The results are compared against a plain `Matcher` doing the same
 * replacements.
 */
@isTest
public static void testRegExpReplacers() {
    String triplePattern = '(.)(?<two>..)';
    String text = 'Hello world!';
    String swap = '${two}$1$2';
    Matcher reference = Pattern.compile(triplePattern).matcher(text);
    RegExp wrapped = new RegExp(triplePattern);

    // Instance form and static form of replaceAll().
    Assert.areEqual(reference.replaceAll(swap), wrapped.replaceAll(text, swap));
    Assert.areEqual(reference.replaceAll(swap), RegExp.replaceAll(triplePattern, text, swap));

    // Instance form and static form of replaceFirst().
    Assert.areEqual(reference.replaceFirst(swap), wrapped.replaceFirst(text, swap));
    Assert.areEqual(reference.replaceFirst(swap), RegExp.replaceFirst(triplePattern, text, swap));
}
/**
 * Test out
 * - `RegExp.split()`
 * - `RegExp#.split()`
 *
 * The results are compared against a plain `Pattern` doing the same splits.
 */
@isTest
public static void testRegExpSplitters() {
    String separatorPattern = '\\s*[,;]\\s*';
    String text = '1, 2;3\n;7';
    Pattern reference = Pattern.compile(separatorPattern);
    RegExp wrapped = new RegExp(separatorPattern);

    // Instance forms, without and with a limit.
    Assert.areEqual(reference.split(text), wrapped.split(text));
    Assert.areEqual(reference.split(text, 2), wrapped.split(text, 2));

    // Static forms, without and with a limit.
    Assert.areEqual(reference.split(text), RegExp.split(separatorPattern, text));
    Assert.areEqual(reference.split(text, 2), RegExp.split(separatorPattern, text, 2));
}
/**
 * Test out
 * - `RegExp.quote()`
 * - `RegExp.quoteReplacement()`
 *
 * Both are expected to return exactly what the system methods return.
 */
@isTest
public static void testRegExpQuoters() {
    String special = '[]\\w+$1';
    String expectedQuoted = Pattern.quote(special);
    String expectedReplacement = Matcher.quoteReplacement(special);
    Assert.areEqual(expectedQuoted, RegExp.quote(special));
    Assert.areEqual(expectedReplacement, RegExp.quoteReplacement(special));
}
/**
 * Test out `RegExp.getGroupIndex()`: a known group name gives its group
 * number and an unknown name gives null.
 */
@isTest
public static void testRegExpGetGroupIndex() {
    RegExp wrapped = new RegExp('(.)(?<two>..)');
    Integer unknownIndex = wrapped.getGroupIndex('one');
    Integer knownIndex = wrapped.getGroupIndex('two');
    Assert.areEqual(null, unknownIndex);
    Assert.areEqual(2, knownIndex);
}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment