Skip to content

Instantly share code, notes, and snippets.

@westc
Last active August 22, 2023 19:45
Show Gist options
  • Save westc/a921e805036707d7c5f818a32124789a to your computer and use it in GitHub Desktop.
Save westc/a921e805036707d7c5f818a32124789a to your computer and use it in GitHub Desktop.
A wrapper class for making it easier to work with regular expressions and also for accessing named capture groups (which wasn't possible when this solution was written).
/**
* Author: Chris West
* Source: https://gist.github.com/westc/a921e805036707d7c5f818a32124789a#file-regexp-cls
* Description:
* A wrapper class for making it easier to work with regular expressions and
* also for accessing named capture groups (which wasn't possible when this
* solution was written).
*/
public class RegExp {
    // Compiled form of `source`; reused by every instance-level operation.
    private Pattern pat;
    // Maps each named capture group to its 1-based capture group index.
    private Map<String,Integer> groupNameToIndex;

    /** The regular expression text this instance was built from. */
    public String source { get; private set; }
    /** Number of capture groups in the expression (group 0 is not counted). */
    public Integer groupCount { get; private set; }
    /** Names of the named capture groups found in the expression. */
    public String[] groupNames { get; private set; }

    // Tokenizer used to walk a regular expression and number its capture
    // groups. Only the first two alternatives capture anything; the others
    // exist purely to consume text in which a "(" is NOT a group opener.
    private final static Pattern PAT_PARSE = Pattern.compile(String.join(
        new String[]{
            // Group 1: named capture group. The name is restricted to letters
            // and digits (starting with a letter) so that lookbehinds such as
            // "(?<=...)" and "(?<!...)" are not mistaken for named groups.
            '\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>',
            // Group 2: plain (indexed) capture group, i.e. "(" not followed by "?".
            '(\\((?!\\?))',
            // Character class (to avoid false positives on "(" inside "[...]").
            '\\[(?:\\\\[\\s\\S]|[^\\]\\\\])*\\]',
            // Quoted literal; an unterminated "\Q" runs to the end of the expression.
            '\\\\Q[\\s\\S]*?(?:\\\\E|\\z)',
            // Any other escaped character, so "\(" is never counted as a group.
            '\\\\[\\s\\S]'
        },
        '|'
    ));

    /**
     * Constructor
     * @param strRegExp The regular expression to compile and wrap.
     */
    public RegExp(String strRegExp) {
        // Validate the regexp by compiling it as a pattern.
        this.source = strRegExp;
        this.pat = Pattern.compile(strRegExp);

        // The platform matcher is the authority on how many groups exist, so
        // the count does not depend on the hand-written tokenizer above.
        this.groupCount = this.pat.matcher('').groupCount();

        // Walk the expression to learn which group index each name refers to.
        this.groupNameToIndex = new Map<String,Integer>();
        System.Matcher mRegExp = PAT_PARSE.matcher(strRegExp);
        Integer groupNumber = 0;
        while (mRegExp.find()) {
            String groupName = mRegExp.group(1);
            if (groupName != null) {
                this.groupNameToIndex.put(groupName, ++groupNumber);
            }
            else if (mRegExp.group(2) != null) {
                ++groupNumber;
            }
        }
        this.groupNames = new List<String>(this.groupNameToIndex.keySet());
    }

    /**
     * @param toTest The string to test.
     * @return True if the entire string matches this expression.
     */
    public Boolean matches(String toTest) {
        return this.pat.matcher(toTest).matches();
    }

    /**
     * @param strRegExp The regular expression to use.
     * @param toTest The string to test.
     * @return True if the entire string matches the expression.
     */
    public static Boolean matches(String strRegExp, String toTest) {
        return Pattern.matches(strRegExp, toTest);
    }

    /**
     * @param toTest The string to test.
     * @return True if this expression matches anywhere within the string.
     */
    public Boolean isIn(String toTest) {
        // Reuse the already compiled pattern instead of recompiling `source`.
        return this.pat.matcher(toTest).find();
    }

    /**
     * @param strRegExp The regular expression to use.
     * @param toTest The string to test.
     * @return True if the expression matches anywhere within the string.
     */
    public static Boolean isIn(String strRegExp, String toTest) {
        return Pattern.compile(strRegExp).matcher(toTest).find();
    }

    /**
     * @param toSearch The string to search.
     * @return A Searcher positioned on the first match (if there is one).
     */
    public Searcher search(String toSearch) {
        return new Searcher(this, toSearch);
    }

    /**
     * @param strRegExp The regular expression to use.
     * @param toSearch The string to search.
     * @return A Searcher positioned on the first match (if there is one).
     */
    public static Searcher search(String strRegExp, String toSearch) {
        return new Searcher(strRegExp, toSearch);
    }

    /**
     * @param toSearch The string to search.
     * @return The first match or null if there is no match.
     */
    public Match getFirstMatch(String toSearch) {
        return new Searcher(this, toSearch).match;
    }

    /**
     * @param strRegExp The regular expression to use.
     * @param toSearch The string to search.
     * @return The first match or null if there is no match.
     */
    public static Match getFirstMatch(String strRegExp, String toSearch) {
        return new Searcher(strRegExp, toSearch).match;
    }

    /**
     * @param toSearch The string to search.
     * @return Every match found in the string, in the order found.
     */
    public Match[] getAllMatches(String toSearch) {
        return new Searcher(this, toSearch).allMatches;
    }

    /**
     * @param strRegExp The regular expression to use.
     * @param toSearch The string to search.
     * @return Every match found in the string, in the order found.
     */
    public static Match[] getAllMatches(String strRegExp, String toSearch) {
        return new Searcher(strRegExp, toSearch).allMatches;
    }

    /**
     * @param toUpdate The string in which to make replacements.
     * @param replacement The replacement passed to the matcher's replaceAll().
     * @return The result of the matcher's replaceAll().
     */
    public String replaceAll(String toUpdate, String replacement) {
        return this.pat.matcher(toUpdate).replaceAll(replacement);
    }

    /**
     * @param strRegExp The regular expression to use.
     * @param toUpdate The string in which to make replacements.
     * @param replacement The replacement passed to the matcher's replaceAll().
     * @return The result of the matcher's replaceAll().
     */
    public static String replaceAll(String strRegExp, String toUpdate, String replacement) {
        return Pattern.compile(strRegExp).matcher(toUpdate).replaceAll(replacement);
    }

    /**
     * @param toUpdate The string in which to make the replacement.
     * @param replacement The replacement passed to the matcher's replaceFirst().
     * @return The result of the matcher's replaceFirst().
     */
    public String replaceFirst(String toUpdate, String replacement) {
        return this.pat.matcher(toUpdate).replaceFirst(replacement);
    }

    /**
     * @param strRegExp The regular expression to use.
     * @param toUpdate The string in which to make the replacement.
     * @param replacement The replacement passed to the matcher's replaceFirst().
     * @return The result of the matcher's replaceFirst().
     */
    public static String replaceFirst(String strRegExp, String toUpdate, String replacement) {
        return Pattern.compile(strRegExp).matcher(toUpdate).replaceFirst(replacement);
    }

    /**
     * @param groupName The name of a named capture group.
     * @return The capture group index for the name or null if the name is
     *   not defined in this expression.
     */
    public Integer getGroupIndex(String groupName) {
        return this.groupNameToIndex.get(groupName);
    }

    /**
     * @param toSplit The string to split around matches of this expression.
     * @return The pieces returned by the pattern's split().
     */
    public String[] split(String toSplit) {
        return this.pat.split(toSplit);
    }

    /**
     * @param toSplit The string to split around matches of this expression.
     * @param intLimit The limit passed through to the pattern's split().
     * @return The pieces returned by the pattern's split().
     */
    public String[] split(String toSplit, Integer intLimit) {
        return this.pat.split(toSplit, intLimit);
    }

    /**
     * @param strRegExp The regular expression to use.
     * @param toSplit The string to split around matches of the expression.
     * @return The pieces returned by the pattern's split().
     */
    public static String[] split(String strRegExp, String toSplit) {
        return Pattern.compile(strRegExp).split(toSplit);
    }

    /**
     * @param strRegExp The regular expression to use.
     * @param toSplit The string to split around matches of the expression.
     * @param intLimit The limit passed through to the pattern's split().
     * @return The pieces returned by the pattern's split().
     */
    public static String[] split(String strRegExp, String toSplit, Integer intLimit) {
        return Pattern.compile(strRegExp).split(toSplit, intLimit);
    }

    /**
     * @param toQuote The text to quote.
     * @return The result of Pattern.quote() for the text.
     */
    public static String quote(String toQuote) {
        return Pattern.quote(toQuote);
    }

    /**
     * @param toQuote The replacement text to quote.
     * @return The result of System.Matcher.quoteReplacement() for the text.
     */
    public static String quoteReplacement(String toQuote) {
        return System.Matcher.quoteReplacement(toQuote);
    }

    /**
     * Steps through the matches of a RegExp within one input string. A new
     * Searcher immediately advances to the first match.
     */
    public class Searcher {
        private System.Matcher sysMatcher;

        /** The expression being searched for. */
        public RegExp regex { get; private set; }
        /** The string being searched. */
        public String input { get; private set; }
        /** Whether the most recent find attempt produced a match. */
        public Boolean hasMatch { get; private set; }
        /** Zero-based index of the latest match found (-1 if none was found). */
        public Integer findIndex { get; private set; }
        /** Number of matches found so far (always findIndex + 1). */
        public Integer findCount { get { return this.findIndex + 1; } }
        /** The underlying matcher's hitEnd() value. */
        public Boolean isAtEnd { get { return this.sysMatcher.hitEnd(); } }
        /** The current match or null when there is no current match. */
        public Match match { get; private set; }

        /**
         * Every match in the input. A separate Searcher is used to collect
         * them so that the position of this Searcher is left alone.
         */
        public Match[] allMatches {
            get {
                Searcher s = new Searcher(this.regex, this.input);
                Match[] allMatches = new Match[0];
                while (s.hasMatch) {
                    allMatches.add(s.match);
                    s.next();
                }
                return allMatches;
            }
        }

        /**
         * @param strRegExp The regular expression to search for.
         * @param toSearch The string to search.
         */
        public Searcher(String strRegExp, String toSearch) {
            this(new RegExp(strRegExp), toSearch);
        }

        /**
         * @param regex The expression to search for.
         * @param toSearch The string to search.
         */
        public Searcher(RegExp regex, String toSearch) {
            this.input = toSearch;
            this.regex = regex;
            this.sysMatcher = regex.pat.matcher(toSearch);
            this.findIndex = -1;
            this.next();
        }

        /**
         * Advances to the next match. When one is found `match` is replaced
         * and `findIndex` is incremented; otherwise `match` becomes null and
         * `findIndex` keeps its last value.
         * @return True if another match was found.
         */
        public Boolean next() {
            this.hasMatch = this.sysMatcher.find();
            if (this.hasMatch) {
                this.findIndex++;
                // Snapshot group 0 (the whole match) through the last group.
                String[] groups = new String[0];
                Integer[] starts = new Integer[0];
                Integer[] ends = new Integer[0];
                for (Integer i = 0, l = this.sysMatcher.groupCount(); i <= l; i++) {
                    groups.add(this.sysMatcher.group(i));
                    starts.add(this.sysMatcher.start(i));
                    ends.add(this.sysMatcher.end(i));
                }
                this.match = new Match(groups, starts, ends, this);
            }
            else {
                this.match = null;
            }
            return this.hasMatch;
        }

        /**
         * Starts over from the beginning of the input and advances to the
         * first match again.
         */
        public void reset() {
            this.sysMatcher.reset();
            this.findIndex = -1;
            this.next();
        }
    }

    /**
     * A snapshot of one match: the text, start and end of the whole match
     * (index 0) and of every capture group.
     */
    public class Match {
        private Map<String,Integer> groupNameToIndex { get { return searcher.regex.groupNameToIndex; } }

        // Backing lists. The public list properties hand out copies, while
        // single-element look-ups read these directly so that no copy is made.
        private String[] realStringValues;
        private Integer[] realStringStarts;
        private Integer[] realStringEnds;

        /** The Searcher that produced this match. */
        public Searcher searcher { get; private set; }
        /** Copy of the matched strings (index 0 is the whole match). */
        public String[] stringValues { get { return realStringValues.clone(); } }
        /** Copy of the start indexes (index 0 is the whole match). */
        public Integer[] stringStarts { get { return realStringStarts.clone(); } }
        /** Copy of the end indexes (index 0 is the whole match). */
        public Integer[] stringEnds { get { return realStringEnds.clone(); } }
        /** The expression that produced this match. */
        public RegExp regex { get { return searcher.regex; } }
        /** The string that was searched. */
        public String input { get { return searcher.input; } }
        /** Number of capture groups (the whole match is not counted). */
        public Integer groupCount { get { return realStringValues.size() - 1; } }
        /** The text of the whole match. */
        public String stringValue { get { return realStringValues[0]; } }
        /** The start index of the whole match. */
        public Integer stringStart { get { return realStringStarts[0]; } }
        /** The end index of the whole match. */
        public Integer stringEnd { get { return realStringEnds[0]; } }

        /**
         * @param stringValues Matched strings where index 0 is the whole match.
         * @param stringStarts Start indexes parallel to stringValues.
         * @param stringEnds End indexes parallel to stringValues.
         * @param searcher The Searcher that found the match.
         */
        public Match(String[] stringValues, Integer[] stringStarts, Integer[] stringEnds, Searcher searcher) {
            this.realStringValues = stringValues;
            this.realStringStarts = stringStarts;
            this.realStringEnds = stringEnds;
            this.searcher = searcher;
        }

        /**
         * @param i The capture group index (0 for the whole match).
         * @return The string captured by the group.
         */
        public String getGroupString(Integer i) {
            return this.realStringValues[i];
        }

        /**
         * @param name The name of a named capture group.
         * @return The string captured by the group.
         */
        public String getGroupString(String name) {
            return this.realStringValues[this.groupNameToIndex.get(name)];
        }

        /**
         * @return A copy of the start indexes of all groups.
         */
        public Integer[] getGroupStarts() {
            return this.realStringStarts.clone();
        }

        /**
         * @param i The capture group index (0 for the whole match).
         * @return The start index of the group.
         */
        public Integer getGroupStart(Integer i) {
            return this.realStringStarts[i];
        }

        /**
         * @param name The name of a named capture group.
         * @return The start index of the group.
         */
        public Integer getGroupStart(String name) {
            return this.realStringStarts[this.groupNameToIndex.get(name)];
        }

        /**
         * @return A copy of the end indexes of all groups.
         */
        public Integer[] getGroupEnds() {
            return this.realStringEnds.clone();
        }

        /**
         * @param i The capture group index (0 for the whole match).
         * @return The end index of the group.
         */
        public Integer getGroupEnd(Integer i) {
            return this.realStringEnds[i];
        }

        /**
         * @param name The name of a named capture group.
         * @return The end index of the group.
         */
        public Integer getGroupEnd(String name) {
            return this.realStringEnds[this.groupNameToIndex.get(name)];
        }

        /**
         * Fills in a template using this match. `{0}`, `{1}`, ... refer to
         * groups by index, `{groupName}` refers to a named group, and `{{` /
         * `}}` produce literal braces.
         * NOTE(review): the final substitution is done by String.format(), so
         * any quoting rules it applies to the template also apply here --
         * confirm before using single quotes in a template.
         * @param format The template to fill in.
         * @return The template with its placeholders replaced.
         */
        public String format(String format) {
            // Work on a copy because two extra arguments are appended below.
            String[] args = this.realStringValues.clone();
            // Point doubled braces at extra arguments holding literal braces.
            format = format.replace('{{', '{' + args.size() + '}');
            args.add('{');
            format = format.replace('}}', '{' + args.size() + '}');
            args.add('}');
            // Turn named placeholders into their indexed equivalents.
            for (String groupName : this.groupNameToIndex.keySet()) {
                format = format.replace('{' + groupName + '}', '{' + this.groupNameToIndex.get(groupName) + '}');
            }
            return String.format(format, args);
        }
    }
}
/**
* Author: Chris West
* Source: https://gist.github.com/westc/a921e805036707d7c5f818a32124789a#file-regexp_tests-cls
* Description:
* Used to test the different features provided in the RegExp class.
*/
@isTest
public class RegExp_Tests {
    /**
     * Test out
     * - `RegExp.getAllMatches()`
     * - `RegExp#.getAllMatches()`
     * - `RegExp.Searcher#.allMatches`
     */
    @isTest
    public static void testGetAllMatches() {
        String strRegExp = '(\\d)(?<extraDigits>\\d*)';
        String input = '1 + 23 != 456';
        RegExp.Match[] instanceMatches = new RegExp(strRegExp).getAllMatches(input);
        RegExp.Match[] staticMatches = RegExp.getAllMatches(strRegExp, input);
        RegExp.Match[] allMatches = new RegExp.Searcher(strRegExp, input).allMatches;
        // Pin the number of matches so the loop below cannot pass vacuously.
        Assert.areEqual(3, allMatches.size());
        Assert.areEqual(allMatches.size(), instanceMatches.size());
        Assert.areEqual(allMatches.size(), staticMatches.size());
        Matcher s = Pattern.compile(strRegExp).matcher(input);
        for (Integer i = 0, l = allMatches.size(); i < l; i++) {
            Assert.isTrue(s.find());
            for (RegExp.Match m : new RegExp.Match[]{instanceMatches[i], staticMatches[i], allMatches[i]}) {
                // The platform matcher supplies the expected values.
                Assert.areEqual(s.group(), m.stringValue);
                Assert.areEqual(s.group(1), m.getGroupString(1));
                Assert.areEqual(s.group(2), m.getGroupString('extraDigits'));
            }
        }
        // The platform matcher must not find anything beyond what was returned.
        Assert.isFalse(s.find());
    }
    /**
     * Test out
     * - `RegExp.getFirstMatch()`
     * - `RegExp#.getFirstMatch()`
     * - `RegExp.Searcher#.match`
     */
    @isTest
    public static void testGetFirstMatch() {
        String strRegExp = '(\\d)(?<extraDigits>\\d*)';
        String input = '1 + 23 != 456';
        RegExp.Match instanceMatch = new RegExp(strRegExp).getFirstMatch(input);
        RegExp.Match staticMatch = RegExp.getFirstMatch(strRegExp, input);
        RegExp.Match match = new RegExp.Searcher(strRegExp, input).match;
        Matcher s = Pattern.compile(strRegExp).matcher(input);
        Assert.isTrue(s.find());
        for (RegExp.Match m : new RegExp.Match[]{instanceMatch, staticMatch, match}) {
            // The platform matcher supplies the expected values.
            Assert.areEqual(s.group(), m.stringValue);
            Assert.areEqual(s.group(1), m.getGroupString(1));
            Assert.areEqual(s.group(2), m.getGroupString('extraDigits'));
        }
    }
    /**
     * Test out `RegExp.Match#.format()`.
     */
    @isTest
    public static void testFormat() {
        String strRegExp = '(?m)^(?<relation>\\w+):\\s+(?<firstName>\\w+)\\s+(?<lastName>\\w+)';
        String input = String.join(new String[] {
            'Father: John Smith',
            'Mother: Jane Smith',
            'Brother: Gary Peyton'
        }, '\n');
        RegExp.Match[] matches = RegExp.getAllMatches(strRegExp, input);
        Assert.areEqual(3, matches.size());
        String format = 'The "{relation}" ({{relation}}) is "{lastName}, {firstName}".';
        Assert.areEqual('The "Father" ({relation}) is "Smith, John".', matches[0].format(format));
        Assert.areEqual('The "Mother" ({relation}) is "Smith, Jane".', matches[1].format(format));
        Assert.areEqual('The "Brother" ({relation}) is "Peyton, Gary".', matches[2].format(format));
    }
    /**
     * Test out
     * - `RegExp.Match#.getGroupEnd()`
     * - `RegExp.Match#.getGroupEnds()`
     * - `RegExp.Match#.getGroupStart()`
     * - `RegExp.Match#.getGroupStarts()`
     */
    @isTest
    public static void testMatchGetGroupRanges() {
        RegExp rgx = new RegExp('^(?<lastName>\\w+),\\s+(?<firstName>\\w+)$');
        RegExp.Match m = rgx.search('Frank, Anne').match;
        Assert.areEqual(new Integer[]{0,0,7}, m.getGroupStarts());
        Assert.areEqual(0, m.getGroupStart(0));
        Assert.areEqual(0, m.getGroupStart(1));
        Assert.areEqual(7, m.getGroupStart(2));
        Assert.areEqual(0, m.getGroupStart('lastName'));
        Assert.areEqual(7, m.getGroupStart('firstName'));
        Assert.areEqual(new Integer[]{11,5,11}, m.getGroupEnds());
        Assert.areEqual(11, m.getGroupEnd(0));
        Assert.areEqual(5, m.getGroupEnd(1));
        Assert.areEqual(11, m.getGroupEnd(2));
        Assert.areEqual(5, m.getGroupEnd('lastName'));
        Assert.areEqual(11, m.getGroupEnd('firstName'));
    }
    /**
     * Test out
     * - `RegExp.Match#.regex`
     * - `RegExp.Match#.searcher`
     * - `RegExp.Match#.input`
     * - `RegExp.Match#.groupCount`
     * - `RegExp.Match#.stringStart`
     * - `RegExp.Match#.stringEnd`
     */
    @isTest
    public static void testMatchProperties() {
        RegExp rgx = new RegExp('(?<lastName>\\w+),\\s+(?<firstName>\\w+)');
        RegExp.Searcher s = rgx.search('Name:\tFrank, Anne');
        RegExp.Match m = s.match;
        Assert.areEqual(rgx, m.regex);
        Assert.areEqual(s, m.searcher);
        Assert.areEqual(s.input, m.input);
        Assert.areEqual(2, m.groupCount);
        Assert.areEqual(6, m.stringStart);
        Assert.areEqual(17, m.stringEnd);
    }
    /**
     * Test out
     * - `RegExp.Searcher#.next()`
     * - `RegExp.Searcher#.reset()`
     */
    @isTest
    public static void testMatcherReset() {
        String strRegExp = '(?m)^(?<relation>\\w+):\\s+(?<firstName>\\w+)\\s+(?<lastName>\\w+)';
        String input = String.join(new String[] {
            'Father: John Smith',
            'Mother: Jane Smith',
            'Brother: Gary Peyton'
        }, '\n');
        RegExp.Searcher s = RegExp.search(strRegExp, input);
        Assert.areEqual('Father', s.match.getGroupString('relation'));
        Assert.areEqual(true, s.next());
        Assert.areEqual('Mother', s.match.getGroupString('relation'));
        s.reset();
        Assert.areEqual('Father', s.match.getGroupString('relation'));
        Assert.areEqual(true, s.next());
        Assert.areEqual('Mother', s.match.getGroupString('relation'));
        Assert.areEqual(true, s.next());
        Assert.areEqual('Brother', s.match.getGroupString('relation'));
        Assert.areEqual(false, s.next());
        Assert.areEqual(null, s.match);
        s.reset();
        Assert.areEqual('Father', s.match.getGroupString('relation'));
    }
    /**
     * Test out
     * - `RegExp.Searcher#.findIndex`
     * - `RegExp.Searcher#.findCount`
     * - `RegExp.Searcher#.isAtEnd`
     */
    @isTest
    public static void testMatcherProperties() {
        String strRegExp = '\\d+';
        String input = '1, 23, 456 and 7890';
        RegExp.Searcher s = RegExp.search(strRegExp, input);
        Matcher realMR = Pattern.compile(strRegExp).matcher(input);
        for (Integer findCount = 1; s.hasMatch; findCount++) {
            // Make sure that isAtEnd is only true when at the end of the string
            // even though hasMatch is still true until next() is called
            // again.
            if (findCount == 4) Assert.areEqual(s.isAtEnd, s.hasMatch);
            else Assert.areNotEqual(s.isAtEnd, s.hasMatch);
            // Do the other normal assertions.
            Assert.areEqual(true, realMR.find());
            Assert.areEqual(realMR.hitEnd(), s.isAtEnd);
            Assert.areEqual(findCount - 1, s.findIndex);
            Assert.areEqual(findCount, s.findCount);
            s.next();
        }
        Assert.areEqual(false, realMR.find());
        Assert.areEqual(realMR.hitEnd(), s.isAtEnd);
        Assert.areEqual(3, s.findIndex);
        Assert.areEqual(4, s.findCount);
        s.reset();
        Assert.areEqual(0, s.findIndex);
        Assert.areEqual(1, s.findCount);
    }
    /**
     * Test out
     * - `RegExp.isIn()`
     * - `RegExp#.isIn()`
     * - `RegExp.matches()`
     * - `RegExp#.matches()`
     */
    @isTest
    public static void testRegExpTesters() {
        Assert.isTrue(new RegExp('(?=\\d)').isIn('You are not 50 yet.'));
        Assert.isTrue(RegExp.isIn('\\d{2,}', 'You are not 50 yet.'));
        Assert.isFalse(RegExp.isIn('\\d{2,}', 'You are not here.'));
        Assert.isTrue(new RegExp('\\d{2,}').matches('12'));
        Assert.isTrue(RegExp.matches('\\d{2,}', '12345'));
        Assert.isTrue(RegExp.matches('\\d{2,5}', '12345'));
        Assert.isFalse(RegExp.matches('\\d{2,5}', '123456'));
    }
    /**
     * Test out
     * - `RegExp.replaceAll()`
     * - `RegExp#.replaceAll()`
     * - `RegExp.replaceFirst()`
     * - `RegExp#.replaceFirst()`
     */
    @isTest
    public static void testRegExpReplacers() {
        String strRegExp = '(.)(?<two>..)';
        String input = 'Hello world!';
        Matcher m = Pattern.compile(strRegExp).matcher(input);
        RegExp rgx = new RegExp(strRegExp);
        String replacement = '${two}$1$2';
        Assert.areEqual(m.replaceAll(replacement), rgx.replaceAll(input, replacement));
        Assert.areEqual(m.replaceAll(replacement), RegExp.replaceAll(strRegExp, input, replacement));
        Assert.areEqual(m.replaceFirst(replacement), rgx.replaceFirst(input, replacement));
        Assert.areEqual(m.replaceFirst(replacement), RegExp.replaceFirst(strRegExp, input, replacement));
    }
    /**
     * Test out
     * - `RegExp.split()`
     * - `RegExp#.split()`
     */
    @isTest
    public static void testRegExpSplitters() {
        String strRegExp = '\\s*[,;]\\s*';
        String input = '1, 2;3\n;7';
        RegExp rgx = new RegExp(strRegExp);
        Pattern pat = Pattern.compile(strRegExp);
        Assert.areEqual(pat.split(input), rgx.split(input));
        Assert.areEqual(pat.split(input, 2), rgx.split(input, 2));
        Assert.areEqual(pat.split(input), RegExp.split(strRegExp, input));
        Assert.areEqual(pat.split(input, 2), RegExp.split(strRegExp, input, 2));
    }
    /**
     * Test out
     * - `RegExp.quote()`
     * - `RegExp.quoteReplacement()`
     */
    @isTest
    public static void testRegExpQuoters() {
        Assert.areEqual(Pattern.quote('[]\\w+$1'), RegExp.quote('[]\\w+$1'));
        Assert.areEqual(Matcher.quoteReplacement('[]\\w+$1'), RegExp.quoteReplacement('[]\\w+$1'));
    }
    /**
     * Test out `RegExp.getGroupIndex()`
     */
    @isTest
    public static void testRegExpGetGroupIndex() {
        String strRegExp = '(.)(?<two>..)';
        RegExp rgx = new RegExp(strRegExp);
        Assert.areEqual(null, rgx.getGroupIndex('one'));
        Assert.areEqual(2, rgx.getGroupIndex('two'));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment