Skip to content

Instantly share code, notes, and snippets.

@josheinstein
Last active March 30, 2021 19:40
Show Gist options
  • Save josheinstein/1ed7800bdcf45b931ab5 to your computer and use it in GitHub Desktop.
Save josheinstein/1ed7800bdcf45b931ab5 to your computer and use it in GitHub Desktop.
Sure, lots of SQLCLR examples show you how to use .NET Regex to test if a string matches a pattern. But these functions will let you get a list of matches, replace substrings, capture named groups, split strings, and more. (Note that this is not a complete and buildable solution. There's plenty of info out there about creating a SQLCLR project i…
-- Produces a table of matches within the string, along with the
-- character position and length of the matches.
SELECT * FROM Utils.RegexMatches('There are 10 people in 5 groups of 2.', '\d+');
-- Produces a table of all the matched group expressions within the
-- string, along with the character position and length of the substrings.
SELECT * FROM Utils.RegexGroups('someone@somewhere.org', '^(?<user>[^@]+)@(?<domain>.*)$');
-- Prints out the domain portion of the email address
PRINT Utils.GetMatch('someone@somewhere.org', '^(?<user>[^@]+)@(?<domain>.*)$', 'domain');
PRINT Utils.GetMatch('someone@somewhere.org', '^(?<user>[^@]+)@(?<domain>.*)$', '2'); -- equivalent
-- Prints out the entire match
PRINT Utils.GetMatch('someone@somewhere.org', '^(?<user>[^@]+)@(?<domain>.*)$', NULL);
using System;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.SqlServer.Server;
public partial class UserDefinedFunctions
{
/// <summary>
/// Given an input string, escapes any special regex characters so that they are not interpreted
/// as regular expression language elements. For example, parenthases will be escaped to \(.
/// </summary>
/// <param name="input">The input string to escape.</param>
/// <returns>The escaped string.</returns>
[SqlFunction( IsDeterministic = true )]
[return: SqlFacet( MaxSize = -1 )]
public static SqlString EscapeRegex( [SqlFacet( MaxSize = -1 )]SqlString input )
{
// null handling
if ( input.IsNull ) {
return SqlString.Null;
}
return Regex.Escape( (string)input );
}
/// <summary>
/// Returns true if the specified input string matches the specified regex pattern, otherwise false.
/// </summary>
/// <param name="input">The input string to test against pattern.</param>
/// <param name="pattern">A regular expression pattern to match input against.</param>
/// <returns>True if input matches pattern, otherwise false.</returns>
[SqlFunction( IsDeterministic = true )]
[return: SqlFacet( MaxSize = -1 )]
public static SqlBoolean IsMatch( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )]SqlString pattern )
{
// null handling
if ( input.IsNull || pattern.IsNull ) {
return SqlBoolean.Null;
}
return Regex.IsMatch( (string)input, (string)pattern, RegexOptions.CultureInvariant | RegexOptions.Singleline );
}
/// <summary>
/// Matches a given input string against a regular expression, and if the string matches, returns the
/// subexpression captured by the specified named or numeric group, or the entire matched string if
/// a group is not specified.
/// </summary>
/// <param name="input">The input string to test against pattern.</param>
/// <param name="pattern">A regular expression pattern to match input against.</param>
/// <param name="group">The name or numeric position of a captured group within the match to return, or null to return the entire match.</param>
/// <returns>If the regex match passes: The value of a captured group within the matched string if a named or numeric group is specified, otherwise
/// returns the entire match. If the regex match does not pass: Null.</returns>
[SqlFunction( IsDeterministic = true )]
[return: SqlFacet( MaxSize = -1 )]
public static SqlString GetMatch( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )] SqlString pattern, [SqlFacet( MaxSize = -1 )]SqlString group )
{
// null handling
if ( input.IsNull || pattern.IsNull ) {
return SqlString.Null;
}
Match match = Regex.Match( (string)input, (string)pattern, RegexOptions.CultureInvariant | RegexOptions.Singleline );
if ( match != null && match.Success ) {
// If they did not request a group, just return the whole match
if ( group.IsNull ) {
return match.Value;
}
// If they requested a group, was it a number or named group?
int matchGroupNumber;
if ( Int32.TryParse( (string)group, out matchGroupNumber ) ) {
// they requested a match group by number
Group matchGroup = match.Groups[matchGroupNumber];
if ( matchGroup != null && matchGroup.Success ) {
return matchGroup.Value;
}
}
else {
// they requested a match group by name
Group matchGroup = match.Groups[(string)group];
if ( matchGroup != null && matchGroup.Success ) {
return matchGroup.Value;
}
}
}
return SqlString.Null;
}
/// <summary>
/// Uses a regular expression to find a substring within the input string and replace it with a replacement
/// expression (which may refer to capture groups within the match using $1, etc.) and returns the result.
/// </summary>
/// <param name="input">The input string to search against.</param>
/// <param name="pattern">A regular expression pattern to match input against.</param>
/// <param name="replacement">A replacement expression to replace matches within the input string.</param>
/// <returns>The input string with all occurrences of substrings matching pattern replaced with replacement.</returns>
[SqlFunction( IsDeterministic = true )]
[return: SqlFacet( MaxSize = -1 )]
public static SqlString Replace( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )]SqlString pattern, [SqlFacet( MaxSize = -1 )]SqlString replacement )
{
// null handling
if ( input.IsNull || pattern.IsNull ) {
return SqlString.Null;
}
return Regex.Replace( (string)input, (string)pattern, (string)replacement ?? String.Empty, RegexOptions.CultureInvariant | RegexOptions.Singleline );
}
/// <summary>
/// Matches a given input string against a regular expression pattern and returns a table that contains
/// the captured groups within the match and their positions within the string.
/// </summary>
/// <param name="input">The input string to match against.</param>
/// <param name="pattern">A regular expression pattern.</param>
/// <returns>A table of captured groups within the match, or an empty table if the input string did not match.</returns>
[SqlFunction( FillRowMethodName = "RegexGroups_FillRow", TableDefinition = "Number int, Name nvarchar(255), Value nvarchar(max), Start int, Length int" )]
public static System.Collections.IEnumerable RegexGroups( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )]SqlString pattern )
{
// null handling
if ( !input.IsNull && !pattern.IsNull ) {
var regex = new Regex( (string)pattern, RegexOptions.CultureInvariant | RegexOptions.Singleline );
var match = regex.Match( (string)input );
if ( match.Success ) {
var results = new RegexMatchGroup[match.Groups.Count];
for ( int i = 0 ; i < results.Length ; i++ ) {
results[i] = new RegexMatchGroup {
Number = i,
Name = regex.GroupNameFromNumber( i ),
Value = match.Groups[i].Value,
Index = match.Groups[i].Index,
Length = match.Groups[i].Length
};
}
return results;
}
}
return new RegexMatchGroup[0];
}
/// <summary>
/// The private 'fill method' for the RegexGroups function.
/// </summary>
/// <param name="row">The custom row data passed from the RegexGroups function.</param>
/// <param name="number">The group index.</param>
/// <param name="name">The name of the group expression, if a named group was used.</param>
/// <param name="value">The captured substring within the match.</param>
/// <param name="start">The position within the input string</param>
/// <param name="length">The length of the captured substring.</param>
private static void RegexGroups_FillRow( object row, out int number, out string name, out string value, out int start, out int length )
{
var rmg = (RegexMatchGroup)row;
number = rmg.Number;
name = rmg.Name;
value = rmg.Value;
start = rmg.Index;
length = rmg.Length;
}
/// <summary>
/// Matches a given input string against a regular expression pattern and returns a table that contains
/// all of the successful matches within the input string.
/// </summary>
/// <param name="input">The input string to match against.</param>
/// <param name="pattern">A regular expression pattern.</param>
/// <returns>A table of matches within the string, or an empty table if the input string did not match.</returns>
[SqlFunction( FillRowMethodName = "RegexMatches_FillRow", TableDefinition = "Value nvarchar(max), Start int, Length int" )]
public static System.Collections.IEnumerable RegexMatches( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )]SqlString pattern )
{
// null handling
if ( input.IsNull || pattern.IsNull ) {
return new Match[0];
}
return Regex.Matches( (string)input, (string)pattern, RegexOptions.CultureInvariant | RegexOptions.Singleline );
}
/// <summary>
/// The private 'fill method' for the RegexMatches function.
/// </summary>
/// <param name="row">The custom row data passed from the RegexMatches function.</param>
/// <param name="value">The matched substring.</param>
/// <param name="start">The position within the input string.</param>
/// <param name="length">The length of the matched substring.</param>
private static void RegexMatches_FillRow( object row, out string value, out int start, out int length )
{
var match = (Match)row;
value = match.Value;
start = match.Index;
length = match.Length;
}
/// <summary>
/// Splits a given input string at each occurrence of a delimiter, which is specified as a regular expression
/// pattern, and returns a table containing the components.
/// </summary>
/// <param name="input">The input string to split.</param>
/// <param name="pattern">A regular expression pattern to split on. Note that you may need to escape certain delimiters that have special meaning in regex, such as a pipe.</param>
/// <returns>A table containing the elements of the split string.</returns>
[SqlFunction( FillRowMethodName = "Split_FillRow", TableDefinition = "Value nvarchar(max)" )]
public static System.Collections.IEnumerable Split( [SqlFacet( MaxSize = -1 )]SqlString input, [SqlFacet( MaxSize = -1 )]SqlString pattern )
{
// null handling
if ( input.IsNull || pattern.IsNull ) {
return new string[0];
}
return Regex.Split( (string)input, (string)pattern, RegexOptions.CultureInvariant | RegexOptions.Singleline );
}
/// <summary>
/// The private 'fill method' for the Split function.
/// </summary>
/// <param name="row">The custom row data passed from the Split function.</param>
/// <param name="value">A substring component of the input string once it has been split.</param>
private static void Split_FillRow( object row, out string value )
{
value = (string)row;
}
/// <summary>
/// A private class used to pass data to the fill function.
/// </summary>
private class RegexMatchGroup
{
public int Number;
public string Name;
public string Value;
public int Index;
public int Length;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment