Skip to content

Instantly share code, notes, and snippets.

@atifaziz
Created September 23, 2016 16:18
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save atifaziz/8016e8cf8c89d39ad719057c2299eb94 to your computer and use it in GitHub Desktop.
Save atifaziz/8016e8cf8c89d39ad719057c2299eb94 to your computer and use it in GitHub Desktop.
C# extension method for Regex to turn matches into a stream of tokens
#region Copyright (c) 2016 Atif Aziz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#endregion
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
// ReSharper disable once PartialTypeWithSinglePart
static partial class RegexExtensions
{
    /// <summary>
    /// Breaks <paramref name="input"/> into a sequence of strings made up of
    /// the matches of <paramref name="regex"/> and the text found between
    /// them, in the order the matches are enumerated.
    /// </summary>
    /// <param name="regex">The regular expression whose matches delimit the tokens.</param>
    /// <param name="input">The string to tokenize.</param>
    /// <returns>
    /// A lazily evaluated sequence in which unmatched text is yielded as-is
    /// and each match is yielded as its <see cref="Capture.Value"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="regex"/> or <paramref name="input"/> is <c>null</c>.
    /// </exception>
    public static IEnumerable<string> Tokens(this Regex regex, string input)
    {
        return regex.Tokens(input, text => text, match => match.Value);
    }

    /// <summary>
    /// Breaks <paramref name="input"/> into a sequence of tokens, projecting
    /// each run of unmatched text (as a substring) through
    /// <paramref name="textSelector"/> and each match through
    /// <paramref name="matchSelector"/>.
    /// </summary>
    /// <typeparam name="T">The type of the tokens produced.</typeparam>
    /// <param name="regex">The regular expression whose matches delimit the tokens.</param>
    /// <param name="input">The string to tokenize.</param>
    /// <param name="textSelector">Projects a substring of unmatched text to a token.</param>
    /// <param name="matchSelector">Projects a match to a token.</param>
    /// <returns>A lazily evaluated sequence of tokens.</returns>
    /// <exception cref="ArgumentNullException">
    /// Any argument is <c>null</c>; <paramref name="textSelector"/> is checked first.
    /// </exception>
    public static IEnumerable<T> Tokens<T>(this Regex regex, string input,
        Func<string, T> textSelector, Func<Match, T> matchSelector)
    {
        if (textSelector == null)
        {
            throw new ArgumentNullException(nameof(textSelector));
        }

        // Adapt the substring-based selector to the (source, offset, count)
        // shape; the remaining arguments are validated by the overload below.
        Func<string, int, int, T> sliceSelector =
            (source, offset, count) => textSelector(source.Substring(offset, count));

        return Tokens<T>(regex, input, sliceSelector, matchSelector);
    }

    /// <summary>
    /// Breaks <paramref name="input"/> into a sequence of tokens, projecting
    /// each run of unmatched text through <paramref name="textSelector"/>
    /// (given the whole input plus the run's start index and length, so no
    /// substring needs to be created) and each match through
    /// <paramref name="matchSelector"/>.
    /// </summary>
    /// <typeparam name="T">The type of the tokens produced.</typeparam>
    /// <param name="regex">The regular expression whose matches delimit the tokens.</param>
    /// <param name="input">The string to tokenize.</param>
    /// <param name="textSelector">
    /// Receives <paramref name="input"/>, a start index and a length
    /// describing a run of unmatched text, and returns its token.
    /// </param>
    /// <param name="matchSelector">Projects a match to a token.</param>
    /// <returns>A lazily evaluated sequence of tokens.</returns>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public static IEnumerable<T> Tokens<T>(this Regex regex, string input,
        Func<string, int, int, T> textSelector, Func<Match, T> matchSelector)
    {
        // Arguments are validated here, outside the iterator, so that bad
        // arguments fail at call time instead of on first enumeration.
        if (regex == null)
        {
            throw new ArgumentNullException(nameof(regex));
        }

        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        if (textSelector == null)
        {
            throw new ArgumentNullException(nameof(textSelector));
        }

        if (matchSelector == null)
        {
            throw new ArgumentNullException(nameof(matchSelector));
        }

        return TokensCore(regex, input, textSelector, matchSelector);
    }

    /// <summary>
    /// Iterator behind the public <c>Tokens</c> overloads; performs no
    /// argument validation of its own.
    /// </summary>
    static IEnumerable<T> TokensCore<T>(Regex regex, string input,
        Func<string, int, int, T> textSelector, Func<Match, T> matchSelector)
    {
        // Index just past the end of the most recently yielded match.
        var consumed = 0;

        foreach (Match match in regex.Matches(input))
        {
            // Emit any text lying between the previous match and this one;
            // an empty gap produces no text token.
            var gap = match.Index - consumed;
            if (gap > 0)
            {
                yield return textSelector(input, consumed, gap);
            }

            yield return matchSelector(match);
            consumed = match.Index + match.Length;
        }

        // Emit whatever text trails the last match (or the whole input
        // when nothing matched).
        var remaining = input.Length - consumed;
        if (remaining > 0)
        {
            yield return textSelector(input, consumed, remaining);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment