Skip to content

Instantly share code, notes, and snippets.

@mabster
Created July 22, 2023 10:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mabster/21e28b944f7ac1cc6f8397460e3cdd09 to your computer and use it in GitHub Desktop.
Save mabster/21e28b944f7ac1cc6f8397460e3cdd09 to your computer and use it in GitHub Desktop.
Parse a string into an array of tags (denoted by square brackets) and terms (which may be quoted to preserve whitespace).
using System;
using System.Linq;
/// <summary>
/// Demonstrates parsing a free-text string into tags (text enclosed in square
/// brackets) and terms (individual words, plus double-quoted phrases that keep
/// their internal whitespace).
/// </summary>
public class Program
{
    // Characters that separate individual words in unquoted, untagged text.
    static readonly char[] _trmDelimiters = { ' ', ',', '.', '!', '/' };

    // Characters that open and close a tag.
    static readonly char[] _tagDelimiters = { '[', ']' };

    // Words shorter than this are ignored ("a", "is", etc.).
    const int MinimumWordLength = 3;

    /// <summary>
    /// Parses a sample sentence and writes the extracted terms and tags to the console.
    /// </summary>
    public static void Main()
    {
        string s = "This is a [tagged term], a sentence fragment, a [tag with spaces], and a \"quoted term with spaces.\"";

        var result = ParseTermsAndTags(s);

        Console.WriteLine($"{result.Terms.Length} terms:");
        Console.WriteLine(string.Join(",", result.Terms));
        Console.WriteLine($"\n{result.Tags.Length} tags:");
        Console.WriteLine(string.Join(",", result.Tags));
    }

    /// <summary>
    /// Splits <paramref name="input"/> into distinct terms and distinct tags.
    /// </summary>
    /// <param name="input">
    /// Text to parse. Double quotes delimit phrases that are kept whole; square
    /// brackets delimit tags. Brackets inside a quoted phrase are not treated as
    /// tags. Delimiters are expected to be balanced and not nested; an unmatched
    /// opening delimiter treats the rest of the text as its content.
    /// </param>
    /// <returns>
    /// <c>Terms</c>: the distinct words of at least three characters found outside
    /// quotes and tags, in order of first appearance, followed by the distinct
    /// non-empty quoted phrases. <c>Tags</c>: the distinct non-blank tag texts in
    /// order of first appearance.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public static (string[] Terms, string[] Tags) ParseTermsAndTags(string input)
    {
        ArgumentNullException.ThrowIfNull(input);

        // Empty entries are deliberately kept: with them in place, every
        // odd-indexed segment is the inside of a quote, even when the input
        // starts with a quote or has two quotes back to back.
        var quoteSegments = input
            .Split('"', StringSplitOptions.TrimEntries)
            .Select((text, index) => (Text: text, IsQuoted: index % 2 == 1))
            .ToArray();

        var quotes = quoteSegments
            .Where(segment => segment.IsQuoted && segment.Text.Length > 0)
            .Select(segment => segment.Text);

        // Same parity trick for tags inside each unquoted segment:
        // odd-indexed pieces are the text between '[' and ']'.
        var tagSegments = quoteSegments
            .Where(segment => !segment.IsQuoted)
            .SelectMany(segment => segment.Text
                .Split(_tagDelimiters, StringSplitOptions.TrimEntries)
                .Select((text, index) => (Text: text, IsTag: index % 2 == 1)))
            .ToArray();

        var tags = tagSegments
            .Where(segment => segment.IsTag && !string.IsNullOrWhiteSpace(segment.Text))
            .Select(segment => segment.Text)
            .Distinct()
            .ToArray();

        var words = tagSegments
            .Where(segment => !segment.IsTag)
            .SelectMany(segment => segment.Text.Split(
                _trmDelimiters,
                StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries))
            .Where(word => word.Length >= MinimumWordLength);

        // Union already removes duplicates and keeps first-occurrence order,
        // so words come first and quoted phrases follow.
        var terms = words.Union(quotes).ToArray();

        return (terms, tags);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment