Skip to content

Instantly share code, notes, and snippets.

@ctigeek
Last active January 24, 2019 15:16
Show Gist options
  • Save ctigeek/4950c4b07a50a0791f012858e3bb2214 to your computer and use it in GitHub Desktop.
Save ctigeek/4950c4b07a50a0791f012858e3bb2214 to your computer and use it in GitHub Desktop.
Split a string on a character, but don't split based on start-end characters.
/// <summary>
/// Split a string while preserving sections of it.
/// Similar to string.Split, but you can define start-end characters (e.g. quotes, brackets, braces) inside of which it will NOT split.
/// Preservers can also be "recursive" which means it can determine if it's in nested brackets or parens, etc.
/// If the start & end characters are different, there's a good chance you want to set recursive to true.
/// See the associated unit test for an example that can parse json....
/// Also supports escape characters so the separator and start-end characters can be ignored.
/// </summary>
public class Splitter
{
public static string[] Split(string splitThis, char separator, char separatorEscape, params Preserver[] preservers)
{
if (string.IsNullOrEmpty(splitThis))
{
return null;
}
foreach (var preserver in preservers)
{
if (preserver.Start == separator)
{
throw new Exception("The separator character `" + separator + "` cannot be the same as the start character of a preserver.");
}
preserver.IsInside = false;
if (preserver.Recursive && preserver.Start == preserver.End)
{
throw new Exception("Error. Preserver cannot be recursive if start and end are the same character: " + preserver.Start);
}
}
Queue<int> splitPoints = new Queue<int>();
int position = 0;
while (position < splitThis.Length)
{
Console.WriteLine(splitThis.Substring(0, position + 1));
var currChar = splitThis[position];
var prevChar = position == 0 ? '\0' : splitThis[position - 1];
var insidePreserver = preservers.FirstOrDefault(p => p.IsInside);
if (insidePreserver != null)
{
ShouldWeExitAPreserver(currChar, prevChar, insidePreserver);
}
else
{
if (!preservers.Any(p => ShouldWeEnterAPreserver(currChar, prevChar, p)))
{
if (currChar == separator && prevChar != separatorEscape)
{
splitPoints.Enqueue(position);
Console.WriteLine("Found a split.");
}
}
}
position++;
}
if (splitPoints.Count == 0)
{
return new[] {splitThis};
}
var result = new string[splitPoints.Count + 1];
int resultIndex = 0;
var startAt = 0;
while (splitPoints.Count > 0)
{
var splitPoint = splitPoints.Dequeue();
if (splitPoint - startAt > 0)
{
result[resultIndex] = splitThis.Substring(startAt, splitPoint - startAt);
}
else
{
result[resultIndex] = string.Empty;
}
startAt = splitPoint + 1;
resultIndex++;
if (splitPoints.Count == 0)
{
if (startAt < splitThis.Length)
{
result[resultIndex] = splitThis.Substring(startAt);
}
else
{
result[resultIndex] = string.Empty;
}
}
}
return result;
}
private static bool ShouldWeEnterAPreserver(char currChar, char prevChar, Preserver preserver)
{
if (preserver.IsInside)
{
if (preserver.SubPreserver != null)
{
ShouldWeEnterAPreserver(currChar, prevChar, preserver.SubPreserver);
}
return false;
}
if (preserver.Start == currChar && preserver.Escape != prevChar)
{
preserver.IsInside = true;
Console.WriteLine("Entering with char " + currChar);
return true;
}
return false;
}
private static bool ShouldWeExitAPreserver(char currChar, char prevChar, Preserver preserver)
{
if (!preserver.IsInside) return false;
if (preserver.SubPreserver != null)
{
if (preserver.SubPreserver.IsInside)
{
if (ShouldWeExitAPreserver(currChar, prevChar, preserver.SubPreserver))
{
return false;
}
}
else
{
if (ShouldWeEnterAPreserver(currChar, prevChar, preserver.SubPreserver))
{
return false;
}
}
}
if (preserver.Recursive)
{
var recursivePreserver = new Preserver { IsInside = false, Recursive = false, Start = preserver.Start, End = preserver.End, Escape = preserver.Escape };
if (ShouldWeEnterAPreserver(currChar, prevChar, recursivePreserver))
{
preserver.RecursiveCount++;
Console.WriteLine("Entering Recursive, count=" + preserver.RecursiveCount);
return false;
}
}
if (preserver.End == currChar && preserver.Escape != prevChar)
{
if (preserver.RecursiveCount > 0)
{
preserver.RecursiveCount--;
Console.WriteLine("Exiting recursive preserver, count=" + preserver.RecursiveCount);
}
else
{
preserver.IsInside = false;
Console.WriteLine("Exiting with char " + currChar);
}
return true;
}
return false;
}
public class Preserver
{
public char Start { get; set; }
public char End { get; set; }
public char Escape { get; set; } = '\u0002';
public bool IsInside { get; set; }
public bool Recursive { get; set; }
public int RecursiveCount { get; set; } = 0;
public Preserver SubPreserver { get; set; }
}
}
private static readonly Splitter.Preserver quotePreserver = new Splitter.Preserver { Start = '"', End = '"', Escape = '\\' };
private static readonly Splitter.Preserver curlyBoiPreserver = new Splitter.Preserver { Start = '{', End = '}', Escape = '\\', Recursive = true, SubPreserver = QuotePreserver };
private static readonly Splitter.Preserver bracketPreserver = new Splitter.Preserver { Start = '[', End = ']', Escape = '\\', Recursive = true, SubPreserver = QuotePreserver };
[Test]
public void TryToBreakStuff()
{
const string splitThis = @"as,a{}{}[]{[]}[[}}}}(])()()s,as,[as,""a]s"",as],as ";
var result = Splitter.Split(splitThis, ',', '\0', quotePreserver, curlyBoiPreserver, bracketPreserver);
Assert.That(result.Length, Is.EqualTo(5));
}
[Test]
public void TestEscapes()
{
const string splitThis = @"as\,as,as, [as,""a]s\"""",as,as,as],,\""as,as ";
var quotePreserver = new Splitter.Preserver { Start = '"', End = '"', Escape = '\\' };
var curlyBoiPreserver = new Splitter.Preserver { Start = '{', End = '}', Escape = '\\', SubPreserver = quotePreserver };
var bracketPreserver = new Splitter.Preserver { Start = '[', End = ']', Escape = '\\', SubPreserver = quotePreserver };
var result = Splitter.Split(splitThis, ',', '\\', quotePreserver, curlyBoiPreserver, bracketPreserver);
Assert.That(result.Length, Is.EqualTo(6));
}
[Test]
public void SplitTestJson()
{
const string splitThis = @"
""blah"":""value"",
""blah2"":""va,lue2"",
""blah3"":{
""blah4"":""value"",
""blah5"":[""v,alue"",""va,lue"",""value""]
},
""blah4"":[""value"",""va,lue"",""value""]";
var quotePreserver = new Splitter.Preserver {Start = '"', End = '"', Escape = '\\'};
var curlyBoiPreserver = new Splitter.Preserver { Start = '{', End = '}', Escape = '\\' };
var bracketPreserver = new Splitter.Preserver { Start = '[', End = ']', Escape = '\\' };
var result = Splitter.Split(splitThis, ',', '\0', quotePreserver, curlyBoiPreserver, bracketPreserver);
Assert.That(result.Length, Is.EqualTo(4));
var result2 = Splitter.Split(result[2].Trim(), ':', '\0', quotePreserver, curlyBoiPreserver, bracketPreserver);
Assert.That(result2.Length, Is.EqualTo(2));
var result3 = Splitter.Split(result[3].Trim(), ':', '\0', quotePreserver, curlyBoiPreserver, bracketPreserver);
Assert.That(result3.Length, Is.EqualTo(2));
var result4 = Splitter.Split(result3[1].Trim(), ',', '\0', quotePreserver, curlyBoiPreserver, bracketPreserver);
Assert.That(result4.Length, Is.EqualTo(1));
var result5 = Splitter.Split(result3[1].Trim(), ',', '\0', quotePreserver, curlyBoiPreserver);
Assert.That(result5.Length, Is.EqualTo(3));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment