Skip to content

Instantly share code, notes, and snippets.

@13xforever
Created March 27, 2011 11:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save 13xforever/889136 to your computer and use it in GitHub Desktop.
Save 13xforever/889136 to your computer and use it in GitHub Desktop.
Utility class to decode strings that contain C# escape sequences (character literals)
/*
This class should decode everything that is described in Chapter 2.4.4.4 Character literals of C# Language Specification.
See http://msdn.microsoft.com/en-us/library/aa691087.aspx for details.
I actually have test cases in the project in case you wondered, but feel free to test it for yourself.
It's free to use without limitations, but you must not expect any warranty or support for this code either.
*/
using System.Collections.Generic;
namespace System.Text
{
/// <summary>
/// Decodes C#-style escape sequences (<c>\n</c>, <c>\t</c>, <c>\uXXXX</c>,
/// <c>\UXXXXXXXX</c>, <c>\xH[H][H][H]</c>, ...) found in a sequence of characters.
/// </summary>
/// <remarks>
/// The decoder is a small state machine: <see cref="Process"/> always refers to the
/// handler for the current state, and every handler receives the next input character,
/// or <c>null</c> once the input is exhausted. Instances are created only by
/// <see cref="Decode"/> and are discarded after a single run.
/// </remarks>
public sealed class LiteralDecoder
{
    // Characters accepted as digits inside \u, \U and \x escapes.
    private static readonly HashSet<char> hexadecimal = new HashSet<char>
    {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        'a', 'b', 'c', 'd', 'e', 'f',
        'A', 'B', 'C', 'D', 'E', 'F',
    };

    // Decoded text accumulated so far.
    private readonly StringBuilder output = new StringBuilder();

    // Handler for the current state; receives null to signal end of input.
    private Action<char?> Process;

    // Hex digits collected for the escape currently being read (null outside of one).
    private StringBuilder buffer;

    // Number of hex digits collected so far for the current escape.
    private int unicodeLiteralCounter;

    private LiteralDecoder()
    {
        Process = NormalChar;
    }

    /// <summary>
    /// Replaces every escape sequence in <paramref name="encodedText"/> with the
    /// character(s) it denotes and returns the resulting string.
    /// </summary>
    /// <param name="encodedText">Characters to decode; enumerated exactly once.</param>
    /// <returns>The decoded text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="encodedText"/> is null.</exception>
    /// <exception cref="FormatException">
    /// The input contains an unknown escape, a non-hex character where a hex digit is
    /// required, or ends in the middle of an escape sequence.
    /// </exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// A <c>\U</c> escape names a value above 0x10FFFF or a surrogate code point.
    /// </exception>
    public static string Decode(IEnumerable<char> encodedText)
    {
        if (encodedText == null)
            throw new ArgumentNullException("encodedText");

        var stateMachine = new LiteralDecoder();
        foreach (var c in encodedText)
            stateMachine.Process(c);
        // A final null lets a pending \x escape flush and an unfinished escape fail.
        stateMachine.Process(null);
        return stateMachine.output.ToString();
    }

    // Renders a state-machine input for error messages; null means "end of input".
    private static string Describe(char? c)
    {
        return c.HasValue ? c.Value.ToString() : "<null>";
    }

    // Default state: copy characters through until a backslash starts an escape.
    private void NormalChar(char? c)
    {
        if (c == null) return;
        if (c.Value == '\\')
            Process = StartEscapeSequence;
        else
            output.Append(c.Value);
    }

    // Emits the character denoted by a single-character escape and returns to the default state.
    private void EmitSimpleEscape(char decoded)
    {
        output.Append(decoded);
        Process = NormalChar;
    }

    // Resets the digit buffer and switches to the given hex-escape handler.
    private void BeginHexEscape(Action<char?> handler)
    {
        buffer = new StringBuilder();
        unicodeLiteralCounter = 0;
        Process = handler;
    }

    // Drops the digit buffer and returns to the default state.
    private void EndHexEscape()
    {
        buffer = null;
        Process = NormalChar;
    }

    // Converts the collected hex digits (at most four) into a single UTF-16 code unit.
    private char ParseBufferedChar()
    {
        return Convert.ToChar(Convert.ToInt32(buffer.ToString(), 16));
    }

    // State right after a backslash: dispatch on the escape selector character.
    private void StartEscapeSequence(char? c)
    {
        switch (c)
        {
            case '\'':
            case '"':
            case '\\':
                // These escapes stand for the selector character itself.
                EmitSimpleEscape(c.Value);
                break;
            case '0':
                EmitSimpleEscape('\0');
                break;
            case 'a':
                EmitSimpleEscape('\a');
                break;
            case 'b':
                EmitSimpleEscape('\b');
                break;
            case 'f':
                EmitSimpleEscape('\f');
                break;
            case 'n':
                EmitSimpleEscape('\n');
                break;
            case 'r':
                EmitSimpleEscape('\r');
                break;
            case 't':
                EmitSimpleEscape('\t');
                break;
            case 'v':
                EmitSimpleEscape('\v');
                break;
            case 'u':
                BeginHexEscape(UnicodeChar);
                break;
            case 'U':
                BeginHexEscape(SurrogateUnicodeChar);
                break;
            case 'x':
                BeginHexEscape(VariableUnicodeChar);
                break;
            default:
                // Also reached with null, i.e. when the input ends right after a backslash.
                throw new FormatException("Invalid escape sequence \\" + Describe(c));
        }
    }

    // \uXXXX: exactly four hex digits producing one UTF-16 code unit.
    private void UnicodeChar(char? c)
    {
        if (c == null || !hexadecimal.Contains(c.Value))
            throw new FormatException("Invalid sequence: \\u" + buffer + Describe(c));

        buffer.Append(c.Value);
        unicodeLiteralCounter++;
        if (unicodeLiteralCounter != 4) return;

        output.Append(ParseBufferedChar());
        EndHexEscape();
    }

    // \xH[H][H][H]: one to four hex digits, ended by the first non-hex character,
    // by the end of input, or by reaching four digits.
    private void VariableUnicodeChar(char? c)
    {
        bool isHexChar = c != null && hexadecimal.Contains(c.Value);
        if (isHexChar)
        {
            buffer.Append(c.Value);
            unicodeLiteralCounter++;
            if (unicodeLiteralCounter != 4) return; // more digits may still follow
        }
        else if (unicodeLiteralCounter == 0)
            throw new FormatException("Invalid sequence: \\x" + buffer + Describe(c));

        output.Append(ParseBufferedChar());
        EndHexEscape();
        // The terminating character is not part of the escape; hand it to the default state.
        if (!isHexChar) Process(c);
    }

    // \UXXXXXXXX: exactly eight hex digits naming a Unicode code point; code points
    // above 0xFFFF are emitted as a surrogate pair.
    private void SurrogateUnicodeChar(char? c)
    {
        if (c == null || !hexadecimal.Contains(c.Value))
            throw new FormatException("Invalid sequence \\U" + buffer + Describe(c));

        buffer.Append(c.Value);
        unicodeLiteralCounter++;
        if (unicodeLiteralCounter != 8) return;

        // Parse as unsigned: eight hex digits starting at 0x80000000 would be negative
        // as Int32 and slip past the upper-bound check below.
        uint codePoint = Convert.ToUInt32(buffer.ToString(), 16);
        if (codePoint > 0x10FFFF)
            throw new ArgumentOutOfRangeException(
                "encodedText",
                "Unicode characters with code points above 0x10FFFF are not supported: \\U" + buffer);
        // char.ConvertFromUtf32 rejects surrogate code points; fail here with a clear message.
        if (codePoint >= 0xD800 && codePoint <= 0xDFFF)
            throw new ArgumentOutOfRangeException(
                "encodedText",
                "Surrogate code points cannot be written as a \\U escape: \\U" + buffer);

        output.Append(char.ConvertFromUtf32((int)codePoint));
        EndHexEscape();
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment