Skip to content

Instantly share code, notes, and snippets.

@applePrincess
Created August 30, 2018 08:11
Show Gist options
  • Save applePrincess/352086267ca7c90255d01035733115d3 to your computer and use it in GitHub Desktop.
Save applePrincess/352086267ca7c90255d01035733115d3 to your computer and use it in GitHub Desktop.
何となく作ってみました。
class Foo
{
    /// <summary>
    /// Splits a UTF-8 encoded byte array into chunks, one chunk per encoded
    /// code point (1 to 4 bytes each), preserving the original byte order.
    /// </summary>
    /// <param name="utf8">The UTF-8 encoded bytes to split.</param>
    /// <returns>
    /// A list holding one freshly allocated byte array per code point.
    /// The list is empty when <paramref name="utf8"/> is empty.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="utf8"/> is null.
    /// </exception>
    /// <exception cref="System.FormatException">
    /// The input is not well-formed UTF-8: a stray continuation byte, a
    /// truncated sequence, an overlong encoding, an encoded surrogate
    /// (U+D800..U+DFFF) or a value above U+10FFFF was found.
    /// </exception>
    public static System.Collections.Generic.List<byte[]> splitCharsFromUTF8(byte[] utf8)
    {
        if (utf8 == null)
        {
            throw new System.ArgumentNullException(nameof(utf8));
        }

        var ret = new System.Collections.Generic.List<byte[]>();
        int i = 0;
        while (i < utf8.Length)
        {
            int length = GetSequenceLength(utf8, i);
            if (length == 0)
            {
                throw new System.FormatException("Invalid UTF8 sequence found at(" + i + ") with value of " + utf8[i]);
            }

            var chunk = new byte[length];
            System.Array.Copy(utf8, i, chunk, 0, length);
            ret.Add(chunk);
            i += length;
        }
        return ret;
    }

    /// <summary>
    /// Returns the byte length (1 to 4) of the well-formed UTF-8 sequence that
    /// starts at <paramref name="index"/>, or 0 when no well-formed sequence
    /// starts there.
    /// </summary>
    private static int GetSequenceLength(byte[] utf8, int index)
    {
        int lead = utf8[index];
        if (lead < 0x80)
        {
            return 1;
        }

        // The allowed range of the second byte depends on the lead byte; this is
        // what excludes overlong forms, surrogates and values above U+10FFFF.
        int length;
        int secondMin = 0x80;
        int secondMax = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF)
        {
            // 0xC0 and 0xC1 could only start overlong encodings of ASCII.
            length = 2;
        }
        else if (lead >= 0xE0 && lead <= 0xEF)
        {
            length = 3;
            if (lead == 0xE0)
            {
                secondMin = 0xA0; // below this the value would fit in 2 bytes
            }
            else if (lead == 0xED)
            {
                secondMax = 0x9F; // above this lies the surrogate range
            }
        }
        else if (lead >= 0xF0 && lead <= 0xF4)
        {
            length = 4;
            if (lead == 0xF0)
            {
                secondMin = 0x90; // below this the value would fit in 3 bytes
            }
            else if (lead == 0xF4)
            {
                secondMax = 0x8F; // above this the value exceeds U+10FFFF
            }
        }
        else
        {
            // Stray continuation byte (0x80..0xBF) or a byte that never
            // appears in UTF-8 (0xC0, 0xC1, 0xF5..0xFF).
            return 0;
        }

        // Sequence is cut off by the end of the buffer.
        if (index > utf8.Length - length)
        {
            return 0;
        }

        int second = utf8[index + 1];
        if (second < secondMin || second > secondMax)
        {
            return 0;
        }

        // Remaining bytes only need to be plain continuation bytes (10xxxxxx).
        for (int offset = 2; offset < length; offset++)
        {
            if ((utf8[index + offset] & 0xC0) != 0x80)
            {
                return 0;
            }
        }

        return length;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment