Created
August 30, 2018 08:11
-
-
Save applePrincess/352086267ca7c90255d01035733115d3 to your computer and use it in GitHub Desktop.
何となく作ってみました。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Foo
{
    /// <summary>
    /// Splits a UTF-8 encoded byte array into chunks, one per encoded code point
    /// (a `character`), preserving the original order of the bytes.
    /// </summary>
    /// <remarks>
    /// Validation follows the well-formed byte sequence table of the Unicode
    /// Standard (Table 3-7) / RFC 3629: overlong encodings, UTF-16 surrogates
    /// (U+D800..U+DFFF), code points above U+10FFFF, stray continuation bytes and
    /// truncated sequences are all rejected.
    /// </remarks>
    /// <param name="utf8">The UTF-8 encoded bytes to split. May be empty.</param>
    /// <returns>
    /// A list of 1- to 4-byte arrays, each holding exactly one encoded code point.
    /// An empty input yields an empty list.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="utf8"/> is null.</exception>
    /// <exception cref="System.FormatException">
    /// <paramref name="utf8"/> contains an ill-formed UTF-8 sequence; the message
    /// reports the index and value of the first byte of that sequence.
    /// </exception>
    public static System.Collections.Generic.List<byte[]> splitCharsFromUTF8(byte[] utf8)
    {
        if (utf8 == null)
        {
            throw new System.ArgumentNullException(nameof(utf8));
        }

        var ret = new System.Collections.Generic.List<byte[]>();
        int i = 0;
        while (i < utf8.Length)
        {
            int length = GetSequenceLength(utf8, i);
            if (length == 0)
            {
                // FormatException derives from Exception, so callers that catch
                // System.Exception keep working; the message text is unchanged.
                throw new System.FormatException("Invalid UTF8 sequence found at(" + i + ") with value of" + utf8[i]);
            }

            var chunk = new byte[length];
            System.Array.Copy(utf8, i, chunk, 0, length);
            ret.Add(chunk);
            i += length;
        }
        return ret;
    }

    /// <summary>
    /// Returns the byte length (1-4) of the well-formed UTF-8 sequence starting at
    /// <paramref name="index"/>, or 0 when the bytes there are ill-formed or truncated.
    /// </summary>
    private static int GetSequenceLength(byte[] utf8, int index)
    {
        int lead = utf8[index];
        if (lead < 0x80)
        {
            return 1; // ASCII
        }

        int length;
        // Allowed range of the SECOND byte; only a few lead bytes narrow it.
        int secondMin = 0x80;
        int secondMax = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF)
        {
            // 0xC0 and 0xC1 are excluded: they could only encode overlong ASCII.
            length = 2;
        }
        else if (lead >= 0xE0 && lead <= 0xEF)
        {
            length = 3;
            if (lead == 0xE0)
            {
                secondMin = 0xA0; // below this is an overlong form of U+0000..U+07FF
            }
            else if (lead == 0xED)
            {
                secondMax = 0x9F; // above this is a UTF-16 surrogate (U+D800..U+DFFF)
            }
        }
        else if (lead >= 0xF0 && lead <= 0xF4)
        {
            length = 4;
            if (lead == 0xF0)
            {
                secondMin = 0x90; // below this is an overlong form of U+0000..U+FFFF
            }
            else if (lead == 0xF4)
            {
                secondMax = 0x8F; // above this exceeds U+10FFFF
            }
        }
        else
        {
            // Stray continuation byte (0x80..0xBF), 0xC0/0xC1, or 0xF5..0xFF.
            return 0;
        }

        if (utf8.Length - index < length)
        {
            return 0; // sequence is cut off by the end of the input
        }

        int second = utf8[index + 1];
        if (second < secondMin || second > secondMax)
        {
            return 0;
        }

        // Any remaining bytes must be plain continuation bytes (10xxxxxx).
        for (int k = 2; k < length; k++)
        {
            if ((utf8[index + k] & 0xC0) != 0x80)
            {
                return 0;
            }
        }

        return length;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment