Last active
July 27, 2023 11:26
-
-
Save geniuszxy/9bb507f03eb17526d4f58f3c387031b3 to your computer and use it in GitHub Desktop.
A simple class that iterates over a string, treating each emoji sequence as a single character.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Cursor-style iterator over a string that advances one "sequence" per
/// <see cref="MoveNext"/> call. A sequence is a single code point, optionally
/// extended by the emoji constructs recognised below (flag pairs, tag
/// sequences, skin-tone modifiers, the U+FE0F presentation selector and
/// ZWJ-joined chains), so that a composed emoji is reported as one item.
/// </summary>
/// <remarks>
/// The scanner is purely structural: it does not verify that the leading code
/// point is actually an emoji, so e.g. any character followed by U+200D is
/// joined with the element after it.
/// Unpaired surrogates are reported as single-unit sequences instead of
/// aborting the iteration.
/// </remarks>
public class EmojiIterator
{
    private const int MaxBmpCodePoint = 0xffff;
    private const int ZeroWidthJoiner = 0x200d;
    private const int PresentationSelector = 0xfe0f;
    private const int FirstEmojiModifier = 0x1f3fb;
    private const int LastEmojiModifier = 0x1f3ff;
    private const int CancelTag = 0xe007f;

    private readonly string _text;
    private int _head;
    private int _index, _length;

    /// <summary>Creates an iterator positioned before the first sequence of <paramref name="text"/>.</summary>
    /// <param name="text">The string to walk. A null reference is treated as an empty string.</param>
    public EmojiIterator(string text)
    {
        _text = text ?? string.Empty;
        Reset();
    }

    /// <summary>
    /// The first code point of the current sequence. Holds its previous value
    /// when <see cref="MoveNext"/> returns false, and 0 before the first successful move.
    /// </summary>
    public int Char { get { return _head; } }

    /// <summary>
    /// UTF-16 index at which the current sequence starts. Assigning a value moves the
    /// cursor and clears the current sequence, so the next <see cref="MoveNext"/>
    /// starts scanning at exactly that index.
    /// </summary>
    public int Offset { get { return _index; } set { _index = value; _length = 0; } }

    /// <summary>Length of the current sequence in UTF-16 code units (0 when there is none).</summary>
    public int SequenceLength { get { return _length; } }

    /// <summary>The text of the current sequence (empty when there is none).</summary>
    public string Sequence { get { return _text.Substring(_index, _length); } }

    /// <summary>
    /// Advances past the current sequence and measures the next one.
    /// </summary>
    /// <returns>
    /// true when a sequence is available; false when the cursor is at the end of
    /// the text or has been placed outside of it through <see cref="Offset"/>.
    /// </returns>
    public bool MoveNext()
    {
        _index += _length;
        _length = 0;

        // An out-of-range cursor (end of text, or a bad Offset assignment) simply ends the iteration.
        if (_index < 0 || _index >= _text.Length)
            return false;

        _head = CodePointAt(_index);
        _length = MeasureSequence(_index);
        return true;
    }

    /// <summary>Moves the cursor back to the start of the text and clears the current sequence.</summary>
    public void Reset()
    {
        _index = _length = 0;
    }

    /// <summary>
    /// Decodes the code point starting at <paramref name="index"/> without throwing:
    /// a well-formed surrogate pair yields the supplementary code point, anything else
    /// (including an unpaired surrogate) yields the raw UTF-16 code unit.
    /// </summary>
    private int CodePointAt(int index)
    {
        char unit = _text[index];
        if (char.IsHighSurrogate(unit) && index + 1 < _text.Length)
        {
            char trail = _text[index + 1];
            if (char.IsLowSurrogate(trail))
                return char.ConvertToUtf32(unit, trail);
        }
        return unit;
    }

    /// <summary>Number of UTF-16 code units occupied by a code point returned from <see cref="CodePointAt"/>.</summary>
    private static int UnitCount(int codePoint)
    {
        return codePoint > MaxBmpCodePoint ? 2 : 1;
    }

    /// <summary>
    /// Returns the length, in UTF-16 code units, of the sequence that starts at
    /// <paramref name="start"/>, which must be a valid index into the text.
    /// </summary>
    private int MeasureSequence(int start)
    {
        int end = _text.Length;
        int pos = start;

        // Each pass consumes one element; the loop repeats only across a ZWJ.
        //emoji_zwj_sequence := emoji_zwj_element ( ZWJ emoji_zwj_element )+
        //ZWJ := \x{200d}
        //emoji_zwj_element :=
        //  emoji_character
        //| emoji_presentation_sequence
        //| emoji_modifier_sequence
        while (true)
        {
            int head = CodePointAt(pos);
            pos += UnitCount(head);
            if (pos >= end)
                break;

            int next = CodePointAt(pos);

            // Flag and tag sequences are only attempted for supplementary-plane heads.
            if (head > MaxBmpCodePoint)
            {
                //emoji_flag_sequence := regional_indicator regional_indicator
                if (IsRegionalIndicatorSymbol(head) && IsRegionalIndicatorSymbol(next))
                {
                    pos += 2; // <<regional_indicator>> #2nd
                    break;
                }

                //emoji_tag_sequence := tag_base tag_spec tag_end
                //tag_base := emoji_character
                //          | emoji_modifier_sequence
                //          | emoji_presentation_sequence
                //tag_spec := [\x{E0020}-\x{E007E}]+
                //tag_end := \x{E007F} (CANCEL TAG)
                if (IsTagComponent(next))
                {
                    do
                    {
                        pos += 2; // <<tag_spec>>
                        next = pos < end ? CodePointAt(pos) : -1;
                    }
                    while (IsTagComponent(next));

                    if (next == CancelTag)
                        pos += 2; // <<tag_end>>
                    break;
                }
            }

            //emoji_modifier_sequence := emoji_modifier_base emoji_modifier
            //emoji_presentation_sequence := emoji_character emoji_presentation_selector
            //emoji_presentation_selector := \x{FE0F}
            int suffixUnits = 0;
            if (IsEmojiModifier(next))
                suffixUnits = 2; // <<emoji_modifier>>
            else if (next == PresentationSelector)
                suffixUnits = 1; // <<emoji_presentation_selector>>

            if (suffixUnits > 0)
            {
                pos += suffixUnits;
                if (pos >= end)
                    break;
                next = CodePointAt(pos);
            }

            if (next != ZeroWidthJoiner)
                break;

            pos += 1; // <<ZWJ>>
            if (pos >= end)
                break; // a trailing ZWJ still belongs to the sequence
        }

        return pos - start;
    }

    private static bool IsEmojiModifier(int ch)
    {
        return ch >= FirstEmojiModifier && ch <= LastEmojiModifier;
    }

    private static bool IsRegionalIndicatorSymbol(int ch)
    {
        return ch >= 0x1f1e6 && ch <= 0x1f1ff;
    }

    private static bool IsTagComponent(int ch)
    {
        return ch >= 0xe0020 && ch <= 0xe007e;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment