Skip to content

Instantly share code, notes, and snippets.

@geniuszxy
Last active July 27, 2023 11:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save geniuszxy/9bb507f03eb17526d4f58f3c387031b3 to your computer and use it in GitHub Desktop.
Save geniuszxy/9bb507f03eb17526d4f58f3c387031b3 to your computer and use it in GitHub Desktop.
A simple class that iterates over a string, treating each emoji sequence as a single character.
/// <summary>
/// Iterates over a string one "character" at a time, where an emoji sequence
/// (flag, tag, modifier, presentation, keycap or ZWJ sequence) counts as a single character.
/// </summary>
/// <remarks>
/// Call <see cref="MoveNext"/> until it returns <c>false</c>. After each successful call,
/// <see cref="Offset"/>, <see cref="SequenceLength"/>, <see cref="Sequence"/> and
/// <see cref="Char"/> describe the current sequence.
/// Malformed UTF-16 (a lone surrogate) is reported as a one-code-unit sequence instead of
/// aborting the iteration.
/// </remarks>
public class EmojiIterator
{
    private const int MaxBmpCodePoint = 0xffff;
    private const int ZeroWidthJoiner = 0x200d;
    private const int PresentationSelector = 0xfe0f;
    private const int CombiningEnclosingKeycap = 0x20e3;
    private const int CancelTag = 0xe007f;

    // Sentinel values for _next (any value >= 0 is a cached look-ahead code point).
    private const int NotPeeked = -1;   // nothing cached; read from the text when needed
    private const int EndOfText = -2;   // the look-ahead ran past the end of the text

    private readonly string _text;
    private int _head, _next;
    private int _index, _length;

    /// <summary>Creates an iterator positioned before the first character of <paramref name="text"/>.</summary>
    /// <param name="text">The text to iterate; <c>null</c> is treated as an empty string.</param>
    public EmojiIterator(string text)
    {
        _text = text ?? string.Empty;
        Reset();
    }

    /// <summary>The first code point of the current sequence.</summary>
    public int Char { get { return _head; } }

    /// <summary>
    /// The UTF-16 index where the current sequence starts. Setting it repositions the iterator:
    /// the next <see cref="MoveNext"/> call starts reading at the assigned index.
    /// </summary>
    public int Offset { get { return _index; } set { _index = value; _length = 0; _next = NotPeeked; } }

    /// <summary>The length of the current sequence in UTF-16 code units.</summary>
    public int SequenceLength { get { return _length; } }

    /// <summary>The current sequence as a substring of the text.</summary>
    public string Sequence { get { return _text.Substring(_index, _length); } }

    /// <summary>Advances to the next character or emoji sequence.</summary>
    /// <returns><c>true</c> if a sequence was found; <c>false</c> at the end of the text
    /// or when <see cref="Offset"/> was set outside the text.</returns>
    public bool MoveNext()
    {
        _index += _length;
        _length = 0;

        if (_next >= 0)
            _head = _next; // reuse the code point peeked while scanning the previous sequence
        else if (_index >= 0 && _index < _text.Length)
            _head = ReadCodePoint(_index);
        else
            return false;

        FindSequence(_head);
        return true;
    }

    /// <summary>Repositions the iterator before the first character of the text.</summary>
    public void Reset()
    {
        _next = NotPeeked;
        _index = _length = 0;
    }

    // Reads the code point at the given index without throwing: a lone surrogate is
    // returned as its own code unit value (which is <= 0xffff, i.e. one code unit long).
    private int ReadCodePoint(int index)
    {
        char first = _text[index];
        if (char.IsHighSurrogate(first) && index + 1 < _text.Length)
        {
            char second = _text[index + 1];
            if (char.IsLowSurrogate(second))
                return char.ConvertToUtf32(first, second);
        }
        return first;
    }

    // Peeks the code point right after the current sequence into _next.
    // Returns false (and stores EndOfText) when there is nothing left to peek.
    private bool GetNextChar()
    {
        var index = _index + _length;
        if (index < _text.Length)
        {
            _next = ReadCodePoint(index);
            return true;
        }

        _next = EndOfText;
        return false;
    }

    // Appends the peeked supplementary-plane code point (two code units) to the sequence.
    private void UseNextChar()
    {
        _length += 2;
        _next = NotPeeked;
    }

    //emoji_zwj_sequence := emoji_zwj_element ( ZWJ emoji_zwj_element )+
    //ZWJ := \x{200d}
    //emoji_zwj_element :=
    //  emoji_character
    //| emoji_presentation_sequence
    //| emoji_modifier_sequence
    //
    // Extends the current sequence starting from headChar. Written as a loop so that a long
    // chain of ZWJ-joined elements cannot exhaust the call stack.
    private void FindSequence(int headChar)
    {
        var head = headChar;
        while (true)
        {
            _length += head > MaxBmpCodePoint ? 2 : 1;
            if (!GetNextChar())
                return;
            if (head > MaxBmpCodePoint && CheckFlagOrTagSequence(head))
                return;
            if (!CheckEmojiSequence(head))
                return;
            head = _next; // element following a ZWJ
        }
    }

    // Consumes an optional modifier / presentation selector (or keycap tail) and an optional
    // ZWJ following the element that starts with head. Returns true only when a ZWJ was
    // consumed and another code point follows it; that code point is then in _next.
    private bool CheckEmojiSequence(int head)
    {
        //emoji_modifier_sequence := emoji_modifier_base emoji_modifier
        if (IsEmojiModifier(_next))
        {
            _length += 2; // <<emoji_modifier>>
            if (!GetNextChar())
                return false;
        }
        //emoji_presentation_sequence := emoji_character emoji_presentation_selector
        //emoji_presentation_selector := \x{FE0F}
        else if (_next == PresentationSelector)
        {
            _length++; // <<emoji_presentation_selector>>
            if (!GetNextChar())
                return false;

            //emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3}
            if (_next == CombiningEnclosingKeycap && IsKeycapBase(head))
            {
                _length++; // <<combining enclosing keycap>>
                _next = NotPeeked;
                return false;
            }
        }

        if (_next != ZeroWidthJoiner)
            return false;

        _length++; // <<ZWJ>>
        return GetNextChar();
    }

    // Handles the sequences that require a supplementary-plane head: flags and tag sequences.
    // Returns true when the peeked code point was consumed as part of such a sequence.
    private bool CheckFlagOrTagSequence(int head)
    {
        //emoji_flag_sequence := regional_indicator regional_indicator
        if (IsRegionalIndicatorSymbol(head) && IsRegionalIndicatorSymbol(_next))
        {
            UseNextChar(); // <<regional_indicator>> #2nd
            return true;
        }

        //emoji_tag_sequence := tag_base tag_spec tag_end
        //tag_base := emoji_character
        //          | emoji_modifier_sequence
        //          | emoji_presentation_sequence
        //tag_spec := [\x{E0020}-\x{E007E}]+
        //tag_end := \x{E007F} (CANCEL TAG)
        if (!IsTagComponent(_next))
            return false;

        do
        {
            _length += 2; // <<tag_spec>>
            if (!GetNextChar())
                break;
        }
        while (IsTagComponent(_next));

        if (_next == CancelTag)
            UseNextChar(); // <<tag_end>>
        return true;
    }

    private static bool IsRegionalIndicatorSymbol(int ch)
    {
        return ch >= 0x1f1e6 && ch <= 0x1f1ff;
    }

    private static bool IsTagComponent(int ch)
    {
        return ch >= 0xe0020 && ch <= 0xe007e;
    }

    private static bool IsEmojiModifier(int ch)
    {
        return ch >= 0x1f3fb && ch <= 0x1f3ff;
    }

    private static bool IsKeycapBase(int ch)
    {
        return (ch >= '0' && ch <= '9') || ch == '#' || ch == '*';
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment