Skip to content

Instantly share code, notes, and snippets.

@zz-jason
Created March 27, 2017 07:53
Show Gist options
  • Save zz-jason/078110974bb931b7f8e3432775ecfd05 to your computer and use it in GitHub Desktop.
Save zz-jason/078110974bb931b7f8e3432775ecfd05 to your computer and use it in GitHub Desktop.
/// Forward-only cursor over a UTF-8 encoded byte buffer.
///
/// The iterator does not own the buffer: `data` must remain valid for as long
/// as the iterator is in use. The length of each character is derived from
/// its lead byte only; continuation bytes are not validated. Any byte that is
/// not a 2-, 3- or 4-byte lead (ASCII, stray continuation bytes, 0xF8..0xFF)
/// is treated as a one-byte character so iteration always makes progress.
class Utf8Iterator
{
public:
    /// @param data  Pointer to the first byte of the buffer (not owned).
    /// @param size  Number of bytes available at `data`.
    Utf8Iterator(const char* data, size_t size): mData(data), mSize(size), mPos(0) {}

    /// @return true while at least one unread byte remains.
    bool HasNext() const
    {
        return mPos < mSize;
    }

    /// Advances past the current character.
    /// @return The byte offset of the following character, or the buffer size
    ///         once the end has been reached.
    size_t Next()
    {
        if (! HasNext()) return mSize;
        mPos += GetWidth();
        return mPos;
    }

    /// Advances past up to `utf8CharCount` characters, stopping early at the
    /// end of the buffer.
    /// @return The byte offset reached.
    size_t Next(size_t utf8CharCount)
    {
        for (size_t i = 0; i < utf8CharCount && HasNext(); ++ i)
        {
            Next();
        }
        return mPos;
    }

    /// @return The number of bytes occupied by the current character, or 0 at
    ///         the end of the buffer. Never exceeds the bytes remaining.
    size_t GetWidth() const
    {
        if (! HasNext()) return 0;

        size_t width = 1;
        if (Is4ByteUtf8CodePoint())
        {
            width = 4;
        }
        else if (Is3ByteUtf8CodePoint())
        {
            width = 3;
        }
        else if (Is2ByteUtf8CodePoint())
        {
            width = 2;
        }

        // A sequence truncated at the end of the buffer must not report more
        // bytes than actually exist; otherwise GetChar() would read past the
        // buffer and Next() would move the position beyond mSize.
        const size_t remaining = mSize - mPos;
        return width < remaining ? width : remaining;
    }

    /// @return Pointer to the first byte of the current character (one past
    ///         the last byte of the buffer when iteration is finished).
    const char* GetCodePoint() const
    {
        return mData + mPos;
    }

    /// @return A copy of the bytes of the current character; empty at the end
    ///         of the buffer.
    std::string GetChar() const
    {
        return std::string(GetCodePoint(), GetWidth());
    }

private:
    // Lead byte of the current character, read as unsigned so the bit tests
    // below do not depend on whether plain `char` is signed. Callers must
    // ensure HasNext() is true.
    unsigned char LeadByte() const { return static_cast<unsigned char>(mData[mPos]); }

    // Lead-byte patterns: 0xxxxxxx, 110xxxxx, 1110xxxx, 11110xxx.
    bool Is1ByteUtf8CodePoint() const { return 0x00 == (0x80 & LeadByte()); }
    bool Is2ByteUtf8CodePoint() const { return 0xc0 == (0xe0 & LeadByte()); }
    bool Is3ByteUtf8CodePoint() const { return 0xe0 == (0xf0 & LeadByte()); }
    bool Is4ByteUtf8CodePoint() const { return 0xf0 == (0xf8 & LeadByte()); }

private:
    const char* mData;  // start of the buffer (not owned)
    size_t mSize;       // total number of bytes in the buffer
    size_t mPos;        // byte offset of the current character
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment