Skip to content

Instantly share code, notes, and snippets.

@gekka
Last active April 16, 2024 09:47
Show Gist options
  • Save gekka/bbeaffe1ab2d49b6f8dcf48dd95d5d8b to your computer and use it in GitHub Desktop.
Save gekka/bbeaffe1ab2d49b6f8dcf48dd95d5d8b to your computer and use it in GitHub Desktop.
ユニコードを異字体セレクタとかを考慮した1文字ごとに分割
#include "test.h"
#include <afxwin.h>
#include <icu.h>
#pragma comment(lib, "icu.lib")
// Tag type carrying a grapheme count; operator% overloads taking a
// GraphemeLeft use it to select the leading part of a CString.
typedef struct GraphemeLeft_ {
    // Requested number of graphemes.
    uint32_t value;

    // Converting constructor: the int is stored as an unsigned count.
    GraphemeLeft_(int n) : value(static_cast<uint32_t>(n)) {}

    // Allows the tag to be used wherever a uint32_t count is expected.
    operator uint32_t() const
    {
        return value;
    }
} GraphemeLeft;
// Tag type carrying a grapheme count; operator% overloads taking a
// GraphemeRight use it to select the trailing part of a CString.
typedef struct GraphemeRight_ {
    // Requested number of graphemes.
    uint32_t value;

    // Converting constructor: the int is stored as an unsigned count.
    GraphemeRight_(int n) : value(static_cast<uint32_t>(n)) {}

    // Allows the tag to be used wherever a uint32_t count is expected.
    operator uint32_t() const
    {
        return value;
    }
} GraphemeRight;
// Tag type describing a grapheme range (first grapheme to skip to, and how
// many graphemes to take); consumed by the operator% overloads taking a
// GraphemeMid.
typedef struct GraphemeMid_ {
    uint32_t start;  // number of graphemes to skip before the range
    uint32_t count;  // number of graphemes in the range

    // Stores both signed arguments as unsigned values.
    GraphemeMid_(int32_t first, int32_t length)
        : start(static_cast<uint32_t>(first)),
          count(static_cast<uint32_t>(length))
    {
    }
} GraphemeMid;
// Empty tag type: it carries no data and only selects the operator% overload
// that returns the grapheme count of a CString.
typedef struct {} GraphemeGetLength;
// Counts the boundaries remaining between the iterator's current position and
// the end of its text by calling ubrk_next() until it reports UBRK_DONE.
//
// pIterator: break iterator to advance; a NULL iterator (e.g. a failed
//            ubrk_open) yields 0 instead of being dereferenced.
// Returns the number of successful ubrk_next() steps. The iterator has been
// advanced by the time this returns, so callers that need to walk the text
// again must reposition it (e.g. with ubrk_first()).
static int32_t getLengthGrapheme(UBreakIterator* pIterator)
{
    if (pIterator == NULL)
    {
        return 0;
    }

    // Signed counter so the accumulated value matches the declared return type.
    int32_t count = 0;
    while (ubrk_next(pIterator) != UBRK_DONE)
    {
        ++count;
    }
    return count;
}
// Advances the iterator by up to `count` boundaries and returns the offset of
// the last boundary reached.
//
// pIterator: break iterator to advance; NULL yields 0.
// count:     number of boundaries to step over. 0 returns the current
//            position unchanged. A negative value means "no limit": the
//            iterator is advanced until ubrk_next() reports UBRK_DONE.
// Returns the offset reported by the last successful ubrk_next(), or the
// starting ubrk_current() offset if no step succeeded.
static int32_t skipGrapheme(UBreakIterator* pIterator, int32_t count)
{
    if (pIterator == NULL)
    {
        return 0;
    }

    // Kept signed: ICU offsets are int32_t and so is the return type.
    int32_t index = ubrk_current(pIterator);
    while (count != 0)
    {
        const int32_t next = ubrk_next(pIterator);
        if (next == UBRK_DONE)
        {
            break;
        }
        index = next;

        // Only a positive budget is consumed; a negative count stays negative
        // so it never has to be decremented toward signed overflow.
        if (count > 0)
        {
            --count;
        }
    }
    return index;
}
// Returns the number of character-break segments (graphemes) in `s`, as
// reported by an ICU UBRK_CHARACTER break iterator opened with ULOC_JAPAN.
//
// s:     string to measure; it is only read.
// dummy: tag value used solely to select this overload.
// Returns the grapheme count, or 0 if the break iterator cannot be created.
int32_t operator %(CString& s, GraphemeGetLength dummy)
{
    (void)dummy;

    // Read-only access: GetString() avoids GetBuffer(), which is meant for
    // obtaining a writable buffer.
    LPCWSTR p = s.GetString();

    // ICU functions return immediately when the status already holds a
    // failure on entry, so it must start out as U_ZERO_ERROR.
    UErrorCode errcode = U_ZERO_ERROR;
    UBreakIterator* pIterator = ubrk_open(UBRK_CHARACTER, ULOC_JAPAN, (UChar const*)p, -1, &errcode);
    if (pIterator == NULL || U_FAILURE(errcode))
    {
        if (pIterator != NULL)
        {
            ubrk_close(pIterator);
        }
        return 0;
    }

    int32_t length = getLengthGrapheme(pIterator);
    ubrk_close(pIterator);
    return length;
}
// Returns the leading `left` graphemes of *ps, using an ICU UBRK_CHARACTER
// break iterator opened with ULOC_JAPAN to find the cut position.
//
// ps:   string to cut; it is only read.
// left: number of graphemes to keep. If the string has fewer, the cut lands
//       on the last boundary that skipGrapheme() reaches.
// Returns the selected prefix, or an empty CString if the break iterator
// cannot be created.
CString operator %(CString* ps, GraphemeLeft left)
{
    // Read-only access: GetString() avoids GetBuffer(), which is meant for
    // obtaining a writable buffer.
    LPCWSTR p = ps->GetString();

    // ICU functions return immediately when the status already holds a
    // failure on entry, so it must start out as U_ZERO_ERROR.
    UErrorCode errcode = U_ZERO_ERROR;
    UBreakIterator* pIterator = ubrk_open(UBRK_CHARACTER, ULOC_JAPAN, (UChar const*)p, -1, &errcode);
    if (pIterator == NULL || U_FAILURE(errcode))
    {
        if (pIterator != NULL)
        {
            ubrk_close(pIterator);
        }
        return CString();
    }

    // The offsets ICU reports are passed straight to CString::Mid().
    int32_t start = ubrk_first(pIterator);
    int32_t end = skipGrapheme(pIterator, left);
    ubrk_close(pIterator);
    return ps->Mid(start, end - start);
}
// Reference overload: forwards to the CString* overload taking a GraphemeLeft.
CString operator %(CString& s, GraphemeLeft left)
{
    CString* target = &s;
    return target % left;
}
// Returns the trailing `right` graphemes of *ps, using an ICU UBRK_CHARACTER
// break iterator opened with ULOC_JAPAN.
//
// ps:    string to cut; it is only read.
// right: number of graphemes to keep from the end.
// Returns the whole string when the computed number of graphemes to skip is
// zero or negative, otherwise the text starting at the boundary reached after
// skipping that many graphemes. Returns an empty CString if the break
// iterator cannot be created.
CString operator %(CString* ps, GraphemeRight right)
{
    CString ret;

    // Read-only access: GetString() avoids GetBuffer(), which is meant for
    // obtaining a writable buffer.
    LPCWSTR p = ps->GetString();

    // ICU functions return immediately when the status already holds a
    // failure on entry, so it must start out as U_ZERO_ERROR.
    UErrorCode errcode = U_ZERO_ERROR;
    UBreakIterator* pIterator = ubrk_open(UBRK_CHARACTER, ULOC_JAPAN, (UChar const*)p, -1, &errcode);
    if (pIterator == NULL || U_FAILURE(errcode))
    {
        if (pIterator != NULL)
        {
            ubrk_close(pIterator);
        }
        return ret;
    }

    ubrk_first(pIterator);
    const int32_t len = getLengthGrapheme(pIterator);

    // Same arithmetic the implicit conversions performed before (unsigned
    // subtraction, then reinterpretation as signed), now spelled out.
    const int32_t skipCount = static_cast<int32_t>(static_cast<uint32_t>(len) - right.value);
    if (skipCount <= 0)
    {
        ret = *ps;
    }
    else
    {
        // Counting moved the iterator, so rewind before skipping.
        ubrk_first(pIterator);
        const int32_t start = skipGrapheme(pIterator, skipCount);
        ret = CString(p + start);
    }

    ubrk_close(pIterator);
    return ret;
}
// Reference overload: forwards to the CString* overload taking a GraphemeRight.
CString operator %(CString& s, GraphemeRight right)
{
    CString* target = &s;
    return target % right;
}
// Returns a grapheme-based substring of *ps: skips mid.start graphemes, then
// takes up to mid.count graphemes, using an ICU UBRK_CHARACTER break iterator
// opened with ULOC_JAPAN.
//
// ps:  string to cut; it is only read.
// mid: range to extract, expressed in graphemes.
// Returns the selected substring, or an empty CString if the break iterator
// cannot be created.
CString operator %(CString* ps, GraphemeMid mid)
{
    // Read-only access: GetString() avoids GetBuffer(), which is meant for
    // obtaining a writable buffer.
    LPCWSTR p = ps->GetString();

    // ICU functions return immediately when the status already holds a
    // failure on entry, so it must start out as U_ZERO_ERROR.
    UErrorCode errcode = U_ZERO_ERROR;
    UBreakIterator* pIterator = ubrk_open(UBRK_CHARACTER, ULOC_JAPAN, (UChar const*)p, -1, &errcode);
    if (pIterator == NULL || U_FAILURE(errcode))
    {
        if (pIterator != NULL)
        {
            ubrk_close(pIterator);
        }
        return CString();
    }

    ubrk_first(pIterator);
    // The second skip continues from wherever the first one stopped.
    int32_t start = skipGrapheme(pIterator, mid.start);
    int32_t end = skipGrapheme(pIterator, mid.count);
    ubrk_close(pIterator);

    // The offsets ICU reports are passed straight to CString::Mid().
    return ps->Mid(start, end - start);
}
// Reference overload: forwards to the CString* overload taking a GraphemeMid.
CString operator %(CString& s, GraphemeMid mid)
{
    CString* target = &s;
    return target % mid;
}
void Test()
{
CString string = L"葛\U000E0100あ葛\U000E0101い\U0001F469う";
int32_t length = string % GraphemeGetLength();
CString x = string % GraphemeLeft(3);
CString y = string % GraphemeRight(3);
CString z = string % GraphemeMid(1, 3);
MessageBox(0, x, L"", 0);
MessageBox(0, y, L"", 0);
MessageBox(0, z, L"", 0);
};
#include "test.h"
#include <afxwin.h>
#include <icu.h>
#pragma comment(lib, "icu.lib")
void Test()
{
// 異字体セレクタとか絵文字とか
CString string = L"A\uFE00BC𠮷☺葛\U000E0100 葛\U000E0101 \U00020BB7\U0001F1EF \U0001F1F5";
// 肌色違い4人家族
//CString string = L"\U0001F469\U0001F3FB" L"\u200D" L"\U0001F468\U0001F3FC" L"\u200D" L"\U0001F467\U0001F3FD" L"\u200D" L"\U0001F476\U0001F3FE";
CStringArray array;
LPCWSTR p = string.GetBuffer();
UErrorCode errcode;
UBreakIterator* pIterator = ubrk_open(UBRK_CHARACTER, ULOC_JAPAN, (UChar const*)p, -1, &errcode);
int32_t index = ubrk_first(pIterator);
while (index != UBRK_DONE)
{
int32_t nextIndex = ubrk_next(pIterator);
if(nextIndex == UBRK_DONE)
{
break;
}
int32_t count = nextIndex - index;
CString part = string.Mid(index, count);
array.Add(part);
index = nextIndex;
}
ubrk_close(pIterator);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment