Last active
April 16, 2024 09:47
-
-
Save gekka/bbeaffe1ab2d49b6f8dcf48dd95d5d8b to your computer and use it in GitHub Desktop.
ユニコードを異字体セレクタとかを考慮した1文字ごとに分割
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "test.h" | |
#include <afxwin.h> | |
#include <icu.h> | |
#pragma comment(lib, "icu.lib") | |
/**
 * Right-hand operand for `string % GraphemeLeft(n)`.
 *
 * Carries the number of grapheme clusters to take from the start of a string.
 * Converts implicitly from int and to uint32_t.
 */
struct GraphemeLeft_ {
    uint32_t value;  // requested number of leading grapheme clusters

    GraphemeLeft_(int i) : value(static_cast<uint32_t>(i)) {}

    operator uint32_t() const { return value; }
};
typedef GraphemeLeft_ GraphemeLeft;
/**
 * Right-hand operand for `string % GraphemeRight(n)`.
 *
 * Carries the number of grapheme clusters to take from the end of a string.
 * Converts implicitly from int and to uint32_t.
 */
struct GraphemeRight_ {
    uint32_t value;  // requested number of trailing grapheme clusters

    GraphemeRight_(int i) : value(static_cast<uint32_t>(i)) {}

    operator uint32_t() const { return value; }
};
typedef GraphemeRight_ GraphemeRight;
/**
 * Right-hand operand for `string % GraphemeMid(start, count)`.
 *
 * Describes a run of grapheme clusters: how many to skip from the start of
 * the string, then how many to take.
 */
struct GraphemeMid_ {
    uint32_t start;  // grapheme clusters to skip before the run begins
    uint32_t count;  // grapheme clusters in the run

    GraphemeMid_(int32_t start, int32_t count)
        : start(static_cast<uint32_t>(start)),
          count(static_cast<uint32_t>(count))
    {
    }
};
typedef GraphemeMid_ GraphemeMid;
/** Empty tag type: `string % GraphemeGetLength()` selects the length-counting overload. */
struct GraphemeGetLength {};
/**
 * Counts the boundaries remaining after the iterator's current position.
 *
 * Advances the iterator with ubrk_next() until it reports UBRK_DONE, so the
 * iterator is exhausted when this returns.
 *
 * @param pIterator  Open break iterator.
 * @return Number of successful ubrk_next() steps.
 */
static int32_t getLengthGrapheme(UBreakIterator* pIterator)
{
    uint32_t steps = 0;
    for (int32_t pos = ubrk_next(pIterator); pos != UBRK_DONE; pos = ubrk_next(pIterator)) {
        ++steps;
    }
    return static_cast<int32_t>(steps);
}
/**
 * Advances the iterator by up to `count` boundaries.
 *
 * Stops early when ubrk_next() reports UBRK_DONE. A zero count leaves the
 * iterator untouched; any non-zero count (including a negative one) keeps
 * stepping until the count reaches zero or the text ends.
 *
 * @param pIterator  Open break iterator.
 * @param count      Number of boundaries to step over.
 * @return Offset of the last boundary reached (the starting offset if no
 *         step succeeded).
 */
static int32_t skipGrapheme(UBreakIterator* pIterator, int32_t count)
{
    uint32_t lastBoundary = ubrk_current(pIterator);
    for (; count != 0; --count) {
        const int32_t next = ubrk_next(pIterator);
        if (next == UBRK_DONE) {
            return lastBoundary;
        }
        lastBoundary = next;
    }
    return lastBoundary;
}
/**
 * Returns the number of grapheme clusters in `s`.
 *
 * @param s      String to measure (read up to its first NUL).
 * @param dummy  Tag selecting this overload; its value is unused.
 * @return Grapheme-cluster count, or 0 if the break iterator cannot be opened.
 */
int32_t operator %(CString& s, GraphemeGetLength dummy)
{
    (void)dummy;

    // Read-only access: GetBuffer() would require a matching ReleaseBuffer().
    LPCWSTR p = s.GetString();

    // ICU functions return immediately if the error code already indicates a
    // failure on entry, so it must start out as U_ZERO_ERROR.
    UErrorCode errcode = U_ZERO_ERROR;
    UBreakIterator* pIterator = ubrk_open(UBRK_CHARACTER, ULOC_JAPAN, reinterpret_cast<const UChar*>(p), -1, &errcode);
    if (U_FAILURE(errcode) || pIterator == NULL)
    {
        if (pIterator != NULL)
        {
            ubrk_close(pIterator);
        }
        return 0;
    }

    int32_t length = getLengthGrapheme(pIterator);
    ubrk_close(pIterator);
    return length;
}
/**
 * Returns the first `left` grapheme clusters of `*ps`.
 *
 * If the string holds fewer clusters than requested, the text up to the last
 * boundary reached is returned.
 *
 * @param ps    String to slice (read up to its first NUL).
 * @param left  Number of leading grapheme clusters to keep.
 * @return The leading substring, or an empty string if the break iterator
 *         cannot be opened.
 */
CString operator %(CString* ps, GraphemeLeft left)
{
    // Read-only access: GetBuffer() would require a matching ReleaseBuffer().
    LPCWSTR p = ps->GetString();

    // ICU functions return immediately if the error code already indicates a
    // failure on entry, so it must start out as U_ZERO_ERROR.
    UErrorCode errcode = U_ZERO_ERROR;
    UBreakIterator* pIterator = ubrk_open(UBRK_CHARACTER, ULOC_JAPAN, reinterpret_cast<const UChar*>(p), -1, &errcode);
    if (U_FAILURE(errcode) || pIterator == NULL)
    {
        if (pIterator != NULL)
        {
            ubrk_close(pIterator);
        }
        return CString();
    }

    int32_t start = ubrk_first(pIterator);
    int32_t end = skipGrapheme(pIterator, static_cast<int32_t>(left.value));
    ubrk_close(pIterator);

    // Offsets are in UTF-16 code units, which is what CString::Mid expects.
    return ps->Mid(start, end - start);
}
/** Reference overload: forwards to the pointer overload for GraphemeLeft. */
CString operator %(CString& s, GraphemeLeft left)
{
    return operator %(&s, left);
}
/**
 * Returns the last `right` grapheme clusters of `*ps`.
 *
 * If the string holds no more clusters than requested, the whole string is
 * returned.
 *
 * @param ps     String to slice (read up to its first NUL).
 * @param right  Number of trailing grapheme clusters to keep.
 * @return The trailing substring, or an empty string if the break iterator
 *         cannot be opened.
 */
CString operator %(CString* ps, GraphemeRight right)
{
    // Read-only access: GetBuffer() would require a matching ReleaseBuffer().
    LPCWSTR p = ps->GetString();

    // ICU functions return immediately if the error code already indicates a
    // failure on entry, so it must start out as U_ZERO_ERROR.
    UErrorCode errcode = U_ZERO_ERROR;
    UBreakIterator* pIterator = ubrk_open(UBRK_CHARACTER, ULOC_JAPAN, reinterpret_cast<const UChar*>(p), -1, &errcode);
    if (U_FAILURE(errcode) || pIterator == NULL)
    {
        if (pIterator != NULL)
        {
            ubrk_close(pIterator);
        }
        return CString();
    }

    CString ret;
    ubrk_first(pIterator);
    const int32_t len = getLengthGrapheme(pIterator);

    // Do the subtraction in a wide signed type rather than relying on
    // unsigned wrap-around of `int - uint32_t`. The count is reinterpreted as
    // int32_t first so the result matches the previous arithmetic.
    const int64_t skipCount = static_cast<int64_t>(len) - static_cast<int64_t>(static_cast<int32_t>(right.value));
    if (skipCount <= 0)
    {
        ret = *ps;
    }
    else
    {
        // Skipping more than `len` clusters ends at the same final boundary
        // as skipping exactly `len`, so clamping keeps the result unchanged.
        const int32_t toSkip = (skipCount > len) ? len : static_cast<int32_t>(skipCount);

        // The length pass exhausted the iterator; rewind before skipping.
        ubrk_first(pIterator);
        int32_t start = skipGrapheme(pIterator, toSkip);
        ret = CString(p + start);
    }
    ubrk_close(pIterator);
    return ret;
}
/** Reference overload: forwards to the pointer overload for GraphemeRight. */
CString operator %(CString& s, GraphemeRight right)
{
    return operator %(&s, right);
}
/**
 * Returns `mid.count` grapheme clusters of `*ps`, starting after skipping
 * `mid.start` clusters.
 *
 * Both the skip and the take stop at the end of the text, so an out-of-range
 * request yields a shorter (possibly empty) result.
 *
 * @param ps   String to slice (read up to its first NUL).
 * @param mid  Start position and length, both in grapheme clusters.
 * @return The selected substring, or an empty string if the break iterator
 *         cannot be opened.
 */
CString operator %(CString* ps, GraphemeMid mid)
{
    // Read-only access: GetBuffer() would require a matching ReleaseBuffer().
    LPCWSTR p = ps->GetString();

    // ICU functions return immediately if the error code already indicates a
    // failure on entry, so it must start out as U_ZERO_ERROR.
    UErrorCode errcode = U_ZERO_ERROR;
    UBreakIterator* pIterator = ubrk_open(UBRK_CHARACTER, ULOC_JAPAN, reinterpret_cast<const UChar*>(p), -1, &errcode);
    if (U_FAILURE(errcode) || pIterator == NULL)
    {
        if (pIterator != NULL)
        {
            ubrk_close(pIterator);
        }
        return CString();
    }

    ubrk_first(pIterator);
    int32_t start = skipGrapheme(pIterator, static_cast<int32_t>(mid.start));
    // The second skip continues from where the first one stopped.
    int32_t end = skipGrapheme(pIterator, static_cast<int32_t>(mid.count));
    ubrk_close(pIterator);

    // Offsets are in UTF-16 code units, which is what CString::Mid expects.
    return ps->Mid(start, end - start);
}
/** Reference overload: forwards to the pointer overload for GraphemeMid. */
CString operator %(CString& s, GraphemeMid mid)
{
    return operator %(&s, mid);
}
void Test() | |
{ | |
CString string = L"葛\U000E0100あ葛\U000E0101い\U0001F469う"; | |
int32_t length = string % GraphemeGetLength(); | |
CString x = string % GraphemeLeft(3); | |
CString y = string % GraphemeRight(3); | |
CString z = string % GraphemeMid(1, 3); | |
MessageBox(0, x, L"", 0); | |
MessageBox(0, y, L"", 0); | |
MessageBox(0, z, L"", 0); | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "test.h" | |
#include <afxwin.h> | |
#include <icu.h> | |
#pragma comment(lib, "icu.lib") | |
void Test() | |
{ | |
// 異字体セレクタとか絵文字とか | |
CString string = L"A\uFE00BC𠮷☺葛\U000E0100 葛\U000E0101 \U00020BB7\U0001F1EF \U0001F1F5"; | |
// 肌色違い4人家族 | |
//CString string = L"\U0001F469\U0001F3FB" L"\u200D" L"\U0001F468\U0001F3FC" L"\u200D" L"\U0001F467\U0001F3FD" L"\u200D" L"\U0001F476\U0001F3FE"; | |
CStringArray array; | |
LPCWSTR p = string.GetBuffer(); | |
UErrorCode errcode; | |
UBreakIterator* pIterator = ubrk_open(UBRK_CHARACTER, ULOC_JAPAN, (UChar const*)p, -1, &errcode); | |
int32_t index = ubrk_first(pIterator); | |
while (index != UBRK_DONE) | |
{ | |
int32_t nextIndex = ubrk_next(pIterator); | |
if(nextIndex == UBRK_DONE) | |
{ | |
break; | |
} | |
int32_t count = nextIndex - index; | |
CString part = string.Mid(index, count); | |
array.Add(part); | |
index = nextIndex; | |
} | |
ubrk_close(pIterator); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment