Created
May 10, 2024 17:07
Calculate width with ICU for cloud character (emoji variation selector)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream>
#include <memory>
#include <vector>

#include <unicode/brkiter.h>
#include <unicode/ubrk.h>
#include <unicode/uchar.h>
#include <unicode/ustring.h>
// Measures the emoji ☁️ (U+2601 followed by the variation selector U+FE0F) by splitting it into
// grapheme clusters with International Components for Unicode (ICU) in C++.
// Compiles with C++11 or later; requires ICU to be installed.
int main() { | |
UErrorCode status = U_ZERO_ERROR; | |
// Combine cloud character and variation selector into a single string. | |
UChar text[] = {0x2601, 0xFE0F, 0}; // Null-terminated UTF-16 string for ☁️ | |
// Create a grapheme cluster boundary iterator. | |
std::unique_ptr<icu::BreakIterator> iter(icu::BreakIterator::createCharacterInstance(icu::Locale::getDefault(), status)); | |
if (U_FAILURE(status)) { | |
std::cerr << "Failed to create BreakIterator: " << u_errorName(status) << std::endl; | |
return 1; | |
} | |
icu::UnicodeString str(text); | |
iter->setText(str); | |
// Iterate over grapheme clusters and count them. | |
int32_t count = 0; | |
int prev = iter->first(); | |
for (int32_t boundary = iter->next(); boundary != icu::BreakIterator::DONE; boundary = iter->next()) { | |
// Returns 0-2 for ☁️, confirming the width is 2. | |
std::cout << "Grapheme cluster from " << prev << " to " << boundary << std::endl; | |
count++; | |
prev = boundary; | |
} | |
// Returns 1 for cloud character (number of clusters). | |
std::cout << "Number of grapheme clusters: " << count << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment