Created
May 10, 2024 17:07
-
-
Save Advait-M/a326cd2e474b9520dc893765ec4cb2c4 to your computer and use it in GitHub Desktop.
Calculate width with ICU for cloud character (emoji variation selector)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream>
#include <memory>
#include <vector>

#include <unicode/brkiter.h>
#include <unicode/ubrk.h>
#include <unicode/uchar.h>
#include <unicode/ustring.h>
// Calculating the width (length in UTF-16 code units) of ☁️ (U+2601 U+FE0F) and counting its grapheme clusters, using International Components for Unicode (ICU) in C++.
// Compiles in C++11 or above, with ICU installed.
int main() { | |
UErrorCode status = U_ZERO_ERROR; | |
// Combine cloud character and variation selector into a single string. | |
UChar text[] = {0x2601, 0xFE0F, 0}; // Null-terminated UTF-16 string for ☁️ | |
// Create a grapheme cluster boundary iterator. | |
std::unique_ptr<icu::BreakIterator> iter(icu::BreakIterator::createCharacterInstance(icu::Locale::getDefault(), status)); | |
if (U_FAILURE(status)) { | |
std::cerr << "Failed to create BreakIterator: " << u_errorName(status) << std::endl; | |
return 1; | |
} | |
icu::UnicodeString str(text); | |
iter->setText(str); | |
// Iterate over grapheme clusters and count them. | |
int32_t count = 0; | |
int prev = iter->first(); | |
for (int32_t boundary = iter->next(); boundary != icu::BreakIterator::DONE; boundary = iter->next()) { | |
// Returns 0-2 for ☁️, confirming the width is 2. | |
std::cout << "Grapheme cluster from " << prev << " to " << boundary << std::endl; | |
count++; | |
prev = boundary; | |
} | |
// Returns 1 for cloud character (number of clusters). | |
std::cout << "Number of grapheme clusters: " << count << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment