Skip to content

Instantly share code, notes, and snippets.

@Advait-M
Created May 10, 2024 17:07
Show Gist options
  • Save Advait-M/a326cd2e474b9520dc893765ec4cb2c4 to your computer and use it in GitHub Desktop.
Save Advait-M/a326cd2e474b9520dc893765ec4cb2c4 to your computer and use it in GitHub Desktop.
Calculate width with ICU for cloud character (emoji variation selector)
#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>

#include <unicode/brkiter.h>
#include <unicode/locid.h>
#include <unicode/ubrk.h>
#include <unicode/uchar.h>
#include <unicode/unistr.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
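// Measures ☁️ (U+2601 followed by the emoji variation selector U+FE0F) with International Components for Unicode (ICU) in C++:
// the string is split into grapheme clusters, and each cluster's span is reported in UTF-16 code units.
// Compiles with C++11 or later and requires ICU to be installed.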
int main() {
UErrorCode status = U_ZERO_ERROR;
// Combine cloud character and variation selector into a single string.
UChar text[] = {0x2601, 0xFE0F, 0}; // Null-terminated UTF-16 string for ☁️
// Create a grapheme cluster boundary iterator.
std::unique_ptr<icu::BreakIterator> iter(icu::BreakIterator::createCharacterInstance(icu::Locale::getDefault(), status));
if (U_FAILURE(status)) {
std::cerr << "Failed to create BreakIterator: " << u_errorName(status) << std::endl;
return 1;
}
icu::UnicodeString str(text);
iter->setText(str);
// Iterate over grapheme clusters and count them.
int32_t count = 0;
int prev = iter->first();
for (int32_t boundary = iter->next(); boundary != icu::BreakIterator::DONE; boundary = iter->next()) {
// Returns 0-2 for ☁️, confirming the width is 2.
std::cout << "Grapheme cluster from " << prev << " to " << boundary << std::endl;
count++;
prev = boundary;
}
// Returns 1 for cloud character (number of clusters).
std::cout << "Number of grapheme clusters: " << count << std::endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment