bakercp/ofUnicode.cpp

## ofUnicode.cpp
// =============================================================================
//
// Copyright (c) 2009-2013 Christopher Baker <http://christopherbaker.net>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// =============================================================================


#include "ofUnicode.h"
#include "Poco/Buffer.h"

//  http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT // win
//  http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT           // mac
//  http://unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT                // linux


static const std::size_t STANDARD_UNICODE_CHARS_LEN = 228;
static const ofUTF32Char STANDARD_UNICODE_CHARS[] = {
    0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
    0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
    0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
    0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF,
    0x0102, 0x0103, 0x0104, 0x0105, 0x0106, 0x0107, 0x010C, 0x010D,
    0x010E, 0x010F, 0x0110, 0x0111, 0x0118, 0x0119, 0x011A, 0x011B,
    0x0131, 0x0139, 0x013A, 0x013D, 0x013E, 0x0141, 0x0142, 0x0143,
    0x0144, 0x0147, 0x0148, 0x0150, 0x0151, 0x0152, 0x0153, 0x0154,
    0x0155, 0x0158, 0x0159, 0x015A, 0x015B, 0x015E, 0x015F, 0x0160,
    0x0161, 0x0162, 0x0163, 0x0164, 0x0165, 0x016E, 0x016F, 0x0170,
    0x0171, 0x0178, 0x0179, 0x017A, 0x017B, 0x017C, 0x017D, 0x017E,
    0x0192, 0x02C6, 0x02C7, 0x02D8, 0x02D9, 0x02DA, 0x02DB, 0x02DC,
    0x02DD, 0x03A9, 0x03C0, 0x2013, 0x2014, 0x2018, 0x2019, 0x201A,
    0x201C, 0x201D, 0x201E, 0x2020, 0x2021, 0x2022, 0x2026, 0x2030,
    0x2039, 0x203A, 0x2044, 0x20AC, 0x2122, 0x2202, 0x2206, 0x220F,
    0x2211, 0x221A, 0x221E, 0x222B, 0x2248, 0x2260, 0x2264, 0x2265,
    0x25CA, 0xF8FF, 0xFB01, 0xFB02
};

/// Code point ranges of the Unicode blocks, one entry per ofUnicodeBlockName.
///
/// ofGetUnicodeBlock() and ofGetUnicodeBlockString() index this array directly
/// with the enum value, so the entries must stay in exactly the same order as
/// the enumerators of ofUnicodeBlockName. Each entry is (name, first code
/// point, last code point); both ends of the range are inclusive.
static const ofUnicodeBlock OF_UNICODE_BLOCKS[] =
{
    ofUnicodeBlock(OF_BASIC_LATIN,0x0000,0x007F),
    ofUnicodeBlock(OF_LATIN_1_SUPPLEMENT,0x0080,0x00FF),
    ofUnicodeBlock(OF_LATIN_EXTENDED_A,0x0100,0x017F),
    ofUnicodeBlock(OF_LATIN_EXTENDED_B,0x0180,0x024F),
    ofUnicodeBlock(OF_IPA_EXTENSIONS,0x0250,0x02AF),
    ofUnicodeBlock(OF_SPACING_MODIFIER_LETTERS,0x02B0,0x02FF),
    ofUnicodeBlock(OF_COMBINING_DIACRITICAL_MARKS,0x0300,0x036F),
    ofUnicodeBlock(OF_GREEK_AND_COPTIC,0x0370,0x03FF),
    ofUnicodeBlock(OF_CYRILLIC,0x0400,0x04FF),
    ofUnicodeBlock(OF_CYRILLIC_SUPPLEMENT,0x0500,0x052F),
    ofUnicodeBlock(OF_ARMENIAN,0x0530,0x058F),
    ofUnicodeBlock(OF_HEBREW,0x0590,0x05FF),
    ofUnicodeBlock(OF_ARABIC,0x0600,0x06FF),
    ofUnicodeBlock(OF_SYRIAC,0x0700,0x074F),
    ofUnicodeBlock(OF_ARABIC_SUPPLEMENT,0x0750,0x077F),
    ofUnicodeBlock(OF_THAANA,0x0780,0x07BF),
    ofUnicodeBlock(OF_NKO,0x07C0,0x07FF),
    ofUnicodeBlock(OF_SAMARITAN,0x0800,0x083F),
    ofUnicodeBlock(OF_MANDAIC,0x0840,0x085F),
    ofUnicodeBlock(OF_ARABIC_EXTENDED_A,0x08A0,0x08FF),
    ofUnicodeBlock(OF_DEVANAGARI,0x0900,0x097F),
    ofUnicodeBlock(OF_BENGALI,0x0980,0x09FF),
    ofUnicodeBlock(OF_GURMUKHI,0x0A00,0x0A7F),
    ofUnicodeBlock(OF_GUJARATI,0x0A80,0x0AFF),
    ofUnicodeBlock(OF_ORIYA,0x0B00,0x0B7F),
    ofUnicodeBlock(OF_TAMIL,0x0B80,0x0BFF),
    ofUnicodeBlock(OF_TELUGU,0x0C00,0x0C7F),
    ofUnicodeBlock(OF_KANNADA,0x0C80,0x0CFF),
    ofUnicodeBlock(OF_MALAYALAM,0x0D00,0x0D7F),
    ofUnicodeBlock(OF_SINHALA,0x0D80,0x0DFF),
    ofUnicodeBlock(OF_THAI,0x0E00,0x0E7F),
    ofUnicodeBlock(OF_LAO,0x0E80,0x0EFF),
    ofUnicodeBlock(OF_TIBETAN,0x0F00,0x0FFF),
    ofUnicodeBlock(OF_MYANMAR,0x1000,0x109F),
    ofUnicodeBlock(OF_GEORGIAN,0x10A0,0x10FF),
    ofUnicodeBlock(OF_HANGUL_JAMO,0x1100,0x11FF),
    ofUnicodeBlock(OF_ETHIOPIC,0x1200,0x137F),
    ofUnicodeBlock(OF_ETHIOPIC_SUPPLEMENT,0x1380,0x139F),
    ofUnicodeBlock(OF_CHEROKEE,0x13A0,0x13FF),
    ofUnicodeBlock(OF_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,0x1400,0x167F),
    ofUnicodeBlock(OF_OGHAM,0x1680,0x169F),
    ofUnicodeBlock(OF_RUNIC,0x16A0,0x16FF),
    ofUnicodeBlock(OF_TAGALOG,0x1700,0x171F),
    ofUnicodeBlock(OF_HANUNOO,0x1720,0x173F),
    ofUnicodeBlock(OF_BUHID,0x1740,0x175F),
    ofUnicodeBlock(OF_TAGBANWA,0x1760,0x177F),
    ofUnicodeBlock(OF_KHMER,0x1780,0x17FF),
    ofUnicodeBlock(OF_MONGOLIAN,0x1800,0x18AF),
    ofUnicodeBlock(OF_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,0x18B0,0x18FF),
    ofUnicodeBlock(OF_LIMBU,0x1900,0x194F),
    ofUnicodeBlock(OF_TAI_LE,0x1950,0x197F),
    ofUnicodeBlock(OF_NEW_TAI_LUE,0x1980,0x19DF),
    ofUnicodeBlock(OF_KHMER_SYMBOLS,0x19E0,0x19FF),
    ofUnicodeBlock(OF_BUGINESE,0x1A00,0x1A1F),
    ofUnicodeBlock(OF_TAI_THAM,0x1A20,0x1AAF),
    ofUnicodeBlock(OF_BALINESE,0x1B00,0x1B7F),
    ofUnicodeBlock(OF_SUNDANESE,0x1B80,0x1BBF),
    ofUnicodeBlock(OF_BATAK,0x1BC0,0x1BFF),
    ofUnicodeBlock(OF_LEPCHA,0x1C00,0x1C4F),
    ofUnicodeBlock(OF_OL_CHIKI,0x1C50,0x1C7F),
    ofUnicodeBlock(OF_SUNDANESE_SUPPLEMENT,0x1CC0,0x1CCF),
    ofUnicodeBlock(OF_VEDIC_EXTENSIONS,0x1CD0,0x1CFF),
    ofUnicodeBlock(OF_PHONETIC_EXTENSIONS,0x1D00,0x1D7F),
    ofUnicodeBlock(OF_PHONETIC_EXTENSIONS_SUPPLEMENT,0x1D80,0x1DBF),
    ofUnicodeBlock(OF_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,0x1DC0,0x1DFF),
    ofUnicodeBlock(OF_LATIN_EXTENDED_ADDITIONAL,0x1E00,0x1EFF),
    ofUnicodeBlock(OF_GREEK_EXTENDED,0x1F00,0x1FFF),
    ofUnicodeBlock(OF_GENERAL_PUNCTUATION,0x2000,0x206F),
    ofUnicodeBlock(OF_SUPERSCRIPTS_AND_SUBSCRIPTS,0x2070,0x209F),
    ofUnicodeBlock(OF_CURRENCY_SYMBOLS,0x20A0,0x20CF),
    ofUnicodeBlock(OF_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS,0x20D0,0x20FF),
    ofUnicodeBlock(OF_LETTERLIKE_SYMBOLS,0x2100,0x214F),
    ofUnicodeBlock(OF_NUMBER_FORMS,0x2150,0x218F),
    ofUnicodeBlock(OF_ARROWS,0x2190,0x21FF),
    ofUnicodeBlock(OF_MATHEMATICAL_OPERATORS,0x2200,0x22FF),
    ofUnicodeBlock(OF_MISCELLANEOUS_TECHNICAL,0x2300,0x23FF),
    ofUnicodeBlock(OF_CONTROL_PICTURES,0x2400,0x243F),
    ofUnicodeBlock(OF_OPTICAL_CHARACTER_RECOGNITION,0x2440,0x245F),
    ofUnicodeBlock(OF_ENCLOSED_ALPHANUMERICS,0x2460,0x24FF),
    ofUnicodeBlock(OF_BOX_DRAWING,0x2500,0x257F),
    ofUnicodeBlock(OF_BLOCK_ELEMENTS,0x2580,0x259F),
    ofUnicodeBlock(OF_GEOMETRIC_SHAPES,0x25A0,0x25FF),
    ofUnicodeBlock(OF_MISCELLANEOUS_SYMBOLS,0x2600,0x26FF),
    ofUnicodeBlock(OF_DINGBATS,0x2700,0x27BF),
    ofUnicodeBlock(OF_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,0x27C0,0x27EF),
    ofUnicodeBlock(OF_SUPPLEMENTAL_ARROWS_A,0x27F0,0x27FF),
    ofUnicodeBlock(OF_BRAILLE_PATTERNS,0x2800,0x28FF),
    ofUnicodeBlock(OF_SUPPLEMENTAL_ARROWS_B,0x2900,0x297F),
    ofUnicodeBlock(OF_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,0x2980,0x29FF),
    ofUnicodeBlock(OF_SUPPLEMENTAL_MATHEMATICAL_OPERATORS,0x2A00,0x2AFF),
    ofUnicodeBlock(OF_MISCELLANEOUS_SYMBOLS_AND_ARROWS,0x2B00,0x2BFF),
    ofUnicodeBlock(OF_GLAGOLITIC,0x2C00,0x2C5F),
    ofUnicodeBlock(OF_LATIN_EXTENDED_C,0x2C60,0x2C7F),
    ofUnicodeBlock(OF_COPTIC,0x2C80,0x2CFF),
    ofUnicodeBlock(OF_GEORGIAN_SUPPLEMENT,0x2D00,0x2D2F),
    ofUnicodeBlock(OF_TIFINAGH,0x2D30,0x2D7F),
    ofUnicodeBlock(OF_ETHIOPIC_EXTENDED,0x2D80,0x2DDF),
    ofUnicodeBlock(OF_CYRILLIC_EXTENDED_A,0x2DE0,0x2DFF),
    ofUnicodeBlock(OF_SUPPLEMENTAL_PUNCTUATION,0x2E00,0x2E7F),
    ofUnicodeBlock(OF_CJK_RADICALS_SUPPLEMENT,0x2E80,0x2EFF),
    ofUnicodeBlock(OF_KANGXI_RADICALS,0x2F00,0x2FDF),
    ofUnicodeBlock(OF_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,0x2FF0,0x2FFF),
    ofUnicodeBlock(OF_CJK_SYMBOLS_AND_PUNCTUATION,0x3000,0x303F),
    ofUnicodeBlock(OF_HIRAGANA,0x3040,0x309F),
    ofUnicodeBlock(OF_KATAKANA,0x30A0,0x30FF),
    ofUnicodeBlock(OF_BOPOMOFO,0x3100,0x312F),
    ofUnicodeBlock(OF_HANGUL_COMPATIBILITY_JAMO,0x3130,0x318F),
    ofUnicodeBlock(OF_KANBUN,0x3190,0x319F),
    ofUnicodeBlock(OF_BOPOMOFO_EXTENDED,0x31A0,0x31BF),
    ofUnicodeBlock(OF_CJK_STROKES,0x31C0,0x31EF),
    ofUnicodeBlock(OF_KATAKANA_PHONETIC_EXTENSIONS,0x31F0,0x31FF),
    ofUnicodeBlock(OF_ENCLOSED_CJK_LETTERS_AND_MONTHS,0x3200,0x32FF),
    ofUnicodeBlock(OF_CJK_COMPATIBILITY,0x3300,0x33FF),
    ofUnicodeBlock(OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,0x3400,0x4DBF),
    ofUnicodeBlock(OF_YIJING_HEXAGRAM_SYMBOLS,0x4DC0,0x4DFF),
    ofUnicodeBlock(OF_CJK_UNIFIED_IDEOGRAPHS,0x4E00,0x9FFF),
    ofUnicodeBlock(OF_YI_SYLLABLES,0xA000,0xA48F),
    ofUnicodeBlock(OF_YI_RADICALS,0xA490,0xA4CF),
    ofUnicodeBlock(OF_LISU,0xA4D0,0xA4FF),
    ofUnicodeBlock(OF_VAI,0xA500,0xA63F),
    ofUnicodeBlock(OF_CYRILLIC_EXTENDED_B,0xA640,0xA69F),
    ofUnicodeBlock(OF_BAMUM,0xA6A0,0xA6FF),
    ofUnicodeBlock(OF_MODIFIER_TONE_LETTERS,0xA700,0xA71F),
    ofUnicodeBlock(OF_LATIN_EXTENDED_D,0xA720,0xA7FF),
    ofUnicodeBlock(OF_SYLOTI_NAGRI,0xA800,0xA82F),
    ofUnicodeBlock(OF_COMMON_INDIC_NUMBER_FORMS,0xA830,0xA83F),
    ofUnicodeBlock(OF_PHAGS_PA,0xA840,0xA87F),
    ofUnicodeBlock(OF_SAURASHTRA,0xA880,0xA8DF),
    ofUnicodeBlock(OF_DEVANAGARI_EXTENDED,0xA8E0,0xA8FF),
    ofUnicodeBlock(OF_KAYAH_LI,0xA900,0xA92F),
    ofUnicodeBlock(OF_REJANG,0xA930,0xA95F),
    ofUnicodeBlock(OF_HANGUL_JAMO_EXTENDED_A,0xA960,0xA97F),
    ofUnicodeBlock(OF_JAVANESE,0xA980,0xA9DF),
    ofUnicodeBlock(OF_CHAM,0xAA00,0xAA5F),
    ofUnicodeBlock(OF_MYANMAR_EXTENDED_A,0xAA60,0xAA7F),
    ofUnicodeBlock(OF_TAI_VIET,0xAA80,0xAADF),
    ofUnicodeBlock(OF_MEETEI_MAYEK_EXTENSIONS,0xAAE0,0xAAFF),
    ofUnicodeBlock(OF_ETHIOPIC_EXTENDED_A,0xAB00,0xAB2F),
    ofUnicodeBlock(OF_MEETEI_MAYEK,0xABC0,0xABFF),
    ofUnicodeBlock(OF_HANGUL_SYLLABLES,0xAC00,0xD7AF),
    ofUnicodeBlock(OF_HANGUL_JAMO_EXTENDED_B,0xD7B0,0xD7FF),
    ofUnicodeBlock(OF_HIGH_SURROGATES,0xD800,0xDB7F),
    ofUnicodeBlock(OF_HIGH_PRIVATE_USE_SURROGATES,0xDB80,0xDBFF),
    ofUnicodeBlock(OF_LOW_SURROGATES,0xDC00,0xDFFF),
    ofUnicodeBlock(OF_PRIVATE_USE_AREA,0xE000,0xF8FF),
    ofUnicodeBlock(OF_CJK_COMPATIBILITY_IDEOGRAPHS,0xF900,0xFAFF),
    ofUnicodeBlock(OF_ALPHABETIC_PRESENTATION_FORMS,0xFB00,0xFB4F),
    ofUnicodeBlock(OF_ARABIC_PRESENTATION_FORMS_A,0xFB50,0xFDFF),
    ofUnicodeBlock(OF_VARIATION_SELECTORS,0xFE00,0xFE0F),
    ofUnicodeBlock(OF_VERTICAL_FORMS,0xFE10,0xFE1F),
    ofUnicodeBlock(OF_COMBINING_HALF_MARKS,0xFE20,0xFE2F),
    ofUnicodeBlock(OF_CJK_COMPATIBILITY_FORMS,0xFE30,0xFE4F),
    ofUnicodeBlock(OF_SMALL_FORM_VARIANTS,0xFE50,0xFE6F),
    ofUnicodeBlock(OF_ARABIC_PRESENTATION_FORMS_B,0xFE70,0xFEFF),
    ofUnicodeBlock(OF_HALFWIDTH_AND_FULLWIDTH_FORMS,0xFF00,0xFFEF),
    ofUnicodeBlock(OF_SPECIALS,0xFFF0,0xFFFF),
    ofUnicodeBlock(OF_LINEAR_B_SYLLABARY,0x10000,0x1007F),
    ofUnicodeBlock(OF_LINEAR_B_IDEOGRAMS,0x10080,0x100FF),
    ofUnicodeBlock(OF_AEGEAN_NUMBERS,0x10100,0x1013F),
    ofUnicodeBlock(OF_ANCIENT_GREEK_NUMBERS,0x10140,0x1018F),
    ofUnicodeBlock(OF_ANCIENT_SYMBOLS,0x10190,0x101CF),
    ofUnicodeBlock(OF_PHAISTOS_DISC,0x101D0,0x101FF),
    ofUnicodeBlock(OF_LYCIAN,0x10280,0x1029F),
    ofUnicodeBlock(OF_CARIAN,0x102A0,0x102DF),
    ofUnicodeBlock(OF_OLD_ITALIC,0x10300,0x1032F),
    ofUnicodeBlock(OF_GOTHIC,0x10330,0x1034F),
    ofUnicodeBlock(OF_UGARITIC,0x10380,0x1039F),
    ofUnicodeBlock(OF_OLD_PERSIAN,0x103A0,0x103DF),
    ofUnicodeBlock(OF_DESERET,0x10400,0x1044F),
    ofUnicodeBlock(OF_SHAVIAN,0x10450,0x1047F),
    ofUnicodeBlock(OF_OSMANYA,0x10480,0x104AF),
    ofUnicodeBlock(OF_CYPRIOT_SYLLABARY,0x10800,0x1083F),
    ofUnicodeBlock(OF_IMPERIAL_ARAMAIC,0x10840,0x1085F),
    ofUnicodeBlock(OF_PHOENICIAN,0x10900,0x1091F),
    ofUnicodeBlock(OF_LYDIAN,0x10920,0x1093F),
    ofUnicodeBlock(OF_MEROITIC_HIEROGLYPHS,0x10980,0x1099F),
    ofUnicodeBlock(OF_MEROITIC_CURSIVE,0x109A0,0x109FF),
    ofUnicodeBlock(OF_KHAROSHTHI,0x10A00,0x10A5F),
    ofUnicodeBlock(OF_OLD_SOUTH_ARABIAN,0x10A60,0x10A7F),
    ofUnicodeBlock(OF_AVESTAN,0x10B00,0x10B3F),
    ofUnicodeBlock(OF_INSCRIPTIONAL_PARTHIAN,0x10B40,0x10B5F),
    ofUnicodeBlock(OF_INSCRIPTIONAL_PAHLAVI,0x10B60,0x10B7F),
    ofUnicodeBlock(OF_OLD_TURKIC,0x10C00,0x10C4F),
    ofUnicodeBlock(OF_RUMI_NUMERAL_SYMBOLS,0x10E60,0x10E7F),
    ofUnicodeBlock(OF_BRAHMI,0x11000,0x1107F),
    ofUnicodeBlock(OF_KAITHI,0x11080,0x110CF),
    ofUnicodeBlock(OF_SORA_SOMPENG,0x110D0,0x110FF),
    ofUnicodeBlock(OF_CHAKMA,0x11100,0x1114F),
    ofUnicodeBlock(OF_SHARADA,0x11180,0x111DF),
    ofUnicodeBlock(OF_TAKRI,0x11680,0x116CF),
    ofUnicodeBlock(OF_CUNEIFORM,0x12000,0x123FF),
    ofUnicodeBlock(OF_CUNEIFORM_NUMBERS_AND_PUNCTUATION,0x12400,0x1247F),
    ofUnicodeBlock(OF_EGYPTIAN_HIEROGLYPHS,0x13000,0x1342F),
    ofUnicodeBlock(OF_BAMUM_SUPPLEMENT,0x16800,0x16A3F),
    ofUnicodeBlock(OF_MIAO,0x16F00,0x16F9F),
    ofUnicodeBlock(OF_KANA_SUPPLEMENT,0x1B000,0x1B0FF),
    ofUnicodeBlock(OF_BYZANTINE_MUSICAL_SYMBOLS,0x1D000,0x1D0FF),
    ofUnicodeBlock(OF_MUSICAL_SYMBOLS,0x1D100,0x1D1FF),
    ofUnicodeBlock(OF_ANCIENT_GREEK_MUSICAL_NOTATION,0x1D200,0x1D24F),
    ofUnicodeBlock(OF_TAI_XUAN_JING_SYMBOLS,0x1D300,0x1D35F),
    ofUnicodeBlock(OF_COUNTING_ROD_NUMERALS,0x1D360,0x1D37F),
    ofUnicodeBlock(OF_MATHEMATICAL_ALPHANUMERIC_SYMBOLS,0x1D400,0x1D7FF),
    ofUnicodeBlock(OF_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,0x1EE00,0x1EEFF),
    ofUnicodeBlock(OF_MAHJONG_TILES,0x1F000,0x1F02F),
    ofUnicodeBlock(OF_DOMINO_TILES,0x1F030,0x1F09F),
    ofUnicodeBlock(OF_PLAYING_CARDS,0x1F0A0,0x1F0FF),
    ofUnicodeBlock(OF_ENCLOSED_ALPHANUMERIC_SUPPLEMENT,0x1F100,0x1F1FF),
    ofUnicodeBlock(OF_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,0x1F200,0x1F2FF),
    ofUnicodeBlock(OF_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,0x1F300,0x1F5FF),
    ofUnicodeBlock(OF_EMOTICONS,0x1F600,0x1F64F),
    ofUnicodeBlock(OF_TRANSPORT_AND_MAP_SYMBOLS,0x1F680,0x1F6FF),
    ofUnicodeBlock(OF_ALCHEMICAL_SYMBOLS,0x1F700,0x1F77F),
    ofUnicodeBlock(OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,0x20000,0x2A6DF),
    ofUnicodeBlock(OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,0x2A700,0x2B73F),
    ofUnicodeBlock(OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,0x2B740,0x2B81F),
    ofUnicodeBlock(OF_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,0x2F800,0x2FA1F),
    ofUnicodeBlock(OF_TAGS,0xE0000,0xE007F),
    ofUnicodeBlock(OF_VARIATION_SELECTORS_SUPPLEMENT,0xE0100,0xE01EF),
    ofUnicodeBlock(OF_SUPPLEMENTARY_PRIVATE_USE_AREA_A,0xF0000,0xFFFFF),
    ofUnicodeBlock(OF_SUPPLEMENTARY_PRIVATE_USE_AREA_B,0x100000,0x10FFFF)
};

/// Specialization: encode a single UTF32 code point as a UTF8 string.
///
/// The code point is handed to Poco as a one-element UTF32 buffer.
template <>
std::string ofToUTF8(const ofUTF32Char& codepoint)
{
    const ofUTF32Char* singleChar = &codepoint;

    std::string utf8;
    Poco::UnicodeConverter::toUTF8(singleChar, 1, utf8);
    return utf8;
}


/// Specialization: convert a UTF32 string to UTF16 by going through an
/// intermediate UTF8 representation.
template <>
ofUTF16String ofToUTF16(const ofUTF32String& utf32string)
{
    const std::string asUtf8 = ofToUTF8(utf32string);
    return ofToUTF16(asUtf8);
}


/// Specialization: convert a UTF16 string to UTF32 by going through an
/// intermediate UTF8 representation.
template <>
ofUTF32String ofToUTF32(const Poco::UTF16String& utf16string)
{
    const std::string asUtf8 = ofToUTF8(utf16string);
    return ofToUTF32(asUtf8);
}


std::size_t ofUTFStringLength(const std::string& utf8string)
{
    return ofToUTF32(utf8string).size();
}


std::size_t ofUTFStringLength(const ofUTF16String& utf16string)
{
    return ofToUTF32(utf16string).size();
}


/// Count the code points of a UTF32 string; this is simply the size of the
/// string, so no conversion is performed.
std::size_t ofUTFStringLength(const ofUTF32String& utf32string)
{
    const std::size_t codepointCount = utf32string.size();
    return codepointCount;
}


/// Upper-case a UTF8 string using Poco::UTF8::toUpper().
std::string ofUTFToUpper(const std::string& utf8string)
{
    std::string upper = Poco::UTF8::toUpper(utf8string);
    return upper;
}


/// Upper-case a UTF16 string: convert to UTF8, upper-case the UTF8 text, then
/// convert the result back to UTF16.
ofUTF16String ofUTFToUpper(const ofUTF16String& utf16string)
{
    const std::string asUtf8 = ofToUTF8(utf16string);
    const std::string upperUtf8 = ofUTFToUpper(asUtf8);
    return ofToUTF16(upperUtf8);
}


/// Upper-case a UTF32 string: convert to UTF8, upper-case the UTF8 text, then
/// convert the result back to UTF32.
ofUTF32String ofUTFToUpper(const ofUTF32String& utf32string)
{
    const std::string asUtf8 = ofToUTF8(utf32string);
    const std::string upperUtf8 = ofUTFToUpper(asUtf8);
    return ofToUTF32(upperUtf8);
}


/// Lower-case a UTF8 string using Poco::UTF8::toLower().
std::string ofUTFToLower(const std::string& utf8string)
{
    std::string lower = Poco::UTF8::toLower(utf8string);
    return lower;
}


/// Lower-case a UTF16 string: convert to UTF8, lower-case the UTF8 text, then
/// convert the result back to UTF16.
ofUTF16String ofUTFToLower(const ofUTF16String& utf16string)
{
    const std::string asUtf8 = ofToUTF8(utf16string);
    const std::string lowerUtf8 = ofUTFToLower(asUtf8);
    return ofToUTF16(lowerUtf8);
}


/// Lower-case a UTF32 string: convert to UTF8, lower-case the UTF8 text, then
/// convert the result back to UTF32.
ofUTF32String ofUTFToLower(const ofUTF32String& utf32string)
{
    const std::string asUtf8 = ofToUTF8(utf32string);
    const std::string lowerUtf8 = ofUTFToLower(asUtf8);
    return ofToUTF32(lowerUtf8);
}


/// \brief Compute line-break information for a UTF8 encoded string.
///
/// The work is done by set_linebreaks_utf8(); the output buffer handed to it
/// holds one entry per UTF8 code unit (byte) of the input.
///
/// This is the definition of the UTF8 overload declared in ofUnicode.h.
///
/// \param utf8string A UTF8 encoded string.
/// \param language The language tag passed through to set_linebreaks_utf8().
/// \returns A vector with utf8string.size() entries; empty for empty input.
std::vector<char> ofGetLineBreaks(const std::string& utf8string,
                                  const std::string language)
{
    std::vector<char> breaks(utf8string.size());

    // Taking &breaks[0] of an empty vector is undefined behavior, and there
    // is nothing to analyze for an empty string anyway.
    if (breaks.empty())
    {
        return breaks;
    }

    set_linebreaks_utf8(reinterpret_cast<const utf8_t*>(utf8string.c_str()),
                        utf8string.size(),
                        language.c_str(),
                        &breaks[0]);

    return breaks;
}

/// \brief Legacy name of the UTF8 overload of ofGetLineBreaks().
///
/// Kept so that code which declared and called ofBreakLines() itself keeps
/// linking; it simply forwards to ofGetLineBreaks().
///
/// \param utf8string A UTF8 encoded string.
/// \param language The language tag passed through to set_linebreaks_utf8().
/// \returns The result of ofGetLineBreaks(utf8string, language).
std::vector<char> ofBreakLines(const std::string& utf8string,
                               const std::string language)
{
    return ofGetLineBreaks(utf8string, language);
}

/// \brief Compute line-break information for a UTF16 encoded string.
///
/// The work is done by set_linebreaks_utf16(); the output buffer handed to it
/// holds one entry per UTF16 code unit of the input.
///
/// \param utf16string A UTF16 encoded string.
/// \param language The language tag passed through to set_linebreaks_utf16().
/// \returns A vector with utf16string.size() entries; empty for empty input.
std::vector<char> ofGetLineBreaks(const ofUTF16String& utf16string,
                                  const std::string language)
{
    std::vector<char> breaks(utf16string.size());

    // Taking &breaks[0] of an empty vector is undefined behavior, and there
    // is nothing to analyze for an empty string anyway.
    if (breaks.empty())
    {
        return breaks;
    }

    set_linebreaks_utf16(reinterpret_cast<const utf16_t*>(utf16string.c_str()),
                         utf16string.size(),
                         language.c_str(),
                         &breaks[0]);

    return breaks;
}

/// \brief Compute line-break information for a UTF32 encoded string.
///
/// The work is done by set_linebreaks_utf32(); the output buffer handed to it
/// holds one entry per UTF32 code unit of the input.
///
/// \param utf32string A UTF32 encoded string.
/// \param language The language tag passed through to set_linebreaks_utf32().
/// \returns A vector with utf32string.size() entries; empty for empty input.
std::vector<char> ofGetLineBreaks(const ofUTF32String& utf32string,
                                  const std::string language)
{
    std::vector<char> breaks(utf32string.size());

    // Taking &breaks[0] of an empty vector is undefined behavior, and there
    // is nothing to analyze for an empty string anyway.
    if (breaks.empty())
    {
        return breaks;
    }

    set_linebreaks_utf32(reinterpret_cast<const utf32_t*>(utf32string.c_str()),
                         utf32string.size(),
                         language.c_str(),
                         &breaks[0]);

    return breaks;
}

/// \brief Compute word-break information for a UTF8 encoded string.
///
/// The work is done by set_wordbreaks_utf8(); the output buffer handed to it
/// holds one entry per UTF8 code unit (byte) of the input.
///
/// \param utf8string A UTF8 encoded string.
/// \param language The language tag passed through to set_wordbreaks_utf8().
/// \returns A vector with utf8string.size() entries; empty for empty input.
std::vector<char> ofGetWordBreaks(const std::string& utf8string,
                                  const std::string language)
{
    std::vector<char> breaks(utf8string.size());

    // Taking &breaks[0] of an empty vector is undefined behavior, and there
    // is nothing to analyze for an empty string anyway.
    if (breaks.empty())
    {
        return breaks;
    }

    set_wordbreaks_utf8(reinterpret_cast<const utf8_t*>(utf8string.c_str()),
                        utf8string.size(),
                        language.c_str(),
                        &breaks[0]);

    return breaks;
}

/// \brief Compute word-break information for a UTF16 encoded string.
///
/// The work is done by set_wordbreaks_utf16(); the output buffer handed to it
/// holds one entry per UTF16 code unit of the input.
///
/// \param utf16string A UTF16 encoded string.
/// \param language The language tag passed through to set_wordbreaks_utf16().
/// \returns A vector with utf16string.size() entries; empty for empty input.
std::vector<char> ofGetWordBreaks(const ofUTF16String& utf16string,
                                  const std::string language)
{
    std::vector<char> breaks(utf16string.size());

    // Taking &breaks[0] of an empty vector is undefined behavior, and there
    // is nothing to analyze for an empty string anyway.
    if (breaks.empty())
    {
        return breaks;
    }

    set_wordbreaks_utf16(reinterpret_cast<const utf16_t*>(utf16string.c_str()),
                         utf16string.size(),
                         language.c_str(),
                         &breaks[0]);

    return breaks;
}

/// \brief Compute word-break information for a UTF32 encoded string.
///
/// The work is done by set_wordbreaks_utf32(); the output buffer handed to it
/// holds one entry per UTF32 code unit of the input.
///
/// \param utf32string A UTF32 encoded string.
/// \param language The language tag passed through to set_wordbreaks_utf32().
/// \returns A vector with utf32string.size() entries; empty for empty input.
std::vector<char> ofGetWordBreaks(const ofUTF32String& utf32string,
                                  const std::string language)
{
    std::vector<char> breaks(utf32string.size());

    // Taking &breaks[0] of an empty vector is undefined behavior, and there
    // is nothing to analyze for an empty string anyway.
    if (breaks.empty())
    {
        return breaks;
    }

    set_wordbreaks_utf32(reinterpret_cast<const utf32_t*>(utf32string.c_str()),
                         utf32string.size(),
                         language.c_str(),
                         &breaks[0]);

    return breaks;
}


/// Build the set of all code points contained in the given Unicode block.
///
/// The enum value is used directly as an index into OF_UNICODE_BLOCKS.
std::set<ofUTF32Char> ofGetUnicodeBlock(ofUnicodeBlockName name)
{
    // TODO: Cache these small sets inside the unicode block object?
    const ofUnicodeBlock& block = OF_UNICODE_BLOCKS[name];
    return block.getSet();
}


/// Build a UTF8 string containing every code point of the given Unicode
/// block, from the block's first code point to its last (both inclusive).
///
/// The enum value is used directly as an index into OF_UNICODE_BLOCKS.
std::string ofGetUnicodeBlockString(ofUnicodeBlockName name)
{
    std::string utf8;

    ofUTF32Char codepoint = OF_UNICODE_BLOCKS[name].begin;
    while (codepoint <= OF_UNICODE_BLOCKS[name].end)
    {
        utf8 += ofToUTF8(codepoint);
        ++codepoint;
    }

    return utf8;
}

std::set<ofUTF32Char> ofGetDefaultUnicodeBlock()
{
    std::set<ofUTF32Char> s;
    for (ofUTF32Char i = 0; i < STANDARD_UNICODE_CHARS_LEN; ++i)
    {
        s.insert(STANDARD_UNICODE_CHARS[i]);
    }
    return s;
}

std::string ofGetDefaultUnicodeBlockString()
{
    std::string s;
    for (ofUTF32Char i = 0; i < STANDARD_UNICODE_CHARS_LEN; ++i)
    {
        s += ofToUTF8(STANDARD_UNICODE_CHARS[i]);
    }
    return s;
}


## ofUnicode.h
// =============================================================================
//
// Copyright (c) 2009-2013 Christopher Baker <http://christopherbaker.net>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// =============================================================================


#pragma once


#include <stdint.h>
#include <set>
#include <string>
#include "Poco/Unicode.h"
#include "Poco/UnicodeConverter.h"
#include "Poco/UTF8Encoding.h"
#include "Poco/UTF8String.h"
#include "Poco/UTF16Encoding.h"
#include "Poco/UTF32Encoding.h"
#include "Poco/TextIterator.h"
#include "linebreak.h"
#include "linebreakdef.h"
#include "wordbreak.h"
#include "wordbreakdef.h"
#include "ofConstants.h"


// TODO: add unicode block definitions
// http://www.unicode.org/Public/UNIDATA/Blocks.txt

/// Identifiers for the Unicode blocks known to this library.
///
/// The enumerators are numbered consecutively starting at 0. The
/// implementation (ofGetUnicodeBlock() / ofGetUnicodeBlockString()) uses the
/// enum value directly as an index into its table of block ranges, so the
/// order of the enumerators must match the order of that table; do not insert
/// or reorder entries here without updating the table as well.
enum ofUnicodeBlockName {
    OF_BASIC_LATIN = 0,
    OF_LATIN_1_SUPPLEMENT,
    OF_LATIN_EXTENDED_A,
    OF_LATIN_EXTENDED_B,
    OF_IPA_EXTENSIONS,
    OF_SPACING_MODIFIER_LETTERS,
    OF_COMBINING_DIACRITICAL_MARKS,
    OF_GREEK_AND_COPTIC,
    OF_CYRILLIC,
    OF_CYRILLIC_SUPPLEMENT,
    OF_ARMENIAN,
    OF_HEBREW,
    OF_ARABIC,
    OF_SYRIAC,
    OF_ARABIC_SUPPLEMENT,
    OF_THAANA,
    OF_NKO,
    OF_SAMARITAN,
    OF_MANDAIC,
    OF_ARABIC_EXTENDED_A,
    OF_DEVANAGARI,
    OF_BENGALI,
    OF_GURMUKHI,
    OF_GUJARATI,
    OF_ORIYA,
    OF_TAMIL,
    OF_TELUGU,
    OF_KANNADA,
    OF_MALAYALAM,
    OF_SINHALA,
    OF_THAI,
    OF_LAO,
    OF_TIBETAN,
    OF_MYANMAR,
    OF_GEORGIAN,
    OF_HANGUL_JAMO,
    OF_ETHIOPIC,
    OF_ETHIOPIC_SUPPLEMENT,
    OF_CHEROKEE,
    OF_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
    OF_OGHAM,
    OF_RUNIC,
    OF_TAGALOG,
    OF_HANUNOO,
    OF_BUHID,
    OF_TAGBANWA,
    OF_KHMER,
    OF_MONGOLIAN,
    OF_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
    OF_LIMBU,
    OF_TAI_LE,
    OF_NEW_TAI_LUE,
    OF_KHMER_SYMBOLS,
    OF_BUGINESE,
    OF_TAI_THAM,
    OF_BALINESE,
    OF_SUNDANESE,
    OF_BATAK,
    OF_LEPCHA,
    OF_OL_CHIKI,
    OF_SUNDANESE_SUPPLEMENT,
    OF_VEDIC_EXTENSIONS,
    OF_PHONETIC_EXTENSIONS,
    OF_PHONETIC_EXTENSIONS_SUPPLEMENT,
    OF_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
    OF_LATIN_EXTENDED_ADDITIONAL,
    OF_GREEK_EXTENDED,
    OF_GENERAL_PUNCTUATION,
    OF_SUPERSCRIPTS_AND_SUBSCRIPTS,
    OF_CURRENCY_SYMBOLS,
    OF_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS,
    OF_LETTERLIKE_SYMBOLS,
    OF_NUMBER_FORMS,
    OF_ARROWS,
    OF_MATHEMATICAL_OPERATORS,
    OF_MISCELLANEOUS_TECHNICAL,
    OF_CONTROL_PICTURES,
    OF_OPTICAL_CHARACTER_RECOGNITION,
    OF_ENCLOSED_ALPHANUMERICS,
    OF_BOX_DRAWING,
    OF_BLOCK_ELEMENTS,
    OF_GEOMETRIC_SHAPES,
    OF_MISCELLANEOUS_SYMBOLS,
    OF_DINGBATS,
    OF_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
    OF_SUPPLEMENTAL_ARROWS_A,
    OF_BRAILLE_PATTERNS,
    OF_SUPPLEMENTAL_ARROWS_B,
    OF_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
    OF_SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
    OF_MISCELLANEOUS_SYMBOLS_AND_ARROWS,
    OF_GLAGOLITIC,
    OF_LATIN_EXTENDED_C,
    OF_COPTIC,
    OF_GEORGIAN_SUPPLEMENT,
    OF_TIFINAGH,
    OF_ETHIOPIC_EXTENDED,
    OF_CYRILLIC_EXTENDED_A,
    OF_SUPPLEMENTAL_PUNCTUATION,
    OF_CJK_RADICALS_SUPPLEMENT,
    OF_KANGXI_RADICALS,
    OF_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
    OF_CJK_SYMBOLS_AND_PUNCTUATION,
    OF_HIRAGANA,
    OF_KATAKANA,
    OF_BOPOMOFO,
    OF_HANGUL_COMPATIBILITY_JAMO,
    OF_KANBUN,
    OF_BOPOMOFO_EXTENDED,
    OF_CJK_STROKES,
    OF_KATAKANA_PHONETIC_EXTENSIONS,
    OF_ENCLOSED_CJK_LETTERS_AND_MONTHS,
    OF_CJK_COMPATIBILITY,
    OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
    OF_YIJING_HEXAGRAM_SYMBOLS,
    OF_CJK_UNIFIED_IDEOGRAPHS,
    OF_YI_SYLLABLES,
    OF_YI_RADICALS,
    OF_LISU,
    OF_VAI,
    OF_CYRILLIC_EXTENDED_B,
    OF_BAMUM,
    OF_MODIFIER_TONE_LETTERS,
    OF_LATIN_EXTENDED_D,
    OF_SYLOTI_NAGRI,
    OF_COMMON_INDIC_NUMBER_FORMS,
    OF_PHAGS_PA,
    OF_SAURASHTRA,
    OF_DEVANAGARI_EXTENDED,
    OF_KAYAH_LI,
    OF_REJANG,
    OF_HANGUL_JAMO_EXTENDED_A,
    OF_JAVANESE,
    OF_CHAM,
    OF_MYANMAR_EXTENDED_A,
    OF_TAI_VIET,
    OF_MEETEI_MAYEK_EXTENSIONS,
    OF_ETHIOPIC_EXTENDED_A,
    OF_MEETEI_MAYEK,
    OF_HANGUL_SYLLABLES,
    OF_HANGUL_JAMO_EXTENDED_B,
    OF_HIGH_SURROGATES,
    OF_HIGH_PRIVATE_USE_SURROGATES,
    OF_LOW_SURROGATES,
    OF_PRIVATE_USE_AREA,
    OF_CJK_COMPATIBILITY_IDEOGRAPHS,
    OF_ALPHABETIC_PRESENTATION_FORMS,
    OF_ARABIC_PRESENTATION_FORMS_A,
    OF_VARIATION_SELECTORS,
    OF_VERTICAL_FORMS,
    OF_COMBINING_HALF_MARKS,
    OF_CJK_COMPATIBILITY_FORMS,
    OF_SMALL_FORM_VARIANTS,
    OF_ARABIC_PRESENTATION_FORMS_B,
    OF_HALFWIDTH_AND_FULLWIDTH_FORMS,
    OF_SPECIALS,
    OF_LINEAR_B_SYLLABARY,
    OF_LINEAR_B_IDEOGRAMS,
    OF_AEGEAN_NUMBERS,
    OF_ANCIENT_GREEK_NUMBERS,
    OF_ANCIENT_SYMBOLS,
    OF_PHAISTOS_DISC,
    OF_LYCIAN,
    OF_CARIAN,
    OF_OLD_ITALIC,
    OF_GOTHIC,
    OF_UGARITIC,
    OF_OLD_PERSIAN,
    OF_DESERET,
    OF_SHAVIAN,
    OF_OSMANYA,
    OF_CYPRIOT_SYLLABARY,
    OF_IMPERIAL_ARAMAIC,
    OF_PHOENICIAN,
    OF_LYDIAN,
    OF_MEROITIC_HIEROGLYPHS,
    OF_MEROITIC_CURSIVE,
    OF_KHAROSHTHI,
    OF_OLD_SOUTH_ARABIAN,
    OF_AVESTAN,
    OF_INSCRIPTIONAL_PARTHIAN,
    OF_INSCRIPTIONAL_PAHLAVI,
    OF_OLD_TURKIC,
    OF_RUMI_NUMERAL_SYMBOLS,
    OF_BRAHMI,
    OF_KAITHI,
    OF_SORA_SOMPENG,
    OF_CHAKMA,
    OF_SHARADA,
    OF_TAKRI,
    OF_CUNEIFORM,
    OF_CUNEIFORM_NUMBERS_AND_PUNCTUATION,
    OF_EGYPTIAN_HIEROGLYPHS,
    OF_BAMUM_SUPPLEMENT,
    OF_MIAO,
    OF_KANA_SUPPLEMENT,
    OF_BYZANTINE_MUSICAL_SYMBOLS,
    OF_MUSICAL_SYMBOLS,
    OF_ANCIENT_GREEK_MUSICAL_NOTATION,
    OF_TAI_XUAN_JING_SYMBOLS,
    OF_COUNTING_ROD_NUMERALS,
    OF_MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
    OF_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
    OF_MAHJONG_TILES,
    OF_DOMINO_TILES,
    OF_PLAYING_CARDS,
    OF_ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
    OF_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
    OF_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
    OF_EMOTICONS,
    OF_TRANSPORT_AND_MAP_SYMBOLS,
    OF_ALCHEMICAL_SYMBOLS,
    OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
    OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
    OF_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
    OF_TAGS,
    OF_VARIATION_SELECTORS_SUPPLEMENT,
    OF_SUPPLEMENTARY_PRIVATE_USE_AREA_A,
    OF_SUPPLEMENTARY_PRIVATE_USE_AREA_B
};


class ofUnicodeBlock
{
public:
    ofUnicodeBlock(ofUnicodeBlockName _name,
                   ofUTF32Char _begin,
                   ofUTF32Char _end):
        name(_name),
        begin(_begin),
        end(_end)
    {
    }

    std::set<ofUTF32Char> getSet() const
    {
        std::set<ofUTF32Char> s;
        for (ofUTF32Char i = begin; i <= end; ++i)
        {
            s.insert(i);
        }
        return s;
    }

    ofUnicodeBlockName name;
    ofUTF32Char begin;
    ofUTF32Char end;
};

/// \brief Convert UTF encoded data to a UTF8 encoded string.
///
/// The conversion is delegated to Poco::UnicodeConverter::toUTF8().
///
/// \param f The data to be converted.
/// \returns A UTF8 encoded string.
template <typename F>
std::string ofToUTF8(const F& f)
{
    std::string utf8;
    Poco::UnicodeConverter::toUTF8(f, utf8);
    return utf8;
}

/// \brief Convert a single UTF32 code point to a UTF8 encoded string.
/// \param codepoint The code point to be converted.
/// \returns A UTF8 encoded string.
template <>
std::string ofToUTF8(const ofUTF32Char& codepoint);


/// \brief Convert UTF encoded data to a UTF16 encoded string.
///
/// The conversion is delegated to Poco::UnicodeConverter::toUTF16().
///
/// \param f The data to be converted.
/// \returns A UTF16 encoded string.
template <typename F>
ofUTF16String ofToUTF16(const F& f)
{
    Poco::UTF16String utf16;
    Poco::UnicodeConverter::toUTF16(f, utf16);
    return utf16;
}

/// \brief Convert a UTF32 encoded string to UTF16.
/// \param utf32string A UTF32 encoded string.
/// \returns A UTF16 encoded string.
template <>
ofUTF16String ofToUTF16(const ofUTF32String& utf32string);


/// \brief Convert UTF encoded data to a UTF32 encoded string.
///
/// The conversion is delegated to Poco::UnicodeConverter::toUTF32().
///
/// \param f The data to be converted.
/// \returns A UTF32 encoded string.
template <typename F>
ofUTF32String ofToUTF32(const F& f)
{
    Poco::UTF32String utf32;
    Poco::UnicodeConverter::toUTF32(f, utf32);
    return utf32;
}

/// \brief Convert a UTF16 encoded string to UTF32.
/// \param utf16string A UTF16 encoded string.
/// \returns A UTF32 encoded string.
template <>
ofUTF32String ofToUTF32(const Poco::UTF16String& utf16string);

/// \brief Get the number of Unicode codepoints encoded in the string.
///
/// When encoded with UTF8, a single Unicode codepoint is encoded with between
/// 1 and 4 code units (bytes).
///
/// \param utf8string A UTF8 encoded string.
/// \returns The number of Unicode codepoints encoded in the string.
std::size_t ofUTFStringLength(const std::string& utf8string);

/// \brief Get the number of Unicode codepoints encoded in the string.
///
/// When encoded with UTF16, a single Unicode codepoint is encoded with between
/// 1 and 2 code units.
///
/// \param utf16string A UTF16 encoded string.
/// \returns The number of Unicode codepoints encoded in the string.
std::size_t ofUTFStringLength(const ofUTF16String& utf16string);

/// \brief Get the number of Unicode codepoints encoded in the string.
///
/// For ofUTF32String, the number of codepoints is equal to its length.
///
/// \param utf32string A UTF32 encoded string.
/// \returns The number of Unicode codepoints encoded in the string.
std::size_t ofUTFStringLength(const ofUTF32String& utf32string);


/// \brief Transform a UTF8 string to upper case.
/// \param utf8string A UTF8 encoded string.
/// \returns The transformed string.
std::string ofUTFToUpper(const std::string& utf8string);


/// \brief Transform a UTF16 string to upper case.
/// \param utf16string A UTF16 encoded string.
/// \returns The transformed string.
ofUTF16String ofUTFToUpper(const ofUTF16String& utf16string);


/// \brief Transform a UTF32 string to upper case.
/// \param utf32string A UTF32 encoded string.
/// \returns The transformed string.
ofUTF32String ofUTFToUpper(const ofUTF32String& utf32string);


/// \brief Transform a UTF8 string to lower case.
/// \param utf8string A UTF8 encoded string.
/// \returns The transformed string.
std::string ofUTFToLower(const std::string& utf8string);


/// \brief Transform a UTF16 string to lower case.
/// \param utf16string A UTF16 encoded string.
/// \returns The transformed string.
ofUTF16String ofUTFToLower(const ofUTF16String& utf16string);


/// \brief Transform a UTF32 string to lower case.
/// \param utf32string A UTF32 encoded string.
/// \returns The transformed string.
ofUTF32String ofUTFToLower(const ofUTF32String& utf32string);


/// \brief Compute line break information for a UTF8 encoded string.
/// \param utf8string A UTF8 encoded string.
/// \param language The language code handed to the line breaking routine.
/// \returns A vector with one entry per code unit (byte) of the input.
/// \note The entry values are presumably the libunibreak LINEBREAK_* codes;
///       confirm against the line breaking library in use.
std::vector<char> ofGetLineBreaks(const std::string& utf8string,
                                  const std::string language = "en");

/// \brief Compute line break information for a UTF16 encoded string.
/// \param utf16string A UTF16 encoded string.
/// \param language The language code handed to the line breaking routine.
/// \returns A vector with one entry per code unit of the input.
std::vector<char> ofGetLineBreaks(const ofUTF16String& utf16string,
                                  const std::string language = "en");

/// \brief Compute line break information for a UTF32 encoded string.
/// \param utf32string A UTF32 encoded string.
/// \param language The language code handed to the line breaking routine.
/// \returns A vector with one entry per code unit of the input.
std::vector<char> ofGetLineBreaks(const ofUTF32String& utf32string,
                                  const std::string language = "en");

/// \brief Compute word break information for a UTF8 encoded string.
/// \param utf8string A UTF8 encoded string.
/// \param language The language code handed to the word breaking routine.
/// \returns A vector with one entry per code unit (byte) of the input.
/// \note The entry values are presumably the libunibreak WORDBREAK_* codes;
///       confirm against the word breaking library in use.
std::vector<char> ofGetWordBreaks(const std::string& utf8string,
                                  const std::string language = "en");

/// \brief Compute word break information for a UTF16 encoded string.
/// \param utf16string A UTF16 encoded string.
/// \param language The language code handed to the word breaking routine.
/// \returns A vector with one entry per code unit of the input.
std::vector<char> ofGetWordBreaks(const ofUTF16String& utf16string,
                                  const std::string language = "en");

/// \brief Compute word break information for a UTF32 encoded string.
/// \param utf32string A UTF32 encoded string.
/// \param language The language code handed to the word breaking routine.
/// \returns A vector with one entry per code unit of the input.
std::vector<char> ofGetWordBreaks(const ofUTF32String& utf32string,
                                  const std::string language = "en");

/// \brief Get the set of codepoints belonging to a named Unicode block.
/// \param name The Unicode block to look up.
/// \returns The codepoints of the block as a set.
std::set<ofUTF32Char> ofGetUnicodeBlock(ofUnicodeBlockName name);

/// \brief Get the codepoints of a named Unicode block as a UTF8 string.
/// \param name The Unicode block to look up.
/// \returns A UTF8 string holding every codepoint from the first to the last
///          codepoint of the block, inclusive, in ascending order.
std::string ofGetUnicodeBlockString(ofUnicodeBlockName name);

/// \brief Get the default set of codepoints.
/// \returns The codepoints listed in the built-in default table.
std::set<ofUTF32Char> ofGetDefaultUnicodeBlock();

/// \brief Get the default set of codepoints as a UTF8 string.
/// \returns A UTF8 string holding the built-in default table, in table order.
std::string ofGetDefaultUnicodeBlockString();
	// =============================================================================
	//
	// Copyright (c) 2009-2013 Christopher Baker <http://christopherbaker.net>
	//
	// Permission is hereby granted, free of charge, to any person obtaining a copy
	// of this software and associated documentation files (the "Software"), to deal
	// in the Software without restriction, including without limitation the rights
	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	// copies of the Software, and to permit persons to whom the Software is
	// furnished to do so, subject to the following conditions:
	//
	// The above copyright notice and this permission notice shall be included in
	// all copies or substantial portions of the Software.
	//
	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	// THE SOFTWARE.
	//
	// =============================================================================


	#include "ofUnicode.h"
	#include "Poco/Buffer.h"

	// http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT // win
	// http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT // mac
	// http://unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT // linux


	// Default codepoint table used by ofGetDefaultUnicodeBlock() and
	// ofGetDefaultUnicodeBlockString(). The table starts at 0x0080, so the
	// Basic Latin range (0x0000-0x007F) is not part of it.
	//
	// STANDARD_UNICODE_CHARS_LEN is used as the loop bound when the table is
	// read, so it must equal the number of initializers below (28 rows of 8
	// plus a final row of 4).
	//
	// NOTE(review): the entries look like the non-ASCII codepoints of the
	// vendor mapping tables referenced above - confirm before editing.
	static const std::size_t STANDARD_UNICODE_CHARS_LEN = 228;
	static const ofUTF32Char STANDARD_UNICODE_CHARS[] = {
	0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
	0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
	0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
	0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
	0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
	0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
	0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
	0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
	0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
	0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
	0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
	0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
	0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
	0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
	0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
	0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF,
	0x0102, 0x0103, 0x0104, 0x0105, 0x0106, 0x0107, 0x010C, 0x010D,
	0x010E, 0x010F, 0x0110, 0x0111, 0x0118, 0x0119, 0x011A, 0x011B,
	0x0131, 0x0139, 0x013A, 0x013D, 0x013E, 0x0141, 0x0142, 0x0143,
	0x0144, 0x0147, 0x0148, 0x0150, 0x0151, 0x0152, 0x0153, 0x0154,
	0x0155, 0x0158, 0x0159, 0x015A, 0x015B, 0x015E, 0x015F, 0x0160,
	0x0161, 0x0162, 0x0163, 0x0164, 0x0165, 0x016E, 0x016F, 0x0170,
	0x0171, 0x0178, 0x0179, 0x017A, 0x017B, 0x017C, 0x017D, 0x017E,
	0x0192, 0x02C6, 0x02C7, 0x02D8, 0x02D9, 0x02DA, 0x02DB, 0x02DC,
	0x02DD, 0x03A9, 0x03C0, 0x2013, 0x2014, 0x2018, 0x2019, 0x201A,
	0x201C, 0x201D, 0x201E, 0x2020, 0x2021, 0x2022, 0x2026, 0x2030,
	0x2039, 0x203A, 0x2044, 0x20AC, 0x2122, 0x2202, 0x2206, 0x220F,
	0x2211, 0x221A, 0x221E, 0x222B, 0x2248, 0x2260, 0x2264, 0x2265,
	0x25CA, 0xF8FF, 0xFB01, 0xFB02
	};

	// Table of Unicode blocks. Each entry is built from a block name followed
	// by the first and last codepoint of the block; readers of this table
	// treat the last codepoint as inclusive.
	//
	// ofGetUnicodeBlock() and ofGetUnicodeBlockString() index this table
	// directly with an ofUnicodeBlockName value, so the order of the entries
	// must match the numeric values of that enumeration.
	static const ofUnicodeBlock OF_UNICODE_BLOCKS[] =
	{
	ofUnicodeBlock(OF_BASIC_LATIN,0x0000,0x007F),
	ofUnicodeBlock(OF_LATIN_1_SUPPLEMENT,0x0080,0x00FF),
	ofUnicodeBlock(OF_LATIN_EXTENDED_A,0x0100,0x017F),
	ofUnicodeBlock(OF_LATIN_EXTENDED_B,0x0180,0x024F),
	ofUnicodeBlock(OF_IPA_EXTENSIONS,0x0250,0x02AF),
	ofUnicodeBlock(OF_SPACING_MODIFIER_LETTERS,0x02B0,0x02FF),
	ofUnicodeBlock(OF_COMBINING_DIACRITICAL_MARKS,0x0300,0x036F),
	ofUnicodeBlock(OF_GREEK_AND_COPTIC,0x0370,0x03FF),
	ofUnicodeBlock(OF_CYRILLIC,0x0400,0x04FF),
	ofUnicodeBlock(OF_CYRILLIC_SUPPLEMENT,0x0500,0x052F),
	ofUnicodeBlock(OF_ARMENIAN,0x0530,0x058F),
	ofUnicodeBlock(OF_HEBREW,0x0590,0x05FF),
	ofUnicodeBlock(OF_ARABIC,0x0600,0x06FF),
	ofUnicodeBlock(OF_SYRIAC,0x0700,0x074F),
	ofUnicodeBlock(OF_ARABIC_SUPPLEMENT,0x0750,0x077F),
	ofUnicodeBlock(OF_THAANA,0x0780,0x07BF),
	ofUnicodeBlock(OF_NKO,0x07C0,0x07FF),
	ofUnicodeBlock(OF_SAMARITAN,0x0800,0x083F),
	ofUnicodeBlock(OF_MANDAIC,0x0840,0x085F),
	ofUnicodeBlock(OF_ARABIC_EXTENDED_A,0x08A0,0x08FF),
	ofUnicodeBlock(OF_DEVANAGARI,0x0900,0x097F),
	ofUnicodeBlock(OF_BENGALI,0x0980,0x09FF),
	ofUnicodeBlock(OF_GURMUKHI,0x0A00,0x0A7F),
	ofUnicodeBlock(OF_GUJARATI,0x0A80,0x0AFF),
	ofUnicodeBlock(OF_ORIYA,0x0B00,0x0B7F),
	ofUnicodeBlock(OF_TAMIL,0x0B80,0x0BFF),
	ofUnicodeBlock(OF_TELUGU,0x0C00,0x0C7F),
	ofUnicodeBlock(OF_KANNADA,0x0C80,0x0CFF),
	ofUnicodeBlock(OF_MALAYALAM,0x0D00,0x0D7F),
	ofUnicodeBlock(OF_SINHALA,0x0D80,0x0DFF),
	ofUnicodeBlock(OF_THAI,0x0E00,0x0E7F),
	ofUnicodeBlock(OF_LAO,0x0E80,0x0EFF),
	ofUnicodeBlock(OF_TIBETAN,0x0F00,0x0FFF),
	ofUnicodeBlock(OF_MYANMAR,0x1000,0x109F),
	ofUnicodeBlock(OF_GEORGIAN,0x10A0,0x10FF),
	ofUnicodeBlock(OF_HANGUL_JAMO,0x1100,0x11FF),
	ofUnicodeBlock(OF_ETHIOPIC,0x1200,0x137F),
	ofUnicodeBlock(OF_ETHIOPIC_SUPPLEMENT,0x1380,0x139F),
	ofUnicodeBlock(OF_CHEROKEE,0x13A0,0x13FF),
	ofUnicodeBlock(OF_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,0x1400,0x167F),
	ofUnicodeBlock(OF_OGHAM,0x1680,0x169F),
	ofUnicodeBlock(OF_RUNIC,0x16A0,0x16FF),
	ofUnicodeBlock(OF_TAGALOG,0x1700,0x171F),
	ofUnicodeBlock(OF_HANUNOO,0x1720,0x173F),
	ofUnicodeBlock(OF_BUHID,0x1740,0x175F),
	ofUnicodeBlock(OF_TAGBANWA,0x1760,0x177F),
	ofUnicodeBlock(OF_KHMER,0x1780,0x17FF),
	ofUnicodeBlock(OF_MONGOLIAN,0x1800,0x18AF),
	ofUnicodeBlock(OF_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,0x18B0,0x18FF),
	ofUnicodeBlock(OF_LIMBU,0x1900,0x194F),
	ofUnicodeBlock(OF_TAI_LE,0x1950,0x197F),
	ofUnicodeBlock(OF_NEW_TAI_LUE,0x1980,0x19DF),
	ofUnicodeBlock(OF_KHMER_SYMBOLS,0x19E0,0x19FF),
	ofUnicodeBlock(OF_BUGINESE,0x1A00,0x1A1F),
	ofUnicodeBlock(OF_TAI_THAM,0x1A20,0x1AAF),
	ofUnicodeBlock(OF_BALINESE,0x1B00,0x1B7F),
	ofUnicodeBlock(OF_SUNDANESE,0x1B80,0x1BBF),
	ofUnicodeBlock(OF_BATAK,0x1BC0,0x1BFF),
	ofUnicodeBlock(OF_LEPCHA,0x1C00,0x1C4F),
	ofUnicodeBlock(OF_OL_CHIKI,0x1C50,0x1C7F),
	ofUnicodeBlock(OF_SUNDANESE_SUPPLEMENT,0x1CC0,0x1CCF),
	ofUnicodeBlock(OF_VEDIC_EXTENSIONS,0x1CD0,0x1CFF),
	ofUnicodeBlock(OF_PHONETIC_EXTENSIONS,0x1D00,0x1D7F),
	ofUnicodeBlock(OF_PHONETIC_EXTENSIONS_SUPPLEMENT,0x1D80,0x1DBF),
	ofUnicodeBlock(OF_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,0x1DC0,0x1DFF),
	ofUnicodeBlock(OF_LATIN_EXTENDED_ADDITIONAL,0x1E00,0x1EFF),
	ofUnicodeBlock(OF_GREEK_EXTENDED,0x1F00,0x1FFF),
	ofUnicodeBlock(OF_GENERAL_PUNCTUATION,0x2000,0x206F),
	ofUnicodeBlock(OF_SUPERSCRIPTS_AND_SUBSCRIPTS,0x2070,0x209F),
	ofUnicodeBlock(OF_CURRENCY_SYMBOLS,0x20A0,0x20CF),
	ofUnicodeBlock(OF_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS,0x20D0,0x20FF),
	ofUnicodeBlock(OF_LETTERLIKE_SYMBOLS,0x2100,0x214F),
	ofUnicodeBlock(OF_NUMBER_FORMS,0x2150,0x218F),
	ofUnicodeBlock(OF_ARROWS,0x2190,0x21FF),
	ofUnicodeBlock(OF_MATHEMATICAL_OPERATORS,0x2200,0x22FF),
	ofUnicodeBlock(OF_MISCELLANEOUS_TECHNICAL,0x2300,0x23FF),
	ofUnicodeBlock(OF_CONTROL_PICTURES,0x2400,0x243F),
	ofUnicodeBlock(OF_OPTICAL_CHARACTER_RECOGNITION,0x2440,0x245F),
	ofUnicodeBlock(OF_ENCLOSED_ALPHANUMERICS,0x2460,0x24FF),
	ofUnicodeBlock(OF_BOX_DRAWING,0x2500,0x257F),
	ofUnicodeBlock(OF_BLOCK_ELEMENTS,0x2580,0x259F),
	ofUnicodeBlock(OF_GEOMETRIC_SHAPES,0x25A0,0x25FF),
	ofUnicodeBlock(OF_MISCELLANEOUS_SYMBOLS,0x2600,0x26FF),
	ofUnicodeBlock(OF_DINGBATS,0x2700,0x27BF),
	ofUnicodeBlock(OF_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,0x27C0,0x27EF),
	ofUnicodeBlock(OF_SUPPLEMENTAL_ARROWS_A,0x27F0,0x27FF),
	ofUnicodeBlock(OF_BRAILLE_PATTERNS,0x2800,0x28FF),
	ofUnicodeBlock(OF_SUPPLEMENTAL_ARROWS_B,0x2900,0x297F),
	ofUnicodeBlock(OF_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,0x2980,0x29FF),
	ofUnicodeBlock(OF_SUPPLEMENTAL_MATHEMATICAL_OPERATORS,0x2A00,0x2AFF),
	ofUnicodeBlock(OF_MISCELLANEOUS_SYMBOLS_AND_ARROWS,0x2B00,0x2BFF),
	ofUnicodeBlock(OF_GLAGOLITIC,0x2C00,0x2C5F),
	ofUnicodeBlock(OF_LATIN_EXTENDED_C,0x2C60,0x2C7F),
	ofUnicodeBlock(OF_COPTIC,0x2C80,0x2CFF),
	ofUnicodeBlock(OF_GEORGIAN_SUPPLEMENT,0x2D00,0x2D2F),
	ofUnicodeBlock(OF_TIFINAGH,0x2D30,0x2D7F),
	ofUnicodeBlock(OF_ETHIOPIC_EXTENDED,0x2D80,0x2DDF),
	ofUnicodeBlock(OF_CYRILLIC_EXTENDED_A,0x2DE0,0x2DFF),
	ofUnicodeBlock(OF_SUPPLEMENTAL_PUNCTUATION,0x2E00,0x2E7F),
	ofUnicodeBlock(OF_CJK_RADICALS_SUPPLEMENT,0x2E80,0x2EFF),
	ofUnicodeBlock(OF_KANGXI_RADICALS,0x2F00,0x2FDF),
	ofUnicodeBlock(OF_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,0x2FF0,0x2FFF),
	ofUnicodeBlock(OF_CJK_SYMBOLS_AND_PUNCTUATION,0x3000,0x303F),
	ofUnicodeBlock(OF_HIRAGANA,0x3040,0x309F),
	ofUnicodeBlock(OF_KATAKANA,0x30A0,0x30FF),
	ofUnicodeBlock(OF_BOPOMOFO,0x3100,0x312F),
	ofUnicodeBlock(OF_HANGUL_COMPATIBILITY_JAMO,0x3130,0x318F),
	ofUnicodeBlock(OF_KANBUN,0x3190,0x319F),
	ofUnicodeBlock(OF_BOPOMOFO_EXTENDED,0x31A0,0x31BF),
	ofUnicodeBlock(OF_CJK_STROKES,0x31C0,0x31EF),
	ofUnicodeBlock(OF_KATAKANA_PHONETIC_EXTENSIONS,0x31F0,0x31FF),
	ofUnicodeBlock(OF_ENCLOSED_CJK_LETTERS_AND_MONTHS,0x3200,0x32FF),
	ofUnicodeBlock(OF_CJK_COMPATIBILITY,0x3300,0x33FF),
	ofUnicodeBlock(OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,0x3400,0x4DBF),
	ofUnicodeBlock(OF_YIJING_HEXAGRAM_SYMBOLS,0x4DC0,0x4DFF),
	ofUnicodeBlock(OF_CJK_UNIFIED_IDEOGRAPHS,0x4E00,0x9FFF),
	ofUnicodeBlock(OF_YI_SYLLABLES,0xA000,0xA48F),
	ofUnicodeBlock(OF_YI_RADICALS,0xA490,0xA4CF),
	ofUnicodeBlock(OF_LISU,0xA4D0,0xA4FF),
	ofUnicodeBlock(OF_VAI,0xA500,0xA63F),
	ofUnicodeBlock(OF_CYRILLIC_EXTENDED_B,0xA640,0xA69F),
	ofUnicodeBlock(OF_BAMUM,0xA6A0,0xA6FF),
	ofUnicodeBlock(OF_MODIFIER_TONE_LETTERS,0xA700,0xA71F),
	ofUnicodeBlock(OF_LATIN_EXTENDED_D,0xA720,0xA7FF),
	ofUnicodeBlock(OF_SYLOTI_NAGRI,0xA800,0xA82F),
	ofUnicodeBlock(OF_COMMON_INDIC_NUMBER_FORMS,0xA830,0xA83F),
	ofUnicodeBlock(OF_PHAGS_PA,0xA840,0xA87F),
	ofUnicodeBlock(OF_SAURASHTRA,0xA880,0xA8DF),
	ofUnicodeBlock(OF_DEVANAGARI_EXTENDED,0xA8E0,0xA8FF),
	ofUnicodeBlock(OF_KAYAH_LI,0xA900,0xA92F),
	ofUnicodeBlock(OF_REJANG,0xA930,0xA95F),
	ofUnicodeBlock(OF_HANGUL_JAMO_EXTENDED_A,0xA960,0xA97F),
	ofUnicodeBlock(OF_JAVANESE,0xA980,0xA9DF),
	ofUnicodeBlock(OF_CHAM,0xAA00,0xAA5F),
	ofUnicodeBlock(OF_MYANMAR_EXTENDED_A,0xAA60,0xAA7F),
	ofUnicodeBlock(OF_TAI_VIET,0xAA80,0xAADF),
	ofUnicodeBlock(OF_MEETEI_MAYEK_EXTENSIONS,0xAAE0,0xAAFF),
	ofUnicodeBlock(OF_ETHIOPIC_EXTENDED_A,0xAB00,0xAB2F),
	ofUnicodeBlock(OF_MEETEI_MAYEK,0xABC0,0xABFF),
	ofUnicodeBlock(OF_HANGUL_SYLLABLES,0xAC00,0xD7AF),
	ofUnicodeBlock(OF_HANGUL_JAMO_EXTENDED_B,0xD7B0,0xD7FF),
	ofUnicodeBlock(OF_HIGH_SURROGATES,0xD800,0xDB7F),
	ofUnicodeBlock(OF_HIGH_PRIVATE_USE_SURROGATES,0xDB80,0xDBFF),
	ofUnicodeBlock(OF_LOW_SURROGATES,0xDC00,0xDFFF),
	ofUnicodeBlock(OF_PRIVATE_USE_AREA,0xE000,0xF8FF),
	ofUnicodeBlock(OF_CJK_COMPATIBILITY_IDEOGRAPHS,0xF900,0xFAFF),
	ofUnicodeBlock(OF_ALPHABETIC_PRESENTATION_FORMS,0xFB00,0xFB4F),
	ofUnicodeBlock(OF_ARABIC_PRESENTATION_FORMS_A,0xFB50,0xFDFF),
	ofUnicodeBlock(OF_VARIATION_SELECTORS,0xFE00,0xFE0F),
	ofUnicodeBlock(OF_VERTICAL_FORMS,0xFE10,0xFE1F),
	ofUnicodeBlock(OF_COMBINING_HALF_MARKS,0xFE20,0xFE2F),
	ofUnicodeBlock(OF_CJK_COMPATIBILITY_FORMS,0xFE30,0xFE4F),
	ofUnicodeBlock(OF_SMALL_FORM_VARIANTS,0xFE50,0xFE6F),
	ofUnicodeBlock(OF_ARABIC_PRESENTATION_FORMS_B,0xFE70,0xFEFF),
	ofUnicodeBlock(OF_HALFWIDTH_AND_FULLWIDTH_FORMS,0xFF00,0xFFEF),
	ofUnicodeBlock(OF_SPECIALS,0xFFF0,0xFFFF),
	ofUnicodeBlock(OF_LINEAR_B_SYLLABARY,0x10000,0x1007F),
	ofUnicodeBlock(OF_LINEAR_B_IDEOGRAMS,0x10080,0x100FF),
	ofUnicodeBlock(OF_AEGEAN_NUMBERS,0x10100,0x1013F),
	ofUnicodeBlock(OF_ANCIENT_GREEK_NUMBERS,0x10140,0x1018F),
	ofUnicodeBlock(OF_ANCIENT_SYMBOLS,0x10190,0x101CF),
	ofUnicodeBlock(OF_PHAISTOS_DISC,0x101D0,0x101FF),
	ofUnicodeBlock(OF_LYCIAN,0x10280,0x1029F),
	ofUnicodeBlock(OF_CARIAN,0x102A0,0x102DF),
	ofUnicodeBlock(OF_OLD_ITALIC,0x10300,0x1032F),
	ofUnicodeBlock(OF_GOTHIC,0x10330,0x1034F),
	ofUnicodeBlock(OF_UGARITIC,0x10380,0x1039F),
	ofUnicodeBlock(OF_OLD_PERSIAN,0x103A0,0x103DF),
	ofUnicodeBlock(OF_DESERET,0x10400,0x1044F),
	ofUnicodeBlock(OF_SHAVIAN,0x10450,0x1047F),
	ofUnicodeBlock(OF_OSMANYA,0x10480,0x104AF),
	ofUnicodeBlock(OF_CYPRIOT_SYLLABARY,0x10800,0x1083F),
	ofUnicodeBlock(OF_IMPERIAL_ARAMAIC,0x10840,0x1085F),
	ofUnicodeBlock(OF_PHOENICIAN,0x10900,0x1091F),
	ofUnicodeBlock(OF_LYDIAN,0x10920,0x1093F),
	ofUnicodeBlock(OF_MEROITIC_HIEROGLYPHS,0x10980,0x1099F),
	ofUnicodeBlock(OF_MEROITIC_CURSIVE,0x109A0,0x109FF),
	ofUnicodeBlock(OF_KHAROSHTHI,0x10A00,0x10A5F),
	ofUnicodeBlock(OF_OLD_SOUTH_ARABIAN,0x10A60,0x10A7F),
	ofUnicodeBlock(OF_AVESTAN,0x10B00,0x10B3F),
	ofUnicodeBlock(OF_INSCRIPTIONAL_PARTHIAN,0x10B40,0x10B5F),
	ofUnicodeBlock(OF_INSCRIPTIONAL_PAHLAVI,0x10B60,0x10B7F),
	ofUnicodeBlock(OF_OLD_TURKIC,0x10C00,0x10C4F),
	ofUnicodeBlock(OF_RUMI_NUMERAL_SYMBOLS,0x10E60,0x10E7F),
	ofUnicodeBlock(OF_BRAHMI,0x11000,0x1107F),
	ofUnicodeBlock(OF_KAITHI,0x11080,0x110CF),
	ofUnicodeBlock(OF_SORA_SOMPENG,0x110D0,0x110FF),
	ofUnicodeBlock(OF_CHAKMA,0x11100,0x1114F),
	ofUnicodeBlock(OF_SHARADA,0x11180,0x111DF),
	ofUnicodeBlock(OF_TAKRI,0x11680,0x116CF),
	ofUnicodeBlock(OF_CUNEIFORM,0x12000,0x123FF),
	ofUnicodeBlock(OF_CUNEIFORM_NUMBERS_AND_PUNCTUATION,0x12400,0x1247F),
	ofUnicodeBlock(OF_EGYPTIAN_HIEROGLYPHS,0x13000,0x1342F),
	ofUnicodeBlock(OF_BAMUM_SUPPLEMENT,0x16800,0x16A3F),
	ofUnicodeBlock(OF_MIAO,0x16F00,0x16F9F),
	ofUnicodeBlock(OF_KANA_SUPPLEMENT,0x1B000,0x1B0FF),
	ofUnicodeBlock(OF_BYZANTINE_MUSICAL_SYMBOLS,0x1D000,0x1D0FF),
	ofUnicodeBlock(OF_MUSICAL_SYMBOLS,0x1D100,0x1D1FF),
	ofUnicodeBlock(OF_ANCIENT_GREEK_MUSICAL_NOTATION,0x1D200,0x1D24F),
	ofUnicodeBlock(OF_TAI_XUAN_JING_SYMBOLS,0x1D300,0x1D35F),
	ofUnicodeBlock(OF_COUNTING_ROD_NUMERALS,0x1D360,0x1D37F),
	ofUnicodeBlock(OF_MATHEMATICAL_ALPHANUMERIC_SYMBOLS,0x1D400,0x1D7FF),
	ofUnicodeBlock(OF_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,0x1EE00,0x1EEFF),
	ofUnicodeBlock(OF_MAHJONG_TILES,0x1F000,0x1F02F),
	ofUnicodeBlock(OF_DOMINO_TILES,0x1F030,0x1F09F),
	ofUnicodeBlock(OF_PLAYING_CARDS,0x1F0A0,0x1F0FF),
	ofUnicodeBlock(OF_ENCLOSED_ALPHANUMERIC_SUPPLEMENT,0x1F100,0x1F1FF),
	ofUnicodeBlock(OF_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,0x1F200,0x1F2FF),
	ofUnicodeBlock(OF_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,0x1F300,0x1F5FF),
	ofUnicodeBlock(OF_EMOTICONS,0x1F600,0x1F64F),
	ofUnicodeBlock(OF_TRANSPORT_AND_MAP_SYMBOLS,0x1F680,0x1F6FF),
	ofUnicodeBlock(OF_ALCHEMICAL_SYMBOLS,0x1F700,0x1F77F),
	ofUnicodeBlock(OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,0x20000,0x2A6DF),
	ofUnicodeBlock(OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,0x2A700,0x2B73F),
	ofUnicodeBlock(OF_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,0x2B740,0x2B81F),
	ofUnicodeBlock(OF_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,0x2F800,0x2FA1F),
	ofUnicodeBlock(OF_TAGS,0xE0000,0xE007F),
	ofUnicodeBlock(OF_VARIATION_SELECTORS_SUPPLEMENT,0xE0100,0xE01EF),
	ofUnicodeBlock(OF_SUPPLEMENTARY_PRIVATE_USE_AREA_A,0xF0000,0xFFFFF),
	ofUnicodeBlock(OF_SUPPLEMENTARY_PRIVATE_USE_AREA_B,0x100000,0x10FFFF)
	};

	/// \brief Encode a single UTF32 codepoint as a UTF8 string.
	/// \param codepoint The codepoint to encode.
	/// \returns The string filled in by Poco::UnicodeConverter::toUTF8.
	template <>
	std::string ofToUTF8(const ofUTF32Char& codepoint)
	{
	    // The codepoint is handed over as a buffer holding exactly one unit.
	    std::string encoded;
	    Poco::UnicodeConverter::toUTF8(&codepoint, 1, encoded);
	    return encoded;
	}


	/// \brief Convert a UTF32 encoded string to UTF16.
	///
	/// The conversion is routed through an intermediate UTF8 string.
	///
	/// \param utf32string A UTF32 encoded string.
	/// \returns The UTF16 encoded string.
	template <>
	ofUTF16String ofToUTF16(const ofUTF32String& utf32string)
	{
	    const std::string utf8 = ofToUTF8(utf32string);
	    return ofToUTF16(utf8);
	}


	/// \brief Convert a UTF16 encoded string to UTF32.
	///
	/// The conversion is routed through an intermediate UTF8 string.
	///
	/// \param utf16string A UTF16 encoded string.
	/// \returns The UTF32 encoded string.
	template <>
	ofUTF32String ofToUTF32(const Poco::UTF16String& utf16string)
	{
	    const std::string utf8 = ofToUTF8(utf16string);
	    return ofToUTF32(utf8);
	}


	std::size_t ofUTFStringLength(const std::string& utf8string)
	{
	return ofToUTF32(utf8string).size();
	}


	std::size_t ofUTFStringLength(const ofUTF16String& utf16string)
	{
	return ofToUTF32(utf16string).size();
	}


	/// \brief Count the codepoints in a UTF32 encoded string.
	/// \param utf32string A UTF32 encoded string.
	/// \returns The number of elements in the string.
	std::size_t ofUTFStringLength(const ofUTF32String& utf32string)
	{
	    const std::size_t count = utf32string.size();
	    return count;
	}


	/// \brief Transform a UTF8 encoded string to upper case.
	/// \param utf8string A UTF8 encoded string.
	/// \returns The result of Poco::UTF8::toUpper for the input.
	std::string ofUTFToUpper(const std::string& utf8string)
	{
	    std::string upper = Poco::UTF8::toUpper(utf8string);
	    return upper;
	}


	/// \brief Transform a UTF16 encoded string to upper case.
	///
	/// The string is converted to UTF8, transformed, and converted back.
	///
	/// \param utf16string A UTF16 encoded string.
	/// \returns The transformed string.
	ofUTF16String ofUTFToUpper(const ofUTF16String& utf16string)
	{
	    const std::string utf8 = ofToUTF8(utf16string);
	    const std::string upper = ofUTFToUpper(utf8);
	    return ofToUTF16(upper);
	}


	/// \brief Transform a UTF32 encoded string to upper case.
	///
	/// The string is converted to UTF8, transformed, and converted back.
	///
	/// \param utf32string A UTF32 encoded string.
	/// \returns The transformed string.
	ofUTF32String ofUTFToUpper(const ofUTF32String& utf32string)
	{
	    const std::string utf8 = ofToUTF8(utf32string);
	    const std::string upper = ofUTFToUpper(utf8);
	    return ofToUTF32(upper);
	}


	/// \brief Transform a UTF8 encoded string to lower case.
	/// \param utf8string A UTF8 encoded string.
	/// \returns The result of Poco::UTF8::toLower for the input.
	std::string ofUTFToLower(const std::string& utf8string)
	{
	    std::string lower = Poco::UTF8::toLower(utf8string);
	    return lower;
	}


	/// \brief Transform a UTF16 encoded string to lower case.
	///
	/// The string is converted to UTF8, transformed, and converted back.
	///
	/// \param utf16string A UTF16 encoded string.
	/// \returns The transformed string.
	ofUTF16String ofUTFToLower(const ofUTF16String& utf16string)
	{
	    const std::string utf8 = ofToUTF8(utf16string);
	    const std::string lower = ofUTFToLower(utf8);
	    return ofToUTF16(lower);
	}


	/// \brief Transform a UTF32 encoded string to lower case.
	///
	/// The string is converted to UTF8, transformed, and converted back.
	///
	/// \param utf32string A UTF32 encoded string.
	/// \returns The transformed string.
	ofUTF32String ofUTFToLower(const ofUTF32String& utf32string)
	{
	    const std::string utf8 = ofToUTF8(utf32string);
	    const std::string lower = ofUTFToLower(utf8);
	    return ofToUTF32(lower);
	}


	/// \brief Compute line break information for a UTF8 encoded string.
	///
	/// This is the definition for the ofGetLineBreaks(const std::string&, ...)
	/// overload declared alongside the UTF16 and UTF32 variants.
	///
	/// \param utf8string A UTF8 encoded string.
	/// \param language The language code handed to set_linebreaks_utf8.
	/// \returns A vector with one entry per byte of utf8string, filled in by
	///          set_linebreaks_utf8. Empty input yields an empty vector.
	std::vector<char> ofGetLineBreaks(const std::string& utf8string,
	                                  const std::string language)
	{
	    std::vector<char> breaks(utf8string.size());

	    // Taking &breaks[0] on an empty vector is undefined behavior, and
	    // there is nothing to analyze for an empty string anyway.
	    if (breaks.empty())
	    {
	        return breaks;
	    }

	    set_linebreaks_utf8(reinterpret_cast<const utf8_t*>(utf8string.c_str()),
	                        utf8string.size(),
	                        language.c_str(),
	                        &breaks[0]);

	    return breaks;
	}

	/// \brief Former name of the UTF8 ofGetLineBreaks overload.
	///
	/// Kept so that existing callers of ofBreakLines keep working; it simply
	/// forwards to ofGetLineBreaks.
	///
	/// \param utf8string A UTF8 encoded string.
	/// \param language The language code handed to the line breaking routine.
	/// \returns The result of ofGetLineBreaks(utf8string, language).
	std::vector<char> ofBreakLines(const std::string& utf8string,
	                               const std::string language)
	{
	    return ofGetLineBreaks(utf8string, language);
	}

	/// \brief Compute line break information for a UTF16 encoded string.
	/// \param utf16string A UTF16 encoded string.
	/// \param language The language code handed to set_linebreaks_utf16.
	/// \returns A vector with one entry per code unit of utf16string, filled
	///          in by set_linebreaks_utf16. Empty input yields an empty vector.
	std::vector<char> ofGetLineBreaks(const ofUTF16String& utf16string,
	                                  const std::string language)
	{
	    std::vector<char> breaks(utf16string.size());

	    // Taking &breaks[0] on an empty vector is undefined behavior, and
	    // there is nothing to analyze for an empty string anyway.
	    if (breaks.empty())
	    {
	        return breaks;
	    }

	    set_linebreaks_utf16(reinterpret_cast<const utf16_t*>(utf16string.c_str()),
	                         utf16string.size(),
	                         language.c_str(),
	                         &breaks[0]);

	    return breaks;
	}

	/// \brief Compute line break information for a UTF32 encoded string.
	/// \param utf32string A UTF32 encoded string.
	/// \param language The language code handed to set_linebreaks_utf32.
	/// \returns A vector with one entry per code unit of utf32string, filled
	///          in by set_linebreaks_utf32. Empty input yields an empty vector.
	std::vector<char> ofGetLineBreaks(const ofUTF32String& utf32string,
	                                  const std::string language)
	{
	    std::vector<char> breaks(utf32string.size());

	    // Taking &breaks[0] on an empty vector is undefined behavior, and
	    // there is nothing to analyze for an empty string anyway.
	    if (breaks.empty())
	    {
	        return breaks;
	    }

	    set_linebreaks_utf32(reinterpret_cast<const utf32_t*>(utf32string.c_str()),
	                         utf32string.size(),
	                         language.c_str(),
	                         &breaks[0]);

	    return breaks;
	}

	/// \brief Compute word break information for a UTF8 encoded string.
	/// \param utf8string A UTF8 encoded string.
	/// \param language The language code handed to set_wordbreaks_utf8.
	/// \returns A vector with one entry per byte of utf8string, filled in by
	///          set_wordbreaks_utf8. Empty input yields an empty vector.
	std::vector<char> ofGetWordBreaks(const std::string& utf8string,
	                                  const std::string language)
	{
	    std::vector<char> breaks(utf8string.size());

	    // Taking &breaks[0] on an empty vector is undefined behavior, and
	    // there is nothing to analyze for an empty string anyway.
	    if (breaks.empty())
	    {
	        return breaks;
	    }

	    set_wordbreaks_utf8(reinterpret_cast<const utf8_t*>(utf8string.c_str()),
	                        utf8string.size(),
	                        language.c_str(),
	                        &breaks[0]);

	    return breaks;
	}

	/// \brief Compute word break information for a UTF16 encoded string.
	/// \param utf16string A UTF16 encoded string.
	/// \param language The language code handed to set_wordbreaks_utf16.
	/// \returns A vector with one entry per code unit of utf16string, filled
	///          in by set_wordbreaks_utf16. Empty input yields an empty vector.
	std::vector<char> ofGetWordBreaks(const ofUTF16String& utf16string,
	                                  const std::string language)
	{
	    std::vector<char> breaks(utf16string.size());

	    // Taking &breaks[0] on an empty vector is undefined behavior, and
	    // there is nothing to analyze for an empty string anyway.
	    if (breaks.empty())
	    {
	        return breaks;
	    }

	    set_wordbreaks_utf16(reinterpret_cast<const utf16_t*>(utf16string.c_str()),
	                         utf16string.size(),
	                         language.c_str(),
	                         &breaks[0]);

	    return breaks;
	}

	/// \brief Compute word break information for a UTF32 encoded string.
	/// \param utf32string A UTF32 encoded string.
	/// \param language The language code handed to set_wordbreaks_utf32.
	/// \returns A vector with one entry per code unit of utf32string, filled
	///          in by set_wordbreaks_utf32. Empty input yields an empty vector.
	std::vector<char> ofGetWordBreaks(const ofUTF32String& utf32string,
	                                  const std::string language)
	{
	    std::vector<char> breaks(utf32string.size());

	    // Taking &breaks[0] on an empty vector is undefined behavior, and
	    // there is nothing to analyze for an empty string anyway.
	    if (breaks.empty())
	    {
	        return breaks;
	    }

	    set_wordbreaks_utf32(reinterpret_cast<const utf32_t*>(utf32string.c_str()),
	                         utf32string.size(),
	                         language.c_str(),
	                         &breaks[0]);

	    return breaks;
	}


	/// \brief Get the set of codepoints belonging to a named Unicode block.
	/// \param name The Unicode block to look up; used directly as an index
	///        into OF_UNICODE_BLOCKS.
	/// \returns The result of getSet() on the matching table entry.
	std::set<ofUTF32Char> ofGetUnicodeBlock(ofUnicodeBlockName name)
	{
	    // TODO: Cache these small sets inside the unicode block object?
	    const ofUnicodeBlock& block = OF_UNICODE_BLOCKS[name];
	    return block.getSet();
	}


	/// \brief Get the codepoints of a named Unicode block as a UTF8 string.
	/// \param name The Unicode block to look up; used directly as an index
	///        into OF_UNICODE_BLOCKS.
	/// \returns The UTF8 encoding of every codepoint from the block's begin
	///          to its end, inclusive, concatenated in ascending order.
	std::string ofGetUnicodeBlockString(ofUnicodeBlockName name)
	{
	    const ofUnicodeBlock& block = OF_UNICODE_BLOCKS[name];

	    std::string utf8;
	    ofUTF32Char codepoint = block.begin;

	    // The end codepoint is part of the block, hence the inclusive bound.
	    while (codepoint <= block.end)
	    {
	        utf8 += ofToUTF8(codepoint);
	        ++codepoint;
	    }

	    return utf8;
	}

	std::set<ofUTF32Char> ofGetDefaultUnicodeBlock()
	{
	std::set<ofUTF32Char> s;
	for (ofUTF32Char i = 0; i < STANDARD_UNICODE_CHARS_LEN; ++i)
	{
	s.insert(STANDARD_UNICODE_CHARS[i]);
	}
	return s;
	}

	std::string ofGetDefaultUnicodeBlockString()
	{
	std::string s;
	for (ofUTF32Char i = 0; i < STANDARD_UNICODE_CHARS_LEN; ++i)
	{
	s += ofToUTF8(STANDARD_UNICODE_CHARS[i]);
	}
	return s;
	}