Created
June 4, 2017 08:47
-
-
Save anonymous/e3a89f1addfc8fd5dc27b67aed09ee29 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include "../3rdparty/uthash/src/uthash.h" | |
/*
 * One row of a static name -> code lookup table.
 *
 * The tables in this file are brace-initialised in field order, e.g.
 * {"age", 1, 3}, so the order of the members below must not change.
 */
struct MVMUnicodeNamedAlias {
    char *name;      /* NUL-terminated spelling of the property or value name. */
    int pvaluecode;  /* Integer code this spelling maps to; several spellings
                      * of the same thing share one code. */
    int strlen;      /* Length of `name` in bytes, without the terminator
                      * (3 for "age"). The member shares its name with the
                      * C library function; that is legal for a struct
                      * member and is kept for compatibility. */
};
typedef struct MVMUnicodeNamedAlias MVMUnicodeNamedAlias;
struct MVMUnicodeNamedAlias_hash { | |
const char *name; | |
int pvaluecode; | |
UT_hash_handle hh; | |
}; | |
typedef struct MVMUnicodeNamedAlias_hash MVMUnicodeNamedAlias_hash; | |
struct hash_pre { | |
MVMUnicodeNamedAlias_hash *hash; | |
MVMUnicodeNamedAlias *source; | |
int elems; | |
}; | |
typedef struct hash_pre hash_pre; | |
struct mapping_struct { | |
MVMUnicodeNamedAlias *alias; | |
int elems; | |
}; | |
typedef struct mapping_struct mapping_struct; | |
MVMUnicodeNamedAlias alias_names[437] = { | |
{"age",1,3},{"Age",1,3},{"alpha",2,5},{"alphabetic",2,10},{"Alpha",2,5}, | |
{"Alphabetic",2,10},{"ahex",3,4},{"asciihexdigit",3,13},{"AHex",3,4},{"ASCII_Hex_Digit", | |
3,15},{"bc",4,2},{"bidiclass",4,9},{"Bidi_Class",4,10},{"bidic",5,5},{"bidicontrol", | |
5,11},{"Bidi_C",5,6},{"Bidi_Control",5,12},{"bidim",6,5},{"bidimirrored",6,12}, | |
{"Bidi_M",6,6},{"Bidi_Mirrored",6,13},{"bidimirroringglyph",7,18},{"bmg",7,3}, | |
{"Bidi_Mirroring_Glyph",7,20},{"bidipairedbracket",8,17},{"bpb",8,3},{"Bidi_Paired_Bracket", | |
8,19},{"bidipairedbrackettype",9,21},{"bpt",9,3},{"Bidi_Paired_Bracket_Type",9, | |
24},{"blk",10,3},{"block",10,5},{"Block",10,5},{"canonicalcombiningclass",11, | |
23},{"ccc",11,3},{"Canonical_Combining_Class",11,25},{"casefolding",12,11}, | |
{"cf",12,2},{"Case_Folding",12,12},{"caseignorable",13,13},{"ci",13,2}, | |
{"Case_Ignorable",13,14},{"CI",13,2},{"cased",14,5},{"Cased",14,5},{"changeswhencasefolded", | |
15,21},{"cwcf",15,4},{"Changes_When_Casefolded",15,23},{"CWCF",15,4},{"changeswhencasemapped", | |
16,21},{"cwcm",16,4},{"Changes_When_Casemapped",16,23},{"CWCM",16,4},{"changeswhenlowercased", | |
17,21},{"cwl",17,3},{"Changes_When_Lowercased",17,23},{"CWL",17,3},{"changeswhennfkccasefolded", | |
18,25},{"cwkcf",18,5},{"Changes_When_NFKC_Casefolded",18,28},{"CWKCF",18,5}, | |
{"changeswhentitlecased",19,21},{"cwt",19,3},{"Changes_When_Titlecased",19,23}, | |
{"CWT",19,3},{"changeswhenuppercased",20,21},{"cwu",20,3},{"Changes_When_Uppercased", | |
20,23},{"CWU",20,3},{"ce",21,2},{"compositionexclusion",21,20},{"CE",21,2}, | |
{"Composition_Exclusion",21,21},{"dash",22,4},{"Dash",22,4},{"decompositionmapping", | |
23,20},{"dm",23,2},{"Decomposition_Mapping",23,21},{"decompositiontype",24,17}, | |
{"dt",24,2},{"Decomposition_Type",24,18},{"defaultignorablecodepoint",25,25}, | |
{"di",25,2},{"Default_Ignorable_Code_Point",25,28},{"DI",25,2},{"dep",26,3}, | |
{"deprecated",26,10},{"Dep",26,3},{"Deprecated",26,10},{"dia",27,3},{"diacritic", | |
27,9},{"Dia",27,3},{"Diacritic",27,9},{"ea",28,2},{"eastasianwidth",28,14}, | |
{"East_Asian_Width",28,16},{"expandsonnfc",29,12},{"Expands_On_NFC",29,14}, | |
{"xonfc",29,5},{"XO_NFC",29,6},{"expandsonnfd",30,12},{"Expands_On_NFD",30,14}, | |
{"xonfd",30,5},{"XO_NFD",30,6},{"expandsonnfkc",31,13},{"Expands_On_NFKC",31, | |
15},{"xonfkc",31,6},{"XO_NFKC",31,7},{"expandsonnfkd",32,13},{"Expands_On_NFKD", | |
32,15},{"xonfkd",32,6},{"XO_NFKD",32,7},{"ext",33,3},{"extender",33,8},{"Ext", | |
33,3},{"Extender",33,8},{"fcnfkc",34,6},{"fcnfkcclosure",34,13},{"FC_NFKC",34, | |
7},{"FC_NFKC_Closure",34,15},{"compex",35,6},{"Comp_Ex",35,7},{"fullcompositionexclusion", | |
35,24},{"Full_Composition_Exclusion",35,26},{"gc",36,2},{"generalcategory",36, | |
15},{"General_Category",36,16},{"graphemebase",37,12},{"grbase",37,6},{"Gr_Base", | |
37,7},{"Grapheme_Base",37,13},{"gcb",38,3},{"graphemeclusterbreak",38,20}, | |
{"GCB",38,3},{"Grapheme_Cluster_Break",38,22},{"graphemeextend",39,14},{"grext", | |
39,5},{"Gr_Ext",39,6},{"Grapheme_Extend",39,15},{"graphemelink",40,12}, | |
{"grlink",40,6},{"Gr_Link",40,7},{"Grapheme_Link",40,13},{"hangulsyllabletype", | |
41,18},{"hst",41,3},{"Hangul_Syllable_Type",41,20},{"hex",42,3},{"hexdigit",42, | |
8},{"Hex",42,3},{"Hex_Digit",42,9},{"hyphen",43,6},{"Hyphen",43,6},{"ideo",44, | |
4},{"ideographic",44,11},{"Ideo",44,4},{"Ideographic",44,11},{"idc",45,3}, | |
{"idcontinue",45,10},{"ID_Continue",45,11},{"IDC",45,3},{"ids",46,3},{"idstart", | |
46,7},{"ID_Start",46,8},{"IDS",46,3},{"idsb",47,4},{"idsbinaryoperator",47,17}, | |
{"IDS_Binary_Operator",47,19},{"IDSB",47,4},{"idst",48,4},{"idstrinaryoperator", | |
48,18},{"IDS_Trinary_Operator",48,20},{"IDST",48,4},{"indicpositionalcategory", | |
49,23},{"inpc",49,4},{"Indic_Positional_Category",49,25},{"InPC",49,4}, | |
{"indicsyllabiccategory",50,21},{"insc",50,4},{"Indic_Syllabic_Category",50,23}, | |
{"InSC",50,4},{"isc",51,3},{"isocomment",51,10},{"ISO_Comment",51,11},{"jamoshortname", | |
52,13},{"jsn",52,3},{"Jamo_Short_Name",52,15},{"JSN",52,3},{"joinc",53,5}, | |
{"joincontrol",53,11},{"Join_C",53,6},{"Join_Control",53,12},{"jg",54,2}, | |
{"joininggroup",54,12},{"Joining_Group",54,13},{"joiningtype",55,11},{"jt",55, | |
2},{"Joining_Type",55,12},{"cjkaccountingnumeric",56,20},{"cjkAccountingNumeric", | |
56,20},{"kaccountingnumeric",56,18},{"kAccountingNumeric",56,18},{"cjkcompatibilityvariant", | |
57,23},{"cjkCompatibilityVariant",57,23},{"kcompatibilityvariant",57,21}, | |
{"kCompatibilityVariant",57,21},{"cjkiicore",58,9},{"cjkIICore",58,9},{"kiicore", | |
58,7},{"kIICore",58,7},{"cjkirggsource",59,13},{"cjkIRG_GSource",59,14}, | |
{"kirggsource",59,11},{"kIRG_GSource",59,12},{"cjkirghsource",60,13},{"cjkIRG_HSource", | |
60,14},{"kirghsource",60,11},{"kIRG_HSource",60,12},{"cjkirgjsource",61,13}, | |
{"cjkIRG_JSource",61,14},{"kirgjsource",61,11},{"kIRG_JSource",61,12},{"cjkirgkpsource", | |
62,14},{"cjkIRG_KPSource",62,15},{"kirgkpsource",62,12},{"kIRG_KPSource",62,13}, | |
{"cjkirgksource",63,13},{"cjkIRG_KSource",63,14},{"kirgksource",63,11}, | |
{"kIRG_KSource",63,12},{"cjkirgmsource",64,13},{"cjkIRG_MSource",64,14}, | |
{"kirgmsource",64,11},{"kIRG_MSource",64,12},{"cjkirgtsource",65,13},{"cjkIRG_TSource", | |
65,14},{"kirgtsource",65,11},{"kIRG_TSource",65,12},{"cjkirgusource",66,13}, | |
{"cjkIRG_USource",66,14},{"kirgusource",66,11},{"kIRG_USource",66,12},{"cjkirgvsource", | |
67,13},{"cjkIRG_VSource",67,14},{"kirgvsource",67,11},{"kIRG_VSource",67,12}, | |
{"cjkothernumeric",68,15},{"cjkOtherNumeric",68,15},{"kothernumeric",68,13}, | |
{"kOtherNumeric",68,13},{"cjkprimarynumeric",69,17},{"cjkPrimaryNumeric",69,17}, | |
{"kprimarynumeric",69,15},{"kPrimaryNumeric",69,15},{"cjkrsunicode",70,12}, | |
{"cjkRSUnicode",70,12},{"krsunicode",70,10},{"kRSUnicode",70,10},{"lb",71,2}, | |
{"linebreak",71,9},{"Line_Break",71,10},{"loe",72,3},{"logicalorderexception", | |
72,21},{"Logical_Order_Exception",72,23},{"LOE",72,3},{"lower",73,5},{"lowercase", | |
73,9},{"Lower",73,5},{"Lowercase",73,9},{"lc",74,2},{"lowercasemapping",74,16}, | |
{"Lowercase_Mapping",74,17},{"math",75,4},{"Math",75,4},{"na",76,2},{"name",76, | |
4},{"Name",76,4},{"namealias",77,9},{"Name_Alias",77,10},{"nfcqc",78,5}, | |
{"nfcquickcheck",78,13},{"NFC_QC",78,6},{"NFC_Quick_Check",78,15},{"nfdqc",79, | |
5},{"nfdquickcheck",79,13},{"NFD_QC",79,6},{"NFD_Quick_Check",79,15},{"nfkccasefold", | |
80,12},{"nfkccf",80,6},{"NFKC_Casefold",80,13},{"NFKC_CF",80,7},{"nfkcqc",81,6}, | |
{"nfkcquickcheck",81,14},{"NFKC_QC",81,7},{"NFKC_Quick_Check",81,16},{"nfkdqc", | |
82,6},{"nfkdquickcheck",82,14},{"NFKD_QC",82,7},{"NFKD_Quick_Check",82,16}, | |
{"nchar",83,5},{"noncharactercodepoint",83,21},{"NChar",83,5},{"Noncharacter_Code_Point", | |
83,23},{"nt",84,2},{"numerictype",84,11},{"Numeric_Type",84,12},{"numericvalue", | |
85,12},{"nv",85,2},{"Numeric_Value",85,13},{"oalpha",86,6},{"otheralphabetic", | |
86,15},{"OAlpha",86,6},{"Other_Alphabetic",86,16},{"odi",87,3},{"otherdefaultignorablecodepoint", | |
87,30},{"ODI",87,3},{"Other_Default_Ignorable_Code_Point",87,34},{"ogrext",88, | |
6},{"othergraphemeextend",88,19},{"OGr_Ext",88,7},{"Other_Grapheme_Extend",88, | |
21},{"oidc",89,4},{"otheridcontinue",89,15},{"OIDC",89,4},{"Other_ID_Continue", | |
89,17},{"oids",90,4},{"otheridstart",90,12},{"OIDS",90,4},{"Other_ID_Start",90, | |
14},{"olower",91,6},{"otherlowercase",91,14},{"OLower",91,6},{"Other_Lowercase", | |
91,15},{"omath",92,5},{"othermath",92,9},{"OMath",92,5},{"Other_Math",92,10}, | |
{"otheruppercase",93,14},{"oupper",93,6},{"Other_Uppercase",93,15},{"OUpper",93, | |
6},{"patsyn",94,6},{"patternsyntax",94,13},{"Pat_Syn",94,7},{"Pattern_Syntax", | |
94,14},{"patternwhitespace",95,17},{"patws",95,5},{"Pat_WS",95,6},{"Pattern_White_Space", | |
95,19},{"pcm",96,3},{"prependedconcatenationmark",96,26},{"PCM",96,3},{"Prepended_Concatenation_Mark", | |
96,28},{"qmark",97,5},{"quotationmark",97,13},{"QMark",97,5},{"Quotation_Mark", | |
97,14},{"radical",98,7},{"Radical",98,7},{"scf",99,3},{"sfc",99,3},{"space",100, | |
5},{"wspace",100,6},{"WSpace",100,6},{"sc",101,2},{"script",101,6},{"Script", | |
101,6},{"scriptextensions",102,16},{"scx",102,3},{"Script_Extensions",102,17}, | |
{"sb",103,2},{"sentencebreak",103,13},{"SB",103,2},{"Sentence_Break",103,14}, | |
{"sentenceterminal",104,16},{"simplecasefolding",104,17},{"sterm",104,5}, | |
{"Sentence_Terminal",104,17},{"Simple_Case_Folding",104,19},{"STerm",104,5}, | |
{"simplelowercasemapping",105,22},{"slc",105,3},{"Simple_Lowercase_Mapping",105, | |
24},{"simpletitlecasemapping",106,22},{"stc",106,3},{"Simple_Titlecase_Mapping", | |
106,24},{"simpleuppercasemapping",107,22},{"suc",107,3},{"Simple_Uppercase_Mapping", | |
107,24},{"sd",108,2},{"softdotted",108,10},{"SD",108,2},{"Soft_Dotted",108,11}, | |
{"term",109,4},{"terminalpunctuation",109,19},{"Term",109,4},{"Terminal_Punctuation", | |
109,20},{"tc",110,2},{"titlecasemapping",110,16},{"Titlecase_Mapping",110,17}, | |
{"cjkrsunicode",111,12},{"cjkRSUnicode",111,12},{"na1",111,3},{"unicode1name", | |
111,12},{"unicoderadicalstroke",111,20},{"Unicode_1_Name",111,14},{"Unicode_Radical_Stroke", | |
111,22},{"uideo",112,5},{"unifiedideograph",112,16},{"UIdeo",112,5},{"Unified_Ideograph", | |
112,17},{"upper",113,5},{"uppercase",113,9},{"Upper",113,5},{"Uppercase",113,9}, | |
{"uc",114,2},{"uppercasemapping",114,16},{"urs",114,3},{"Uppercase_Mapping",114, | |
17},{"URS",114,3},{"variationselector",115,17},{"vs",115,2},{"Variation_Selector", | |
115,18},{"VS",115,2},{"whitespace",115,10},{"White_Space",115,11},{"wb",116,2}, | |
{"wordbreak",116,9},{"WB",116,2},{"Word_Break",116,10},{"xidc",117,4},{"xidcontinue", | |
117,11},{"XID_Continue",117,12},{"XIDC",117,4},{"xids",118,4},{"xidstart",118, | |
8},{"XID_Start",118,9},{"XIDS",118,4}}; | |
MVMUnicodeNamedAlias Age[58] = { | |
{"1.1",0,3},{"V1_1",0,4},{"v11",0,3},{"2.0",1,3},{"V2_0",1,4},{"v20",1,3}, | |
{"2.1",2,3},{"V2_1",2,4},{"v21",2,3},{"3.0",3,3},{"V3_0",3,4},{"v30",3,3}, | |
{"3.1",4,3},{"V3_1",4,4},{"v31",4,3},{"3.2",5,3},{"V3_2",5,4},{"v32",5,3}, | |
{"4.0",6,3},{"V4_0",6,4},{"v40",6,3},{"4.1",7,3},{"V4_1",7,4},{"v41",7,3}, | |
{"5.0",8,3},{"V5_0",8,4},{"v50",8,3},{"5.1",9,3},{"V5_1",9,4},{"v51",9,3}, | |
{"5.2",10,3},{"V5_2",10,4},{"v52",10,3},{"6.0",11,3},{"V6_0",11,4},{"v60",11,3}, | |
{"6.1",12,3},{"V6_1",12,4},{"v61",12,3},{"6.2",13,3},{"V6_2",13,4},{"v62",13,3}, | |
{"6.3",14,3},{"V6_3",14,4},{"v63",14,3},{"7.0",15,3},{"V7_0",15,4},{"v70",15,3}, | |
{"8.0",16,3},{"V8_0",16,4},{"v80",16,3},{"9.0",17,3},{"V9_0",17,4},{"v90",17,3}, | |
{"NA",18,2},{"Unassigned",18,10},{"na",18,2},{"unassigned",18,10}}; | |
/* 1 Age */ | |
MVMUnicodeNamedAlias Alphabetic[16] = { | |
{"F",0,1},{"False",0,5},{"N",0,1},{"No",0,2},{"f",0,1},{"false",0,5},{"n",0,1}, | |
{"no",0,2},{"T",1,1},{"True",1,4},{"Y",1,1},{"Yes",1,3},{"t",1,1},{"true",1,4}, | |
{"y",1,1},{"yes",1,3}}; | |
/* 2 Alphabetic */ | |
/* 3 ASCII_Hex_Digit */ | |
MVMUnicodeNamedAlias Bidi_Class[92] = { | |
{"AL",0,2},{"Arabic_Letter",0,13},{"al",0,2},{"arabicletter",0,12},{"AN",1,2}, | |
{"Arabic_Number",1,13},{"an",1,2},{"arabicnumber",1,12},{"B",2,1},{"Paragraph_Separator", | |
2,19},{"b",2,1},{"paragraphseparator",2,18},{"BN",3,2},{"Boundary_Neutral",3, | |
16},{"bn",3,2},{"boundaryneutral",3,15},{"CS",4,2},{"Common_Separator",4,16}, | |
{"commonseparator",4,15},{"cs",4,2},{"EN",5,2},{"European_Number",5,15},{"en",5, | |
2},{"europeannumber",5,14},{"ES",6,2},{"European_Separator",6,18},{"es",6,2}, | |
{"europeanseparator",6,17},{"ET",7,2},{"European_Terminator",7,19},{"et",7,2}, | |
{"europeanterminator",7,18},{"FSI",8,3},{"First_Strong_Isolate",8,20},{"firststrongisolate", | |
8,18},{"fsi",8,3},{"L",9,1},{"Left_To_Right",9,13},{"l",9,1},{"lefttoright",9, | |
11},{"LRE",10,3},{"Left_To_Right_Embedding",10,23},{"lefttorightembedding",10, | |
20},{"lre",10,3},{"LRI",11,3},{"Left_To_Right_Isolate",11,21},{"lefttorightisolate", | |
11,18},{"lri",11,3},{"LRO",12,3},{"Left_To_Right_Override",12,22},{"lefttorightoverride", | |
12,19},{"lro",12,3},{"NSM",13,3},{"Nonspacing_Mark",13,15},{"nonspacingmark",13, | |
14},{"nsm",13,3},{"ON",14,2},{"Other_Neutral",14,13},{"on",14,2},{"otherneutral", | |
14,12},{"PDF",15,3},{"Pop_Directional_Format",15,22},{"pdf",15,3},{"popdirectionalformat", | |
15,20},{"PDI",16,3},{"Pop_Directional_Isolate",16,23},{"pdi",16,3},{"popdirectionalisolate", | |
16,21},{"R",17,1},{"Right_To_Left",17,13},{"r",17,1},{"righttoleft",17,11}, | |
{"RLE",18,3},{"Right_To_Left_Embedding",18,23},{"righttoleftembedding",18,20}, | |
{"rle",18,3},{"RLI",19,3},{"Right_To_Left_Isolate",19,21},{"righttoleftisolate", | |
19,18},{"rli",19,3},{"RLO",20,3},{"Right_To_Left_Override",20,22},{"righttoleftoverride", | |
20,19},{"rlo",20,3},{"S",21,1},{"Segment_Separator",21,17},{"s",21,1},{"segmentseparator", | |
21,16},{"WS",22,2},{"White_Space",22,11},{"whitespace",22,10},{"ws",22, | |
2}}; | |
/* 4 Bidi_Class */ | |
/* 5 Bidi_Control */ | |
/* 6 Bidi_Mirrored */ | |
MVMUnicodeNamedAlias Bidi_Paired_Bracket_Type[9] = { | |
{"Close",0,5},{"c",0,1},{"close",0,5},{"None",1,4},{"n",1,1},{"none",1,4}, | |
{"Open",2,4},{"o",2,1},{"open",2,4}}; | |
/* 9 Bidi_Paired_Bracket_Type */ | |
MVMUnicodeNamedAlias Block[788] = { | |
{"Adlam",0,5},{"adlam",0,5},{"Aegean_Numbers",1,14},{"aegeannumbers",1,13}, | |
{"Ahom",2,4},{"ahom",2,4},{"Alchemical",3,10},{"Alchemical_Symbols",3,18}, | |
{"alchemical",3,10},{"alchemicalsymbols",3,17},{"Alphabetic_PF",4,13},{"Alphabetic_Presentation_Forms", | |
4,29},{"alphabeticpf",4,12},{"alphabeticpresentationforms",4,27},{"Anatolian_Hieroglyphs", | |
5,21},{"anatolianhieroglyphs",5,20},{"Ancient_Greek_Music",6,19},{"Ancient_Greek_Musical_Notation", | |
6,30},{"ancientgreekmusic",6,17},{"ancientgreekmusicalnotation",6,27},{"Ancient_Greek_Numbers", | |
7,21},{"ancientgreeknumbers",7,19},{"Ancient_Symbols",8,15},{"ancientsymbols",8, | |
14},{"Arabic",9,6},{"arabic",9,6},{"Arabic_Ext_A",10,12},{"Arabic_Extended_A", | |
10,17},{"arabicexta",10,10},{"arabicextendeda",10,15},{"Arabic_Math",11,11}, | |
{"Arabic_Mathematical_Alphabetic_Symbols",11,38},{"arabicmath",11,10},{"arabicmathematicalalphabeticsymbols", | |
11,35},{"Arabic_PF_A",12,11},{"Arabic_Presentation_Forms-A",12,27},{"Arabic_Presentation_Forms_A", | |
12,27},{"arabicpfa",12,9},{"arabicpresentationforms-a",12,25},{"arabicpresentationformsa", | |
12,24},{"Arabic_PF_B",13,11},{"Arabic_Presentation_Forms_B",13,27},{"arabicpfb", | |
13,9},{"arabicpresentationformsb",13,24},{"Arabic_Sup",14,10},{"Arabic_Supplement", | |
14,17},{"arabicsup",14,9},{"arabicsupplement",14,16},{"Armenian",15,8}, | |
{"armenian",15,8},{"Arrows",16,6},{"arrows",16,6},{"ASCII",17,5},{"Basic_Latin", | |
17,11},{"ascii",17,5},{"basiclatin",17,10},{"Avestan",18,7},{"avestan",18,7}, | |
{"Balinese",19,8},{"balinese",19,8},{"Bamum",20,5},{"bamum",20,5},{"Bamum_Sup", | |
21,9},{"Bamum_Supplement",21,16},{"bamumsup",21,8},{"bamumsupplement",21,15}, | |
{"Bassa_Vah",22,9},{"bassavah",22,8},{"Batak",23,5},{"batak",23,5},{"Bengali", | |
24,7},{"bengali",24,7},{"Bhaiksuki",25,9},{"bhaiksuki",25,9},{"Block_Elements", | |
26,14},{"blockelements",26,13},{"Bopomofo",27,8},{"bopomofo",27,8},{"Bopomofo_Ext", | |
28,12},{"Bopomofo_Extended",28,17},{"bopomofoext",28,11},{"bopomofoextended",28, | |
16},{"Box_Drawing",29,11},{"boxdrawing",29,10},{"Brahmi",30,6},{"brahmi",30,6}, | |
{"Braille",31,7},{"Braille_Patterns",31,16},{"braille",31,7},{"braillepatterns", | |
31,15},{"Buginese",32,8},{"buginese",32,8},{"Buhid",33,5},{"buhid",33,5}, | |
{"Byzantine_Music",34,15},{"Byzantine_Musical_Symbols",34,25},{"byzantinemusic", | |
34,14},{"byzantinemusicalsymbols",34,23},{"Carian",35,6},{"carian",35,6}, | |
{"Caucasian_Albanian",36,18},{"caucasianalbanian",36,17},{"Chakma",37,6}, | |
{"chakma",37,6},{"Cham",38,4},{"cham",38,4},{"Cherokee",39,8},{"cherokee",39,8}, | |
{"Cherokee_Sup",40,12},{"Cherokee_Supplement",40,19},{"cherokeesup",40,11}, | |
{"cherokeesupplement",40,18},{"CJK",41,3},{"CJK_Unified_Ideographs",41,22}, | |
{"cjk",41,3},{"cjkunifiedideographs",41,20},{"CJK_Compat",42,10},{"CJK_Compatibility", | |
42,17},{"cjkcompat",42,9},{"cjkcompatibility",42,16},{"CJK_Compat_Forms",43,16}, | |
{"CJK_Compatibility_Forms",43,23},{"cjkcompatforms",43,14},{"cjkcompatibilityforms", | |
43,21},{"CJK_Compat_Ideographs",44,21},{"CJK_Compatibility_Ideographs",44,28}, | |
{"cjkcompatibilityideographs",44,26},{"cjkcompatideographs",44,19},{"CJK_Compat_Ideographs_Sup", | |
45,25},{"CJK_Compatibility_Ideographs_Supplement",45,39},{"cjkcompatibilityideographssupplement", | |
45,36},{"cjkcompatideographssup",45,22},{"CJK_Ext_A",46,9},{"CJK_Unified_Ideographs_Extension_A", | |
46,34},{"cjkexta",46,7},{"cjkunifiedideographsextensiona",46,30},{"CJK_Ext_B", | |
47,9},{"CJK_Unified_Ideographs_Extension_B",47,34},{"cjkextb",47,7},{"cjkunifiedideographsextensionb", | |
47,30},{"CJK_Ext_C",48,9},{"CJK_Unified_Ideographs_Extension_C",48,34}, | |
{"cjkextc",48,7},{"cjkunifiedideographsextensionc",48,30},{"CJK_Ext_D",49,9}, | |
{"CJK_Unified_Ideographs_Extension_D",49,34},{"cjkextd",49,7},{"cjkunifiedideographsextensiond", | |
49,30},{"CJK_Ext_E",50,9},{"CJK_Unified_Ideographs_Extension_E",50,34}, | |
{"cjkexte",50,7},{"cjkunifiedideographsextensione",50,30},{"CJK_Radicals_Sup", | |
51,16},{"CJK_Radicals_Supplement",51,23},{"cjkradicalssup",51,14},{"cjkradicalssupplement", | |
51,21},{"CJK_Strokes",52,11},{"cjkstrokes",52,10},{"CJK_Symbols",53,11}, | |
{"CJK_Symbols_And_Punctuation",53,27},{"cjksymbols",53,10},{"cjksymbolsandpunctuation", | |
53,24},{"Compat_Jamo",54,11},{"Hangul_Compatibility_Jamo",54,25},{"compatjamo", | |
54,10},{"hangulcompatibilityjamo",54,23},{"Control_Pictures",55,16},{"controlpictures", | |
55,15},{"Coptic",56,6},{"coptic",56,6},{"Coptic_Epact_Numbers",57,20},{"copticepactnumbers", | |
57,18},{"Counting_Rod",58,12},{"Counting_Rod_Numerals",58,21},{"countingrod",58, | |
11},{"countingrodnumerals",58,19},{"Cuneiform",59,9},{"cuneiform",59,9}, | |
{"Cuneiform_Numbers",60,17},{"Cuneiform_Numbers_And_Punctuation",60,33}, | |
{"cuneiformnumbers",60,16},{"cuneiformnumbersandpunctuation",60,30},{"Currency_Symbols", | |
61,16},{"currencysymbols",61,15},{"Cypriot_Syllabary",62,17},{"cypriotsyllabary", | |
62,16},{"Cyrillic",63,8},{"cyrillic",63,8},{"Cyrillic_Ext_A",64,14},{"Cyrillic_Extended_A", | |
64,19},{"cyrillicexta",64,12},{"cyrillicextendeda",64,17},{"Cyrillic_Ext_B",65, | |
14},{"Cyrillic_Extended_B",65,19},{"cyrillicextb",65,12},{"cyrillicextendedb", | |
65,17},{"Cyrillic_Ext_C",66,14},{"Cyrillic_Extended_C",66,19},{"cyrillicextc", | |
66,12},{"cyrillicextendedc",66,17},{"Cyrillic_Sup",67,12},{"Cyrillic_Supplement", | |
67,19},{"Cyrillic_Supplementary",67,22},{"cyrillicsup",67,11},{"cyrillicsupplement", | |
67,18},{"cyrillicsupplementary",67,21},{"Deseret",68,7},{"deseret",68,7}, | |
{"Devanagari",69,10},{"devanagari",69,10},{"Devanagari_Ext",70,14},{"Devanagari_Extended", | |
70,19},{"devanagariext",70,13},{"devanagariextended",70,18},{"Combining_Diacritical_Marks", | |
71,27},{"Diacriticals",71,12},{"combiningdiacriticalmarks",71,25},{"diacriticals", | |
71,12},{"Combining_Diacritical_Marks_Extended",72,36},{"Diacriticals_Ext",72, | |
16},{"combiningdiacriticalmarksextended",72,33},{"diacriticalsext",72,15}, | |
{"Combining_Diacritical_Marks_For_Symbols",73,39},{"Combining_Marks_For_Symbols", | |
73,27},{"Diacriticals_For_Symbols",73,24},{"combiningdiacriticalmarksforsymbols", | |
73,35},{"combiningmarksforsymbols",73,24},{"diacriticalsforsymbols",73,22}, | |
{"Combining_Diacritical_Marks_Supplement",74,38},{"Diacriticals_Sup",74,16}, | |
{"combiningdiacriticalmarkssupplement",74,35},{"diacriticalssup",74,15}, | |
{"Dingbats",75,8},{"dingbats",75,8},{"Domino",76,6},{"Domino_Tiles",76,12}, | |
{"domino",76,6},{"dominotiles",76,11},{"Duployan",77,8},{"duployan",77,8}, | |
{"Early_Dynastic_Cuneiform",78,24},{"earlydynasticcuneiform",78,22},{"Egyptian_Hieroglyphs", | |
79,20},{"egyptianhieroglyphs",79,19},{"Elbasan",80,7},{"elbasan",80,7}, | |
{"Emoticons",81,9},{"emoticons",81,9},{"Enclosed_Alphanum",82,17},{"Enclosed_Alphanumerics", | |
82,22},{"enclosedalphanum",82,16},{"enclosedalphanumerics",82,21},{"Enclosed_Alphanum_Sup", | |
83,21},{"Enclosed_Alphanumeric_Supplement",83,32},{"enclosedalphanumericsupplement", | |
83,30},{"enclosedalphanumsup",83,19},{"Enclosed_CJK",84,12},{"Enclosed_CJK_Letters_And_Months", | |
84,31},{"enclosedcjk",84,11},{"enclosedcjklettersandmonths",84,27},{"Enclosed_Ideographic_Sup", | |
85,24},{"Enclosed_Ideographic_Supplement",85,31},{"enclosedideographicsup",85, | |
22},{"enclosedideographicsupplement",85,29},{"Ethiopic",86,8},{"ethiopic",86,8}, | |
{"Ethiopic_Ext",87,12},{"Ethiopic_Extended",87,17},{"ethiopicext",87,11}, | |
{"ethiopicextended",87,16},{"Ethiopic_Ext_A",88,14},{"Ethiopic_Extended_A",88, | |
19},{"ethiopicexta",88,12},{"ethiopicextendeda",88,17},{"Ethiopic_Sup",89,12}, | |
{"Ethiopic_Supplement",89,19},{"ethiopicsup",89,11},{"ethiopicsupplement",89, | |
18},{"Geometric_Shapes",90,16},{"geometricshapes",90,15},{"Geometric_Shapes_Ext", | |
91,20},{"Geometric_Shapes_Extended",91,25},{"geometricshapesext",91,18}, | |
{"geometricshapesextended",91,23},{"Georgian",92,8},{"georgian",92,8},{"Georgian_Sup", | |
93,12},{"Georgian_Supplement",93,19},{"georgiansup",93,11},{"georgiansupplement", | |
93,18},{"Glagolitic",94,10},{"glagolitic",94,10},{"Glagolitic_Sup",95,14}, | |
{"Glagolitic_Supplement",95,21},{"glagoliticsup",95,13},{"glagoliticsupplement", | |
95,20},{"Gothic",96,6},{"gothic",96,6},{"Grantha",97,7},{"grantha",97,7}, | |
{"Greek",98,5},{"Greek_And_Coptic",98,16},{"greek",98,5},{"greekandcoptic",98, | |
14},{"Greek_Ext",99,9},{"Greek_Extended",99,14},{"greekext",99,8},{"greekextended", | |
99,13},{"Gujarati",100,8},{"gujarati",100,8},{"Gurmukhi",101,8},{"gurmukhi",101, | |
8},{"Half_And_Full_Forms",102,19},{"Halfwidth_And_Fullwidth_Forms",102,29}, | |
{"halfandfullforms",102,16},{"halfwidthandfullwidthforms",102,26},{"Combining_Half_Marks", | |
103,20},{"Half_Marks",103,10},{"combininghalfmarks",103,18},{"halfmarks",103,9}, | |
{"Hangul",104,6},{"Hangul_Syllables",104,16},{"hangul",104,6},{"hangulsyllables", | |
104,15},{"Hanunoo",105,7},{"hanunoo",105,7},{"Hatran",106,6},{"hatran",106,6}, | |
{"Hebrew",107,6},{"hebrew",107,6},{"High_PU_Surrogates",108,18},{"High_Private_Use_Surrogates", | |
108,27},{"highprivateusesurrogates",108,24},{"highpusurrogates",108,16}, | |
{"High_Surrogates",109,15},{"highsurrogates",109,14},{"Hiragana",110,8}, | |
{"hiragana",110,8},{"IDC",111,3},{"Ideographic_Description_Characters",111,34}, | |
{"idc",111,3},{"ideographicdescriptioncharacters",111,32},{"Ideographic_Symbols", | |
112,19},{"Ideographic_Symbols_And_Punctuation",112,35},{"ideographicsymbols", | |
112,18},{"ideographicsymbolsandpunctuation",112,32},{"Imperial_Aramaic",113,16}, | |
{"imperialaramaic",113,15},{"Common_Indic_Number_Forms",114,25},{"Indic_Number_Forms", | |
114,18},{"commonindicnumberforms",114,22},{"indicnumberforms",114,16},{"Inscriptional_Pahlavi", | |
115,21},{"inscriptionalpahlavi",115,20},{"Inscriptional_Parthian",116,22}, | |
{"inscriptionalparthian",116,21},{"IPA_Ext",117,7},{"IPA_Extensions",117,14}, | |
{"ipaext",117,6},{"ipaextensions",117,13},{"Hangul_Jamo",118,11},{"Jamo",118,4}, | |
{"hanguljamo",118,10},{"jamo",118,4},{"Hangul_Jamo_Extended_A",119,22}, | |
{"Jamo_Ext_A",119,10},{"hanguljamoextendeda",119,19},{"jamoexta",119,8}, | |
{"Hangul_Jamo_Extended_B",120,22},{"Jamo_Ext_B",120,10},{"hanguljamoextendedb", | |
120,19},{"jamoextb",120,8},{"Javanese",121,8},{"javanese",121,8},{"Kaithi",122, | |
6},{"kaithi",122,6},{"Kana_Sup",123,8},{"Kana_Supplement",123,15},{"kanasup", | |
123,7},{"kanasupplement",123,14},{"Kanbun",124,6},{"kanbun",124,6},{"Kangxi", | |
125,6},{"Kangxi_Radicals",125,15},{"kangxi",125,6},{"kangxiradicals",125,14}, | |
{"Kannada",126,7},{"kannada",126,7},{"Katakana",127,8},{"katakana",127,8}, | |
{"Katakana_Ext",128,12},{"Katakana_Phonetic_Extensions",128,28},{"katakanaext", | |
128,11},{"katakanaphoneticextensions",128,26},{"Kayah_Li",129,8},{"kayahli",129, | |
7},{"Kharoshthi",130,10},{"kharoshthi",130,10},{"Khmer",131,5},{"khmer",131,5}, | |
{"Khmer_Symbols",132,13},{"khmersymbols",132,12},{"Khojki",133,6},{"khojki",133, | |
6},{"Khudawadi",134,9},{"khudawadi",134,9},{"Lao",135,3},{"lao",135,3}, | |
{"Latin_1",136,7},{"Latin_1_Sup",136,11},{"Latin_1_Supplement",136,18}, | |
{"latin1",136,6},{"latin1sup",136,9},{"latin1supplement",136,16},{"Latin_Ext_A", | |
137,11},{"Latin_Extended_A",137,16},{"latinexta",137,9},{"latinextendeda",137, | |
14},{"Latin_Ext_Additional",138,20},{"Latin_Extended_Additional",138,25}, | |
{"latinextadditional",138,18},{"latinextendedadditional",138,23},{"Latin_Ext_B", | |
139,11},{"Latin_Extended_B",139,16},{"latinextb",139,9},{"latinextendedb",139, | |
14},{"Latin_Ext_C",140,11},{"Latin_Extended_C",140,16},{"latinextc",140,9}, | |
{"latinextendedc",140,14},{"Latin_Ext_D",141,11},{"Latin_Extended_D",141,16}, | |
{"latinextd",141,9},{"latinextendedd",141,14},{"Latin_Ext_E",142,11},{"Latin_Extended_E", | |
142,16},{"latinexte",142,9},{"latinextendede",142,14},{"Lepcha",143,6}, | |
{"lepcha",143,6},{"Letterlike_Symbols",144,18},{"letterlikesymbols",144,17}, | |
{"Limbu",145,5},{"limbu",145,5},{"Linear_A",146,8},{"lineara",146,7},{"Linear_B_Ideograms", | |
147,18},{"linearbideograms",147,16},{"Linear_B_Syllabary",148,18},{"linearbsyllabary", | |
148,16},{"Lisu",149,4},{"lisu",149,4},{"Low_Surrogates",150,14},{"lowsurrogates", | |
150,13},{"Lycian",151,6},{"lycian",151,6},{"Lydian",152,6},{"lydian",152,6}, | |
{"Mahajani",153,8},{"mahajani",153,8},{"Mahjong",154,7},{"Mahjong_Tiles",154, | |
13},{"mahjong",154,7},{"mahjongtiles",154,12},{"Malayalam",155,9},{"malayalam", | |
155,9},{"Mandaic",156,7},{"mandaic",156,7},{"Manichaean",157,10},{"manichaean", | |
157,10},{"Marchen",158,7},{"marchen",158,7},{"Math_Alphanum",159,13},{"Mathematical_Alphanumeric_Symbols", | |
159,33},{"mathalphanum",159,12},{"mathematicalalphanumericsymbols",159,31}, | |
{"Math_Operators",160,14},{"Mathematical_Operators",160,22},{"mathematicaloperators", | |
160,21},{"mathoperators",160,13},{"Meetei_Mayek",161,12},{"meeteimayek",161,11}, | |
{"Meetei_Mayek_Ext",162,16},{"Meetei_Mayek_Extensions",162,23},{"meeteimayekext", | |
162,14},{"meeteimayekextensions",162,21},{"Mende_Kikakui",163,13},{"mendekikakui", | |
163,12},{"Meroitic_Cursive",164,16},{"meroiticcursive",164,15},{"Meroitic_Hieroglyphs", | |
165,20},{"meroitichieroglyphs",165,19},{"Miao",166,4},{"miao",166,4},{"Misc_Arrows", | |
167,11},{"Miscellaneous_Symbols_And_Arrows",167,32},{"miscarrows",167,10}, | |
{"miscellaneoussymbolsandarrows",167,29},{"Misc_Math_Symbols_A",168,19}, | |
{"Miscellaneous_Mathematical_Symbols_A",168,36},{"miscellaneousmathematicalsymbolsa", | |
168,33},{"miscmathsymbolsa",168,16},{"Misc_Math_Symbols_B",169,19},{"Miscellaneous_Mathematical_Symbols_B", | |
169,36},{"miscellaneousmathematicalsymbolsb",169,33},{"miscmathsymbolsb",169, | |
16},{"Misc_Pictographs",170,16},{"Miscellaneous_Symbols_And_Pictographs",170, | |
37},{"miscellaneoussymbolsandpictographs",170,34},{"miscpictographs",170,15}, | |
{"Misc_Symbols",171,12},{"Miscellaneous_Symbols",171,21},{"miscellaneoussymbols", | |
171,20},{"miscsymbols",171,11},{"Misc_Technical",172,14},{"Miscellaneous_Technical", | |
172,23},{"miscellaneoustechnical",172,22},{"misctechnical",172,13},{"Modi",173, | |
4},{"modi",173,4},{"Modifier_Letters",174,16},{"Spacing_Modifier_Letters",174, | |
24},{"modifierletters",174,15},{"spacingmodifierletters",174,22},{"Modifier_Tone_Letters", | |
175,21},{"modifiertoneletters",175,19},{"Mongolian",176,9},{"mongolian",176,9}, | |
{"Mongolian_Sup",177,13},{"Mongolian_Supplement",177,20},{"mongoliansup",177, | |
12},{"mongoliansupplement",177,19},{"Mro",178,3},{"mro",178,3},{"Multani",179, | |
7},{"multani",179,7},{"Music",180,5},{"Musical_Symbols",180,15},{"music",180,5}, | |
{"musicalsymbols",180,14},{"Myanmar",181,7},{"myanmar",181,7},{"Myanmar_Ext_A", | |
182,13},{"Myanmar_Extended_A",182,18},{"myanmarexta",182,11},{"myanmarextendeda", | |
182,16},{"Myanmar_Ext_B",183,13},{"Myanmar_Extended_B",183,18},{"myanmarextb", | |
183,11},{"myanmarextendedb",183,16},{"Nabataean",184,9},{"nabataean",184,9}, | |
{"NB",185,2},{"No_Block",185,8},{"nb",185,2},{"noblock",185,7},{"New_Tai_Lue", | |
186,11},{"newtailue",186,9},{"Newa",187,4},{"newa",187,4},{"NKo",188,3},{"nko", | |
188,3},{"Number_Forms",189,12},{"numberforms",189,11},{"OCR",190,3},{"Optical_Character_Recognition", | |
190,29},{"ocr",190,3},{"opticalcharacterrecognition",190,27},{"Ogham",191,5}, | |
{"ogham",191,5},{"Ol_Chiki",192,8},{"olchiki",192,7},{"Old_Hungarian",193,13}, | |
{"oldhungarian",193,12},{"Old_Italic",194,10},{"olditalic",194,9},{"Old_North_Arabian", | |
195,17},{"oldnortharabian",195,15},{"Old_Permic",196,10},{"oldpermic",196,9}, | |
{"Old_Persian",197,11},{"oldpersian",197,10},{"Old_South_Arabian",198,17}, | |
{"oldsoutharabian",198,15},{"Old_Turkic",199,10},{"oldturkic",199,9},{"Oriya", | |
200,5},{"oriya",200,5},{"Ornamental_Dingbats",201,19},{"ornamentaldingbats",201, | |
18},{"Osage",202,5},{"osage",202,5},{"Osmanya",203,7},{"osmanya",203,7}, | |
{"Pahawh_Hmong",204,12},{"pahawhhmong",204,11},{"Palmyrene",205,9},{"palmyrene", | |
205,9},{"Pau_Cin_Hau",206,11},{"paucinhau",206,9},{"Phags_Pa",207,8},{"phagspa", | |
207,7},{"Phaistos",208,8},{"Phaistos_Disc",208,13},{"phaistos",208,8},{"phaistosdisc", | |
208,12},{"Phoenician",209,10},{"phoenician",209,10},{"Phonetic_Ext",210,12}, | |
{"Phonetic_Extensions",210,19},{"phoneticext",210,11},{"phoneticextensions",210, | |
18},{"Phonetic_Ext_Sup",211,16},{"Phonetic_Extensions_Supplement",211,30}, | |
{"phoneticextensionssupplement",211,28},{"phoneticextsup",211,14},{"Playing_Cards", | |
212,13},{"playingcards",212,12},{"Psalter_Pahlavi",213,15},{"psalterpahlavi", | |
213,14},{"PUA",214,3},{"Private_Use",214,11},{"Private_Use_Area",214,16}, | |
{"privateuse",214,10},{"privateusearea",214,14},{"pua",214,3},{"General_Punctuation", | |
215,19},{"Punctuation",215,11},{"generalpunctuation",215,18},{"punctuation",215, | |
11},{"Rejang",216,6},{"rejang",216,6},{"Rumi",217,4},{"Rumi_Numeral_Symbols", | |
217,20},{"rumi",217,4},{"ruminumeralsymbols",217,18},{"Runic",218,5},{"runic", | |
218,5},{"Samaritan",219,9},{"samaritan",219,9},{"Saurashtra",220,10},{"saurashtra", | |
220,10},{"Sharada",221,7},{"sharada",221,7},{"Shavian",222,7},{"shavian",222,7}, | |
{"Shorthand_Format_Controls",223,25},{"shorthandformatcontrols",223,23}, | |
{"Siddham",224,7},{"siddham",224,7},{"Sinhala",225,7},{"sinhala",225,7}, | |
{"Sinhala_Archaic_Numbers",226,23},{"sinhalaarchaicnumbers",226,21},{"Small_Form_Variants", | |
227,19},{"Small_Forms",227,11},{"smallforms",227,10},{"smallformvariants",227, | |
17},{"Sora_Sompeng",228,12},{"sorasompeng",228,11},{"Specials",229,8},{"specials", | |
229,8},{"Sundanese",230,9},{"sundanese",230,9},{"Sundanese_Sup",231,13}, | |
{"Sundanese_Supplement",231,20},{"sundanesesup",231,12},{"sundanesesupplement", | |
231,19},{"Sup_Arrows_A",232,12},{"Supplemental_Arrows_A",232,21},{"suparrowsa", | |
232,10},{"supplementalarrowsa",232,19},{"Sup_Arrows_B",233,12},{"Supplemental_Arrows_B", | |
233,21},{"suparrowsb",233,10},{"supplementalarrowsb",233,19},{"Sup_Arrows_C", | |
234,12},{"Supplemental_Arrows_C",234,21},{"suparrowsc",234,10},{"supplementalarrowsc", | |
234,19},{"Sup_Math_Operators",235,18},{"Supplemental_Mathematical_Operators", | |
235,35},{"supmathoperators",235,16},{"supplementalmathematicaloperators",235, | |
33},{"Sup_PUA_A",236,9},{"Supplementary_Private_Use_Area_A",236,32},{"supplementaryprivateuseareaa", | |
236,28},{"suppuaa",236,7},{"Sup_PUA_B",237,9},{"Supplementary_Private_Use_Area_B", | |
237,32},{"supplementaryprivateuseareab",237,28},{"suppuab",237,7},{"Sup_Punctuation", | |
238,15},{"Supplemental_Punctuation",238,24},{"supplementalpunctuation",238,23}, | |
{"suppunctuation",238,14},{"Sup_Symbols_And_Pictographs",239,27},{"Supplemental_Symbols_And_Pictographs", | |
239,36},{"supplementalsymbolsandpictographs",239,33},{"supsymbolsandpictographs", | |
239,24},{"Super_And_Sub",240,13},{"Superscripts_And_Subscripts",240,27}, | |
{"superandsub",240,11},{"superscriptsandsubscripts",240,25},{"Sutton_SignWriting", | |
241,18},{"suttonsignwriting",241,17},{"Syloti_Nagri",242,12},{"sylotinagri",242, | |
11},{"Syriac",243,6},{"syriac",243,6},{"Tagalog",244,7},{"tagalog",244,7}, | |
{"Tagbanwa",245,8},{"tagbanwa",245,8},{"Tags",246,4},{"tags",246,4},{"Tai_Le", | |
247,6},{"taile",247,5},{"Tai_Tham",248,8},{"taitham",248,7},{"Tai_Viet",249,8}, | |
{"taiviet",249,7},{"Tai_Xuan_Jing",250,13},{"Tai_Xuan_Jing_Symbols",250,21}, | |
{"taixuanjing",250,11},{"taixuanjingsymbols",250,18},{"Takri",251,5},{"takri", | |
251,5},{"Tamil",252,5},{"tamil",252,5},{"Tangut",253,6},{"tangut",253,6}, | |
{"Tangut_Components",254,17},{"tangutcomponents",254,16},{"Telugu",255,6}, | |
{"telugu",255,6},{"Thaana",256,6},{"thaana",256,6},{"Thai",257,4},{"thai",257, | |
4},{"Tibetan",258,7},{"tibetan",258,7},{"Tifinagh",259,8},{"tifinagh",259,8}, | |
{"Tirhuta",260,7},{"tirhuta",260,7},{"Transport_And_Map",261,17},{"Transport_And_Map_Symbols", | |
261,25},{"transportandmap",261,15},{"transportandmapsymbols",261,22},{"Canadian_Syllabics", | |
262,18},{"UCAS",262,4},{"Unified_Canadian_Aboriginal_Syllabics",262,37}, | |
{"canadiansyllabics",262,17},{"ucas",262,4},{"unifiedcanadianaboriginalsyllabics", | |
262,34},{"UCAS_Ext",263,8},{"Unified_Canadian_Aboriginal_Syllabics_Extended", | |
263,46},{"ucasext",263,7},{"unifiedcanadianaboriginalsyllabicsextended",263,42}, | |
{"Ugaritic",264,8},{"ugaritic",264,8},{"Vai",265,3},{"vai",265,3},{"Vedic_Ext", | |
266,9},{"Vedic_Extensions",266,16},{"vedicext",266,8},{"vedicextensions",266, | |
15},{"Vertical_Forms",267,14},{"verticalforms",267,13},{"VS",268,2},{"Variation_Selectors", | |
268,19},{"variationselectors",268,18},{"vs",268,2},{"VS_Sup",269,6},{"Variation_Selectors_Supplement", | |
269,30},{"variationselectorssupplement",269,28},{"vssup",269,5},{"Warang_Citi", | |
270,11},{"warangciti",270,10},{"Yi_Radicals",271,11},{"yiradicals",271,10}, | |
{"Yi_Syllables",272,12},{"yisyllables",272,11},{"Yijing",273,6},{"Yijing_Hexagram_Symbols", | |
273,23},{"yijing",273,6},{"yijinghexagramsymbols",273,21}}; | |
/* 10 Block */
MVMUnicodeNamedAlias Canonical_Combining_Class[211] = { | |
{"0",0,1},{"NR",0,2},{"Not_Reordered",0,13},{"notreordered",0,12},{"nr",0,2}, | |
{"1",1,1},{"OV",1,2},{"Overlay",1,7},{"ov",1,2},{"overlay",1,7},{"7",2,1},{"NK", | |
2,2},{"Nukta",2,5},{"nk",2,2},{"nukta",2,5},{"8",3,1},{"KV",3,2},{"Kana_Voicing", | |
3,12},{"kanavoicing",3,11},{"kv",3,2},{"9",4,1},{"VR",4,2},{"Virama",4,6}, | |
{"virama",4,6},{"vr",4,2},{"10",5,2},{"CCC10",5,5},{"ccc10",5,5},{"11",6,2}, | |
{"CCC11",6,5},{"ccc11",6,5},{"12",7,2},{"CCC12",7,5},{"ccc12",7,5},{"13",8,2}, | |
{"CCC13",8,5},{"ccc13",8,5},{"14",9,2},{"CCC14",9,5},{"ccc14",9,5},{"15",10,2}, | |
{"CCC15",10,5},{"ccc15",10,5},{"16",11,2},{"CCC16",11,5},{"ccc16",11,5},{"17", | |
12,2},{"CCC17",12,5},{"ccc17",12,5},{"18",13,2},{"CCC18",13,5},{"ccc18",13,5}, | |
{"19",14,2},{"CCC19",14,5},{"ccc19",14,5},{"20",15,2},{"CCC20",15,5},{"ccc20", | |
15,5},{"21",16,2},{"CCC21",16,5},{"ccc21",16,5},{"22",17,2},{"CCC22",17,5}, | |
{"ccc22",17,5},{"23",18,2},{"CCC23",18,5},{"ccc23",18,5},{"24",19,2},{"CCC24", | |
19,5},{"ccc24",19,5},{"25",20,2},{"CCC25",20,5},{"ccc25",20,5},{"26",21,2}, | |
{"CCC26",21,5},{"ccc26",21,5},{"27",22,2},{"CCC27",22,5},{"ccc27",22,5},{"28", | |
23,2},{"CCC28",23,5},{"ccc28",23,5},{"29",24,2},{"CCC29",24,5},{"ccc29",24,5}, | |
{"30",25,2},{"CCC30",25,5},{"ccc30",25,5},{"31",26,2},{"CCC31",26,5},{"ccc31", | |
26,5},{"32",27,2},{"CCC32",27,5},{"ccc32",27,5},{"33",28,2},{"CCC33",28,5}, | |
{"ccc33",28,5},{"34",29,2},{"CCC34",29,5},{"ccc34",29,5},{"35",30,2},{"CCC35", | |
30,5},{"ccc35",30,5},{"36",31,2},{"CCC36",31,5},{"ccc36",31,5},{"84",32,2}, | |
{"CCC84",32,5},{"ccc84",32,5},{"91",33,2},{"CCC91",33,5},{"ccc91",33,5},{"103", | |
34,3},{"CCC103",34,6},{"ccc103",34,6},{"107",35,3},{"CCC107",35,6},{"ccc107",35, | |
6},{"118",36,3},{"CCC118",36,6},{"ccc118",36,6},{"122",37,3},{"CCC122",37,6}, | |
{"ccc122",37,6},{"129",38,3},{"CCC129",38,6},{"ccc129",38,6},{"130",39,3}, | |
{"CCC130",39,6},{"ccc130",39,6},{"132",40,3},{"CCC132",40,6},{"ccc132",40,6}, | |
{"133",41,3},{"CCC133",41,6},{"ccc133",41,6},{"200",42,3},{"ATBL",42,4}, | |
{"Attached_Below_Left",42,19},{"atbl",42,4},{"attachedbelowleft",42,17},{"202", | |
43,3},{"ATB",43,3},{"Attached_Below",43,14},{"atb",43,3},{"attachedbelow",43, | |
13},{"214",44,3},{"ATA",44,3},{"Attached_Above",44,14},{"ata",44,3},{"attachedabove", | |
44,13},{"216",45,3},{"ATAR",45,4},{"Attached_Above_Right",45,20},{"atar",45,4}, | |
{"attachedaboveright",45,18},{"218",46,3},{"BL",46,2},{"Below_Left",46,10}, | |
{"belowleft",46,9},{"bl",46,2},{"220",47,3},{"B",47,1},{"Below",47,5},{"b",47, | |
1},{"below",47,5},{"222",48,3},{"BR",48,2},{"Below_Right",48,11},{"belowright", | |
48,10},{"br",48,2},{"224",49,3},{"L",49,1},{"Left",49,4},{"l",49,1},{"left",49, | |
4},{"226",50,3},{"R",50,1},{"Right",50,5},{"r",50,1},{"right",50,5},{"228",51, | |
3},{"AL",51,2},{"Above_Left",51,10},{"aboveleft",51,9},{"al",51,2},{"230",52,3}, | |
{"A",52,1},{"Above",52,5},{"a",52,1},{"above",52,5},{"232",53,3},{"AR",53,2}, | |
{"Above_Right",53,11},{"aboveright",53,10},{"ar",53,2},{"233",54,3},{"DB",54,2}, | |
{"Double_Below",54,12},{"db",54,2},{"doublebelow",54,11},{"234",55,3},{"DA",55, | |
2},{"Double_Above",55,12},{"da",55,2},{"doubleabove",55,11},{"240",56,3},{"IS", | |
56,2},{"Iota_Subscript",56,14},{"iotasubscript",56,13},{"is",56,2}}; | |
/* 11 Canonical_Combining_Class */
/* 13 Case_Ignorable */
/* 14 Cased */
/* 15 Changes_When_Casefolded */
/* 16 Changes_When_Casemapped */
/* 17 Changes_When_Lowercased */
/* 18 Changes_When_NFKC_Casefolded */
/* 19 Changes_When_Titlecased */
/* 20 Changes_When_Uppercased */
/* 21 Composition_Exclusion */
/* 22 Dash */
MVMUnicodeNamedAlias Decomposition_Type[64] = { | |
{"Can",0,3},{"Canonical",0,9},{"can",0,3},{"canonical",0,9},{"Com",1,3}, | |
{"Compat",1,6},{"com",1,3},{"compat",1,6},{"Circle",2,6},{"Enc",2,3},{"circle", | |
2,6},{"enc",2,3},{"Fin",3,3},{"Final",3,5},{"fin",3,3},{"final",3,5},{"Font",4, | |
4},{"font",4,4},{"Fra",5,3},{"Fraction",5,8},{"fra",5,3},{"fraction",5,8}, | |
{"Init",6,4},{"Initial",6,7},{"init",6,4},{"initial",6,7},{"Iso",7,3},{"Isolated", | |
7,8},{"iso",7,3},{"isolated",7,8},{"Med",8,3},{"Medial",8,6},{"med",8,3}, | |
{"medial",8,6},{"Nar",9,3},{"Narrow",9,6},{"nar",9,3},{"narrow",9,6},{"Nb",10, | |
2},{"Nobreak",10,7},{"nb",10,2},{"nobreak",10,7},{"None",11,4},{"none",11,4}, | |
{"Small",12,5},{"Sml",12,3},{"small",12,5},{"sml",12,3},{"Sqr",13,3},{"Square", | |
13,6},{"sqr",13,3},{"square",13,6},{"Sub",14,3},{"sub",14,3},{"Sup",15,3}, | |
{"Super",15,5},{"sup",15,3},{"super",15,5},{"Vert",16,4},{"Vertical",16,8}, | |
{"vert",16,4},{"vertical",16,8},{"Wide",17,4},{"wide",17,4}}; | |
/* 24 Decomposition_Type */
/* 25 Default_Ignorable_Code_Point */
/* 26 Deprecated */
/* 27 Diacritic */
MVMUnicodeNamedAlias East_Asian_Width[24] = { | |
{"A",0,1},{"Ambiguous",0,9},{"a",0,1},{"ambiguous",0,9},{"F",1,1},{"Fullwidth", | |
1,9},{"f",1,1},{"fullwidth",1,9},{"H",2,1},{"Halfwidth",2,9},{"h",2,1}, | |
{"halfwidth",2,9},{"N",3,1},{"Neutral",3,7},{"n",3,1},{"neutral",3,7},{"Na",4, | |
2},{"Narrow",4,6},{"na",4,2},{"narrow",4,6},{"W",5,1},{"Wide",5,4},{"w",5,1}, | |
{"wide",5,4}}; | |
/* 28 East_Asian_Width */
/* 29 Expands_On_NFC */
/* 30 Expands_On_NFD */
/* 31 Expands_On_NFKC */
/* 32 Expands_On_NFKD */
/* 33 Extender */
/* 35 Full_Composition_Exclusion */
MVMUnicodeNamedAlias General_Category[157] = { | |
{"C",0,1},{"Other",0,5},{"c",0,1},{"other",0,5},{"Cc",1,2},{"Control",1,7}, | |
{"cc",1,2},{"cntrl",1,5},{"control",1,7},{"Cf",2,2},{"Format",2,6},{"cf",2,2}, | |
{"format",2,6},{"Cn",3,2},{"Unassigned",3,10},{"cn",3,2},{"unassigned",3,10}, | |
{"Co",4,2},{"Private_Use",4,11},{"co",4,2},{"privateuse",4,10},{"Cs",5,2}, | |
{"Surrogate",5,9},{"cs",5,2},{"surrogate",5,9},{"L",6,1},{"Letter",6,6},{"l",6, | |
1},{"letter",6,6},{"Cased_Letter",7,12},{"LC",7,2},{"casedletter",7,11},{"lc",7, | |
2},{"Ll",8,2},{"Lowercase_Letter",8,16},{"ll",8,2},{"lowercaseletter",8,15}, | |
{"Lm",9,2},{"Modifier_Letter",9,15},{"lm",9,2},{"modifierletter",9,14},{"Lo",10, | |
2},{"Other_Letter",10,12},{"lo",10,2},{"otherletter",10,11},{"Lt",11,2}, | |
{"Titlecase_Letter",11,16},{"lt",11,2},{"titlecaseletter",11,15},{"Lu",12,2}, | |
{"Uppercase_Letter",12,16},{"lu",12,2},{"uppercaseletter",12,15},{"Combining_Mark", | |
13,14},{"M",13,1},{"Mark",13,4},{"combiningmark",13,13},{"m",13,1},{"mark",13, | |
4},{"Mc",14,2},{"Spacing_Mark",14,12},{"mc",14,2},{"spacingmark",14,11}, | |
{"Enclosing_Mark",15,14},{"Me",15,2},{"enclosingmark",15,13},{"me",15,2},{"Mn", | |
16,2},{"Nonspacing_Mark",16,15},{"mn",16,2},{"nonspacingmark",16,14},{"N",17,1}, | |
{"Number",17,6},{"n",17,1},{"number",17,6},{"Decimal_Number",18,14},{"Nd",18,2}, | |
{"decimalnumber",18,13},{"digit",18,5},{"nd",18,2},{"Letter_Number",19,13}, | |
{"Nl",19,2},{"letternumber",19,12},{"nl",19,2},{"No",20,2},{"Other_Number",20, | |
12},{"no",20,2},{"othernumber",20,11},{"P",21,1},{"Punctuation",21,11},{"p",21, | |
1},{"punct",21,5},{"punctuation",21,11},{"Connector_Punctuation",22,21},{"Pc", | |
22,2},{"connectorpunctuation",22,20},{"pc",22,2},{"Dash_Punctuation",23,16}, | |
{"Pd",23,2},{"dashpunctuation",23,15},{"pd",23,2},{"Close_Punctuation",24,17}, | |
{"Pe",24,2},{"closepunctuation",24,16},{"pe",24,2},{"Final_Punctuation",25,17}, | |
{"Pf",25,2},{"finalpunctuation",25,16},{"pf",25,2},{"Initial_Punctuation",26, | |
19},{"Pi",26,2},{"initialpunctuation",26,18},{"pi",26,2},{"Other_Punctuation", | |
27,17},{"Po",27,2},{"otherpunctuation",27,16},{"po",27,2},{"Open_Punctuation", | |
28,16},{"Ps",28,2},{"openpunctuation",28,15},{"ps",28,2},{"S",29,1},{"Symbol", | |
29,6},{"s",29,1},{"symbol",29,6},{"Currency_Symbol",30,15},{"Sc",30,2}, | |
{"currencysymbol",30,14},{"sc",30,2},{"Modifier_Symbol",31,15},{"Sk",31,2}, | |
{"modifiersymbol",31,14},{"sk",31,2},{"Math_Symbol",32,11},{"Sm",32,2}, | |
{"mathsymbol",32,10},{"sm",32,2},{"Other_Symbol",33,12},{"So",33,2},{"othersymbol", | |
33,11},{"so",33,2},{"Separator",34,9},{"Z",34,1},{"separator",34,9},{"z",34,1}, | |
{"Line_Separator",35,14},{"Zl",35,2},{"lineseparator",35,13},{"zl",35,2}, | |
{"Paragraph_Separator",36,19},{"Zp",36,2},{"paragraphseparator",36,18},{"zp",36, | |
2},{"Space_Separator",37,15},{"Zs",37,2},{"spaceseparator",37,14},{"zs",37, | |
2}}; | |
/* 36 General_Category */
/* 37 Grapheme_Base */
MVMUnicodeNamedAlias Grapheme_Cluster_Break[56] = { | |
{"CN",0,2},{"Control",0,7},{"cn",0,2},{"control",0,7},{"CR",1,2},{"cr",1,2}, | |
{"EB",2,2},{"E_Base",2,6},{"eb",2,2},{"ebase",2,5},{"EBG",3,3},{"E_Base_GAZ",3, | |
10},{"ebasegaz",3,8},{"ebg",3,3},{"EM",4,2},{"E_Modifier",4,10},{"em",4,2}, | |
{"emodifier",4,9},{"EX",5,2},{"Extend",5,6},{"ex",5,2},{"extend",5,6},{"GAZ",6, | |
3},{"Glue_After_Zwj",6,14},{"gaz",6,3},{"glueafterzwj",6,12},{"L",7,1},{"l",7, | |
1},{"LF",8,2},{"lf",8,2},{"LV",9,2},{"lv",9,2},{"LVT",10,3},{"lvt",10,3},{"PP", | |
11,2},{"Prepend",11,7},{"pp",11,2},{"prepend",11,7},{"RI",12,2},{"Regional_Indicator", | |
12,18},{"regionalindicator",12,17},{"ri",12,2},{"SM",13,2},{"SpacingMark",13, | |
11},{"sm",13,2},{"spacingmark",13,11},{"T",14,1},{"t",14,1},{"V",15,1},{"v",15, | |
1},{"Other",16,5},{"XX",16,2},{"other",16,5},{"xx",16,2},{"ZWJ",17,3},{"zwj",17, | |
3}}; | |
/* 38 Grapheme_Cluster_Break */
/* 39 Grapheme_Extend */
/* 40 Grapheme_Link */
MVMUnicodeNamedAlias Hangul_Syllable_Type[24] = { | |
{"L",0,1},{"Leading_Jamo",0,12},{"l",0,1},{"leadingjamo",0,11},{"LV",1,2}, | |
{"LV_Syllable",1,11},{"lv",1,2},{"lvsyllable",1,10},{"LVT",2,3},{"LVT_Syllable", | |
2,12},{"lvt",2,3},{"lvtsyllable",2,11},{"NA",3,2},{"Not_Applicable",3,14},{"na", | |
3,2},{"notapplicable",3,13},{"T",4,1},{"Trailing_Jamo",4,13},{"t",4,1}, | |
{"trailingjamo",4,12},{"V",5,1},{"Vowel_Jamo",5,10},{"v",5,1},{"voweljamo",5, | |
9}}; | |
/* 41 Hangul_Syllable_Type */
/* 42 Hex_Digit */
/* 43 Hyphen */
/* 44 Ideographic */
/* 45 ID_Continue */
/* 46 ID_Start */
/* 47 IDS_Binary_Operator */
/* 48 IDS_Trinary_Operator */
MVMUnicodeNamedAlias Indic_Positional_Category[28] = { | |
{"Bottom",0,6},{"bottom",0,6},{"Bottom_And_Right",1,16},{"bottomandright",1,14}, | |
{"Left",2,4},{"left",2,4},{"Left_And_Right",3,14},{"leftandright",3,12},{"NA",4, | |
2},{"na",4,2},{"Overstruck",5,10},{"overstruck",5,10},{"Right",6,5},{"right",6, | |
5},{"Top",7,3},{"top",7,3},{"Top_And_Bottom",8,14},{"topandbottom",8,12}, | |
{"Top_And_Bottom_And_Right",9,24},{"topandbottomandright",9,20},{"Top_And_Left", | |
10,12},{"topandleft",10,10},{"Top_And_Left_And_Right",11,22},{"topandleftandright", | |
11,18},{"Top_And_Right",12,13},{"topandright",12,11},{"Visual_Order_Left",13, | |
17},{"visualorderleft",13,15}}; | |
/* 49 Indic_Positional_Category */
MVMUnicodeNamedAlias Indic_Syllabic_Category[70] = { | |
{"Avagraha",0,8},{"avagraha",0,8},{"Bindu",1,5},{"bindu",1,5},{"Brahmi_Joining_Number", | |
2,21},{"brahmijoiningnumber",2,19},{"Cantillation_Mark",3,17},{"cantillationmark", | |
3,16},{"Consonant",4,9},{"consonant",4,9},{"Consonant_Dead",5,14},{"consonantdead", | |
5,13},{"Consonant_Final",6,15},{"consonantfinal",6,14},{"Consonant_Head_Letter", | |
7,21},{"consonantheadletter",7,19},{"Consonant_Killer",8,16},{"consonantkiller", | |
8,15},{"Consonant_Medial",9,16},{"consonantmedial",9,15},{"Consonant_Placeholder", | |
10,21},{"consonantplaceholder",10,20},{"Consonant_Preceding_Repha",11,25}, | |
{"consonantprecedingrepha",11,23},{"Consonant_Prefixed",12,18},{"consonantprefixed", | |
12,17},{"Consonant_Subjoined",13,19},{"consonantsubjoined",13,18},{"Consonant_Succeeding_Repha", | |
14,26},{"consonantsucceedingrepha",14,24},{"Consonant_With_Stacker",15,22}, | |
{"consonantwithstacker",15,20},{"Gemination_Mark",16,15},{"geminationmark",16, | |
14},{"Invisible_Stacker",17,17},{"invisiblestacker",17,16},{"Joiner",18,6}, | |
{"joiner",18,6},{"Modifying_Letter",19,16},{"modifyingletter",19,15},{"Non_Joiner", | |
20,10},{"nonjoiner",20,9},{"Nukta",21,5},{"nukta",21,5},{"Number",22,6}, | |
{"number",22,6},{"Number_Joiner",23,13},{"numberjoiner",23,12},{"Other",24,5}, | |
{"other",24,5},{"Pure_Killer",25,11},{"purekiller",25,10},{"Register_Shifter", | |
26,16},{"registershifter",26,15},{"Syllable_Modifier",27,17},{"syllablemodifier", | |
27,16},{"Tone_Letter",28,11},{"toneletter",28,10},{"Tone_Mark",29,9},{"tonemark", | |
29,8},{"Virama",30,6},{"virama",30,6},{"Visarga",31,7},{"visarga",31,7}, | |
{"Vowel",32,5},{"vowel",32,5},{"Vowel_Dependent",33,15},{"voweldependent",33, | |
14},{"Vowel_Independent",34,17},{"vowelindependent",34,16}}; | |
/* 50 Indic_Syllabic_Category */
MVMUnicodeNamedAlias Jamo_Short_Name[104] = { | |
{"A",0,1},{"a",0,1},{"AE",1,2},{"ae",1,2},{"B",2,1},{"b",2,1},{"BB",3,2},{"bb", | |
3,2},{"BS",4,2},{"bs",4,2},{"C",5,1},{"c",5,1},{"D",6,1},{"d",6,1},{"DD",7,2}, | |
{"dd",7,2},{"E",8,1},{"e",8,1},{"EO",9,2},{"eo",9,2},{"EU",10,2},{"eu",10,2}, | |
{"G",11,1},{"g",11,1},{"GG",12,2},{"gg",12,2},{"GS",13,2},{"gs",13,2},{"H",14, | |
1},{"h",14,1},{"I",15,1},{"i",15,1},{"J",16,1},{"j",16,1},{"JJ",17,2},{"jj",17, | |
2},{"K",18,1},{"k",18,1},{"L",19,1},{"l",19,1},{"LB",20,2},{"lb",20,2},{"LG",21, | |
2},{"lg",21,2},{"LH",22,2},{"lh",22,2},{"LM",23,2},{"lm",23,2},{"LP",24,2}, | |
{"lp",24,2},{"LS",25,2},{"ls",25,2},{"LT",26,2},{"lt",26,2},{"M",27,1},{"m",27, | |
1},{"N",28,1},{"n",28,1},{"NG",29,2},{"ng",29,2},{"NH",30,2},{"nh",30,2},{"NJ", | |
31,2},{"nj",31,2},{"O",32,1},{"o",32,1},{"OE",33,2},{"oe",33,2},{"P",34,1},{"p", | |
34,1},{"R",35,1},{"r",35,1},{"S",36,1},{"s",36,1},{"SS",37,2},{"ss",37,2},{"T", | |
38,1},{"t",38,1},{"U",39,1},{"u",39,1},{"WA",40,2},{"wa",40,2},{"WAE",41,3}, | |
{"wae",41,3},{"WE",42,2},{"we",42,2},{"WEO",43,3},{"weo",43,3},{"WI",44,2}, | |
{"wi",44,2},{"YA",45,2},{"ya",45,2},{"YAE",46,3},{"yae",46,3},{"YE",47,2},{"ye", | |
47,2},{"YEO",48,3},{"yeo",48,3},{"YI",49,2},{"yi",49,2},{"YO",50,2},{"yo",50,2}, | |
{"YU",51,2},{"yu",51,2}}; | |
/* 52 Jamo_Short_Name */
/* 53 Join_Control */
MVMUnicodeNamedAlias Joining_Group[180] = { | |
{"African_Feh",0,11},{"africanfeh",0,10},{"African_Noon",1,12},{"africannoon",1, | |
11},{"African_Qaf",2,11},{"africanqaf",2,10},{"Ain",3,3},{"ain",3,3},{"Alaph",4, | |
5},{"alaph",4,5},{"Alef",5,4},{"alef",5,4},{"Beh",6,3},{"beh",6,3},{"Beth",7,4}, | |
{"beth",7,4},{"Burushaski_Yeh_Barree",8,21},{"burushaskiyehbarree",8,19},{"Dal", | |
9,3},{"dal",9,3},{"Dalath_Rish",10,11},{"dalathrish",10,10},{"E",11,1},{"e",11, | |
1},{"Farsi_Yeh",12,9},{"farsiyeh",12,8},{"Fe",13,2},{"fe",13,2},{"Feh",14,3}, | |
{"feh",14,3},{"Final_Semkath",15,13},{"finalsemkath",15,12},{"Gaf",16,3},{"gaf", | |
16,3},{"Gamal",17,5},{"gamal",17,5},{"Hah",18,3},{"hah",18,3},{"He",19,2},{"he", | |
19,2},{"Heh",20,3},{"heh",20,3},{"Heh_Goal",21,8},{"hehgoal",21,7},{"Heth",22, | |
4},{"heth",22,4},{"Kaf",23,3},{"kaf",23,3},{"Kaph",24,4},{"kaph",24,4},{"Khaph", | |
25,5},{"khaph",25,5},{"Knotted_Heh",26,11},{"knottedheh",26,10},{"Lam",27,3}, | |
{"lam",27,3},{"Lamadh",28,6},{"lamadh",28,6},{"Manichaean_Aleph",29,16}, | |
{"manichaeanaleph",29,15},{"Manichaean_Ayin",30,15},{"manichaeanayin",30,14}, | |
{"Manichaean_Beth",31,15},{"manichaeanbeth",31,14},{"Manichaean_Daleth",32,17}, | |
{"manichaeandaleth",32,16},{"Manichaean_Dhamedh",33,18},{"manichaeandhamedh",33, | |
17},{"Manichaean_Five",34,15},{"manichaeanfive",34,14},{"Manichaean_Gimel",35, | |
16},{"manichaeangimel",35,15},{"Manichaean_Heth",36,15},{"manichaeanheth",36, | |
14},{"Manichaean_Hundred",37,18},{"manichaeanhundred",37,17},{"Manichaean_Kaph", | |
38,15},{"manichaeankaph",38,14},{"Manichaean_Lamedh",39,17},{"manichaeanlamedh", | |
39,16},{"Manichaean_Mem",40,14},{"manichaeanmem",40,13},{"Manichaean_Nun",41, | |
14},{"manichaeannun",41,13},{"Manichaean_One",42,14},{"manichaeanone",42,13}, | |
{"Manichaean_Pe",43,13},{"manichaeanpe",43,12},{"Manichaean_Qoph",44,15}, | |
{"manichaeanqoph",44,14},{"Manichaean_Resh",45,15},{"manichaeanresh",45,14}, | |
{"Manichaean_Sadhe",46,16},{"manichaeansadhe",46,15},{"Manichaean_Samekh",47, | |
17},{"manichaeansamekh",47,16},{"Manichaean_Taw",48,14},{"manichaeantaw",48,13}, | |
{"Manichaean_Ten",49,14},{"manichaeanten",49,13},{"Manichaean_Teth",50,15}, | |
{"manichaeanteth",50,14},{"Manichaean_Thamedh",51,18},{"manichaeanthamedh",51, | |
17},{"Manichaean_Twenty",52,17},{"manichaeantwenty",52,16},{"Manichaean_Waw",53, | |
14},{"manichaeanwaw",53,13},{"Manichaean_Yodh",54,15},{"manichaeanyodh",54,14}, | |
{"Manichaean_Zayin",55,16},{"manichaeanzayin",55,15},{"Meem",56,4},{"meem",56, | |
4},{"Mim",57,3},{"mim",57,3},{"No_Joining_Group",58,16},{"nojoininggroup",58, | |
14},{"Noon",59,4},{"noon",59,4},{"Nun",60,3},{"nun",60,3},{"Nya",61,3},{"nya", | |
61,3},{"Pe",62,2},{"pe",62,2},{"Qaf",63,3},{"qaf",63,3},{"Qaph",64,4},{"qaph", | |
64,4},{"Reh",65,3},{"reh",65,3},{"Reversed_Pe",66,11},{"reversedpe",66,10}, | |
{"Rohingya_Yeh",67,12},{"rohingyayeh",67,11},{"Sad",68,3},{"sad",68,3},{"Sadhe", | |
69,5},{"sadhe",69,5},{"Seen",70,4},{"seen",70,4},{"Semkath",71,7},{"semkath",71, | |
7},{"Shin",72,4},{"shin",72,4},{"Straight_Waw",73,12},{"straightwaw",73,11}, | |
{"Swash_Kaf",74,9},{"swashkaf",74,8},{"Syriac_Waw",75,10},{"syriacwaw",75,9}, | |
{"Tah",76,3},{"tah",76,3},{"Taw",77,3},{"taw",77,3},{"Teh_Marbuta",78,11}, | |
{"tehmarbuta",78,10},{"Hamza_On_Heh_Goal",79,17},{"Teh_Marbuta_Goal",79,16}, | |
{"hamzaonhehgoal",79,14},{"tehmarbutagoal",79,14},{"Teth",80,4},{"teth",80,4}, | |
{"Waw",81,3},{"waw",81,3},{"Yeh",82,3},{"yeh",82,3},{"Yeh_Barree",83,10}, | |
{"yehbarree",83,9},{"Yeh_With_Tail",84,13},{"yehwithtail",84,11},{"Yudh",85,4}, | |
{"yudh",85,4},{"Yudh_He",86,7},{"yudhhe",86,6},{"Zain",87,4},{"zain",87,4}, | |
{"Zhain",88,5},{"zhain",88,5}}; | |
/* 54 Joining_Group */
MVMUnicodeNamedAlias Joining_Type[24] = { | |
{"C",0,1},{"Join_Causing",0,12},{"c",0,1},{"joincausing",0,11},{"D",1,1}, | |
{"Dual_Joining",1,12},{"d",1,1},{"dualjoining",1,11},{"L",2,1},{"Left_Joining", | |
2,12},{"l",2,1},{"leftjoining",2,11},{"R",3,1},{"Right_Joining",3,13},{"r",3,1}, | |
{"rightjoining",3,12},{"T",4,1},{"Transparent",4,11},{"t",4,1},{"transparent",4, | |
11},{"Non_Joining",5,11},{"U",5,1},{"nonjoining",5,10},{"u",5,1}}; | |
/* 55 Joining_Type */
MVMUnicodeNamedAlias Line_Break[162] = { | |
{"AI",0,2},{"Ambiguous",0,9},{"ai",0,2},{"ambiguous",0,9},{"AL",1,2},{"Alphabetic", | |
1,10},{"al",1,2},{"alphabetic",1,10},{"B2",2,2},{"Break_Both",2,10},{"b2",2,2}, | |
{"breakboth",2,9},{"BA",3,2},{"Break_After",3,11},{"ba",3,2},{"breakafter",3, | |
10},{"BB",4,2},{"Break_Before",4,12},{"bb",4,2},{"breakbefore",4,11},{"BK",5,2}, | |
{"Mandatory_Break",5,15},{"bk",5,2},{"mandatorybreak",5,14},{"CB",6,2}, | |
{"Contingent_Break",6,16},{"cb",6,2},{"contingentbreak",6,15},{"CJ",7,2}, | |
{"Conditional_Japanese_Starter",7,28},{"cj",7,2},{"conditionaljapanesestarter", | |
7,26},{"CL",8,2},{"Close_Punctuation",8,17},{"cl",8,2},{"closepunctuation",8, | |
16},{"CM",9,2},{"Combining_Mark",9,14},{"cm",9,2},{"combiningmark",9,13},{"CP", | |
10,2},{"Close_Parenthesis",10,17},{"closeparenthesis",10,16},{"cp",10,2},{"CR", | |
11,2},{"Carriage_Return",11,15},{"carriagereturn",11,14},{"cr",11,2},{"EB",12, | |
2},{"E_Base",12,6},{"eb",12,2},{"ebase",12,5},{"EM",13,2},{"E_Modifier",13,10}, | |
{"em",13,2},{"emodifier",13,9},{"EX",14,2},{"Exclamation",14,11},{"ex",14,2}, | |
{"exclamation",14,11},{"GL",15,2},{"Glue",15,4},{"gl",15,2},{"glue",15,4},{"H2", | |
16,2},{"h2",16,2},{"H3",17,2},{"h3",17,2},{"HL",18,2},{"Hebrew_Letter",18,13}, | |
{"hebrewletter",18,12},{"hl",18,2},{"HY",19,2},{"Hyphen",19,6},{"hy",19,2}, | |
{"hyphen",19,6},{"ID",20,2},{"Ideographic",20,11},{"id",20,2},{"ideographic",20, | |
11},{"IN",21,2},{"Inseparable",21,11},{"Inseperable",21,11},{"in",21,2}, | |
{"inseparable",21,11},{"inseperable",21,11},{"IS",22,2},{"Infix_Numeric",22,13}, | |
{"infixnumeric",22,12},{"is",22,2},{"JL",23,2},{"jl",23,2},{"JT",24,2},{"jt",24, | |
2},{"JV",25,2},{"jv",25,2},{"LF",26,2},{"Line_Feed",26,9},{"lf",26,2},{"linefeed", | |
26,8},{"NL",27,2},{"Next_Line",27,9},{"nextline",27,8},{"nl",27,2},{"NS",28,2}, | |
{"Nonstarter",28,10},{"nonstarter",28,10},{"ns",28,2},{"NU",29,2},{"Numeric",29, | |
7},{"nu",29,2},{"numeric",29,7},{"OP",30,2},{"Open_Punctuation",30,16},{"op",30, | |
2},{"openpunctuation",30,15},{"PO",31,2},{"Postfix_Numeric",31,15},{"po",31,2}, | |
{"postfixnumeric",31,14},{"PR",32,2},{"Prefix_Numeric",32,14},{"pr",32,2}, | |
{"prefixnumeric",32,13},{"QU",33,2},{"Quotation",33,9},{"qu",33,2},{"quotation", | |
33,9},{"RI",34,2},{"Regional_Indicator",34,18},{"regionalindicator",34,17}, | |
{"ri",34,2},{"Complex_Context",35,15},{"SA",35,2},{"complexcontext",35,14}, | |
{"sa",35,2},{"SG",36,2},{"Surrogate",36,9},{"sg",36,2},{"surrogate",36,9},{"SP", | |
37,2},{"Space",37,5},{"sp",37,2},{"space",37,5},{"Break_Symbols",38,13},{"SY", | |
38,2},{"breaksymbols",38,12},{"sy",38,2},{"WJ",39,2},{"Word_Joiner",39,11}, | |
{"wj",39,2},{"wordjoiner",39,10},{"Unknown",40,7},{"XX",40,2},{"unknown",40,7}, | |
{"xx",40,2},{"ZW",41,2},{"ZWSpace",41,7},{"zw",41,2},{"zwspace",41,7},{"ZWJ",42, | |
3},{"zwj",42,3}}; | |
/* 71 Line_Break */
/* 72 Logical_Order_Exception */
/* 73 Lowercase */
/* 75 Math */
MVMUnicodeNamedAlias NFC_Quick_Check[12] = { | |
{"M",0,1},{"Maybe",0,5},{"m",0,1},{"maybe",0,5},{"N",1,1},{"No",1,2},{"n",1,1}, | |
{"no",1,2},{"Y",2,1},{"Yes",2,3},{"y",2,1},{"yes",2,3}}; | |
/* 78 NFC_Quick_Check */
MVMUnicodeNamedAlias NFD_Quick_Check[8] = { | |
{"N",0,1},{"No",0,2},{"n",0,1},{"no",0,2},{"Y",1,1},{"Yes",1,3},{"y",1,1}, | |
{"yes",1,3}}; | |
/* 79 NFD_Quick_Check */
/* 81 NFKC_Quick_Check */
/* 82 NFKD_Quick_Check */
/* 83 Noncharacter_Code_Point */
MVMUnicodeNamedAlias Numeric_Type[14] = { | |
{"De",0,2},{"Decimal",0,7},{"de",0,2},{"decimal",0,7},{"Di",1,2},{"Digit",1,5}, | |
{"di",1,2},{"digit",1,5},{"None",2,4},{"none",2,4},{"Nu",3,2},{"Numeric",3,7}, | |
{"nu",3,2},{"numeric",3,7}}; | |
/* 84 Numeric_Type */
/* 86 Other_Alphabetic */
/* 87 Other_Default_Ignorable_Code_Point */
/* 88 Other_Grapheme_Extend */
/* 89 Other_ID_Continue */
/* 90 Other_ID_Start */
/* 91 Other_Lowercase */
/* 92 Other_Math */
/* 93 Other_Uppercase */
/* 94 Pattern_Syntax */
/* 95 Pattern_White_Space */
/* 96 Prepended_Concatenation_Mark */
/* 97 Quotation_Mark */
/* 98 Radical */
MVMUnicodeNamedAlias Script[548] = { | |
{"Adlam",0,5},{"Adlm",0,4},{"adlam",0,5},{"adlm",0,4},{"Aghb",1,4},{"Caucasian_Albanian", | |
1,18},{"aghb",1,4},{"caucasianalbanian",1,17},{"Ahom",2,4},{"ahom",2,4},{"Arab", | |
3,4},{"Arabic",3,6},{"arab",3,4},{"arabic",3,6},{"Armi",4,4},{"Imperial_Aramaic", | |
4,16},{"armi",4,4},{"imperialaramaic",4,15},{"Armenian",5,8},{"Armn",5,4}, | |
{"armenian",5,8},{"armn",5,4},{"Avestan",6,7},{"Avst",6,4},{"avestan",6,7}, | |
{"avst",6,4},{"Bali",7,4},{"Balinese",7,8},{"bali",7,4},{"balinese",7,8}, | |
{"Bamu",8,4},{"Bamum",8,5},{"bamu",8,4},{"bamum",8,5},{"Bass",9,4},{"Bassa_Vah", | |
9,9},{"bass",9,4},{"bassavah",9,8},{"Batak",10,5},{"Batk",10,4},{"batak",10,5}, | |
{"batk",10,4},{"Beng",11,4},{"Bengali",11,7},{"beng",11,4},{"bengali",11,7}, | |
{"Bhaiksuki",12,9},{"Bhks",12,4},{"bhaiksuki",12,9},{"bhks",12,4},{"Bopo",13,4}, | |
{"Bopomofo",13,8},{"bopo",13,4},{"bopomofo",13,8},{"Brah",14,4},{"Brahmi",14,6}, | |
{"brah",14,4},{"brahmi",14,6},{"Brai",15,4},{"Braille",15,7},{"brai",15,4}, | |
{"braille",15,7},{"Bugi",16,4},{"Buginese",16,8},{"bugi",16,4},{"buginese",16, | |
8},{"Buhd",17,4},{"Buhid",17,5},{"buhd",17,4},{"buhid",17,5},{"Cakm",18,4}, | |
{"Chakma",18,6},{"cakm",18,4},{"chakma",18,6},{"Canadian_Aboriginal",19,19}, | |
{"Cans",19,4},{"canadianaboriginal",19,18},{"cans",19,4},{"Cari",20,4}, | |
{"Carian",20,6},{"cari",20,4},{"carian",20,6},{"Cham",21,4},{"cham",21,4}, | |
{"Cher",22,4},{"Cherokee",22,8},{"cher",22,4},{"cherokee",22,8},{"Copt",23,4}, | |
{"Coptic",23,6},{"Qaac",23,4},{"copt",23,4},{"coptic",23,6},{"qaac",23,4}, | |
{"Cprt",24,4},{"Cypriot",24,7},{"cprt",24,4},{"cypriot",24,7},{"Cyrillic",25,8}, | |
{"Cyrl",25,4},{"cyrillic",25,8},{"cyrl",25,4},{"Deva",26,4},{"Devanagari",26, | |
10},{"deva",26,4},{"devanagari",26,10},{"Deseret",27,7},{"Dsrt",27,4},{"deseret", | |
27,7},{"dsrt",27,4},{"Dupl",28,4},{"Duployan",28,8},{"dupl",28,4},{"duployan", | |
28,8},{"Egyp",29,4},{"Egyptian_Hieroglyphs",29,20},{"egyp",29,4},{"egyptianhieroglyphs", | |
29,19},{"Elba",30,4},{"Elbasan",30,7},{"elba",30,4},{"elbasan",30,7},{"Ethi",31, | |
4},{"Ethiopic",31,8},{"ethi",31,4},{"ethiopic",31,8},{"Geor",32,4},{"Georgian", | |
32,8},{"geor",32,4},{"georgian",32,8},{"Glag",33,4},{"Glagolitic",33,10}, | |
{"glag",33,4},{"glagolitic",33,10},{"Goth",34,4},{"Gothic",34,6},{"goth",34,4}, | |
{"gothic",34,6},{"Gran",35,4},{"Grantha",35,7},{"gran",35,4},{"grantha",35,7}, | |
{"Greek",36,5},{"Grek",36,4},{"greek",36,5},{"grek",36,4},{"Gujarati",37,8}, | |
{"Gujr",37,4},{"gujarati",37,8},{"gujr",37,4},{"Gurmukhi",38,8},{"Guru",38,4}, | |
{"gurmukhi",38,8},{"guru",38,4},{"Hang",39,4},{"Hangul",39,6},{"hang",39,4}, | |
{"hangul",39,6},{"Han",40,3},{"Hani",40,4},{"han",40,3},{"hani",40,4},{"Hano", | |
41,4},{"Hanunoo",41,7},{"hano",41,4},{"hanunoo",41,7},{"Hatr",42,4},{"Hatran", | |
42,6},{"hatr",42,4},{"hatran",42,6},{"Hebr",43,4},{"Hebrew",43,6},{"hebr",43,4}, | |
{"hebrew",43,6},{"Hira",44,4},{"Hiragana",44,8},{"hira",44,4},{"hiragana",44,8}, | |
{"Anatolian_Hieroglyphs",45,21},{"Hluw",45,4},{"anatolianhieroglyphs",45,20}, | |
{"hluw",45,4},{"Hmng",46,4},{"Pahawh_Hmong",46,12},{"hmng",46,4},{"pahawhhmong", | |
46,11},{"Hrkt",47,4},{"Katakana_Or_Hiragana",47,20},{"hrkt",47,4},{"katakanaorhiragana", | |
47,18},{"Hung",48,4},{"Old_Hungarian",48,13},{"hung",48,4},{"oldhungarian",48, | |
12},{"Ital",49,4},{"Old_Italic",49,10},{"ital",49,4},{"olditalic",49,9},{"Java", | |
50,4},{"Javanese",50,8},{"java",50,4},{"javanese",50,8},{"Kali",51,4},{"Kayah_Li", | |
51,8},{"kali",51,4},{"kayahli",51,7},{"Kana",52,4},{"Katakana",52,8},{"kana",52, | |
4},{"katakana",52,8},{"Khar",53,4},{"Kharoshthi",53,10},{"khar",53,4},{"kharoshthi", | |
53,10},{"Khmer",54,5},{"Khmr",54,4},{"khmer",54,5},{"khmr",54,4},{"Khoj",55,4}, | |
{"Khojki",55,6},{"khoj",55,4},{"khojki",55,6},{"Kannada",56,7},{"Knda",56,4}, | |
{"kannada",56,7},{"knda",56,4},{"Kaithi",57,6},{"Kthi",57,4},{"kaithi",57,6}, | |
{"kthi",57,4},{"Lana",58,4},{"Tai_Tham",58,8},{"lana",58,4},{"taitham",58,7}, | |
{"Lao",59,3},{"Laoo",59,4},{"lao",59,3},{"laoo",59,4},{"Latin",60,5},{"Latn",60, | |
4},{"latin",60,5},{"latn",60,4},{"Lepc",61,4},{"Lepcha",61,6},{"lepc",61,4}, | |
{"lepcha",61,6},{"Limb",62,4},{"Limbu",62,5},{"limb",62,4},{"limbu",62,5}, | |
{"Lina",63,4},{"Linear_A",63,8},{"lina",63,4},{"lineara",63,7},{"Linb",64,4}, | |
{"Linear_B",64,8},{"linb",64,4},{"linearb",64,7},{"Lisu",65,4},{"lisu",65,4}, | |
{"Lyci",66,4},{"Lycian",66,6},{"lyci",66,4},{"lycian",66,6},{"Lydi",67,4}, | |
{"Lydian",67,6},{"lydi",67,4},{"lydian",67,6},{"Mahajani",68,8},{"Mahj",68,4}, | |
{"mahajani",68,8},{"mahj",68,4},{"Mand",69,4},{"Mandaic",69,7},{"mand",69,4}, | |
{"mandaic",69,7},{"Mani",70,4},{"Manichaean",70,10},{"mani",70,4},{"manichaean", | |
70,10},{"Marc",71,4},{"Marchen",71,7},{"marc",71,4},{"marchen",71,7},{"Mend",72, | |
4},{"Mende_Kikakui",72,13},{"mend",72,4},{"mendekikakui",72,12},{"Merc",73,4}, | |
{"Meroitic_Cursive",73,16},{"merc",73,4},{"meroiticcursive",73,15},{"Mero",74, | |
4},{"Meroitic_Hieroglyphs",74,20},{"mero",74,4},{"meroitichieroglyphs",74,19}, | |
{"Malayalam",75,9},{"Mlym",75,4},{"malayalam",75,9},{"mlym",75,4},{"Modi",76,4}, | |
{"modi",76,4},{"Mong",77,4},{"Mongolian",77,9},{"mong",77,4},{"mongolian",77,9}, | |
{"Mro",78,3},{"Mroo",78,4},{"mro",78,3},{"mroo",78,4},{"Meetei_Mayek",79,12}, | |
{"Mtei",79,4},{"meeteimayek",79,11},{"mtei",79,4},{"Mult",80,4},{"Multani",80, | |
7},{"mult",80,4},{"multani",80,7},{"Myanmar",81,7},{"Mymr",81,4},{"myanmar",81, | |
7},{"mymr",81,4},{"Narb",82,4},{"Old_North_Arabian",82,17},{"narb",82,4}, | |
{"oldnortharabian",82,15},{"Nabataean",83,9},{"Nbat",83,4},{"nabataean",83,9}, | |
{"nbat",83,4},{"Newa",84,4},{"newa",84,4},{"Nko",85,3},{"Nkoo",85,4},{"nko",85, | |
3},{"nkoo",85,4},{"Ogam",86,4},{"Ogham",86,5},{"ogam",86,4},{"ogham",86,5}, | |
{"Ol_Chiki",87,8},{"Olck",87,4},{"olchiki",87,7},{"olck",87,4},{"Old_Turkic",88, | |
10},{"Orkh",88,4},{"oldturkic",88,9},{"orkh",88,4},{"Oriya",89,5},{"Orya",89,4}, | |
{"oriya",89,5},{"orya",89,4},{"Osage",90,5},{"Osge",90,4},{"osage",90,5}, | |
{"osge",90,4},{"Osma",91,4},{"Osmanya",91,7},{"osma",91,4},{"osmanya",91,7}, | |
{"Palm",92,4},{"Palmyrene",92,9},{"palm",92,4},{"palmyrene",92,9},{"Pau_Cin_Hau", | |
93,11},{"Pauc",93,4},{"pauc",93,4},{"paucinhau",93,9},{"Old_Permic",94,10}, | |
{"Perm",94,4},{"oldpermic",94,9},{"perm",94,4},{"Phag",95,4},{"Phags_Pa",95,8}, | |
{"phag",95,4},{"phagspa",95,7},{"Inscriptional_Pahlavi",96,21},{"Phli",96,4}, | |
{"inscriptionalpahlavi",96,20},{"phli",96,4},{"Phlp",97,4},{"Psalter_Pahlavi", | |
97,15},{"phlp",97,4},{"psalterpahlavi",97,14},{"Phnx",98,4},{"Phoenician",98, | |
10},{"phnx",98,4},{"phoenician",98,10},{"Miao",99,4},{"Plrd",99,4},{"miao",99, | |
4},{"plrd",99,4},{"Inscriptional_Parthian",100,22},{"Prti",100,4},{"inscriptionalparthian", | |
100,21},{"prti",100,4},{"Rejang",101,6},{"Rjng",101,4},{"rejang",101,6},{"rjng", | |
101,4},{"Runic",102,5},{"Runr",102,4},{"runic",102,5},{"runr",102,4},{"Samaritan", | |
103,9},{"Samr",103,4},{"samaritan",103,9},{"samr",103,4},{"Old_South_Arabian", | |
104,17},{"Sarb",104,4},{"oldsoutharabian",104,15},{"sarb",104,4},{"Saur",105,4}, | |
{"Saurashtra",105,10},{"saur",105,4},{"saurashtra",105,10},{"Sgnw",106,4}, | |
{"SignWriting",106,11},{"sgnw",106,4},{"signwriting",106,11},{"Shavian",107,7}, | |
{"Shaw",107,4},{"shavian",107,7},{"shaw",107,4},{"Sharada",108,7},{"Shrd",108, | |
4},{"sharada",108,7},{"shrd",108,4},{"Sidd",109,4},{"Siddham",109,7},{"sidd", | |
109,4},{"siddham",109,7},{"Khudawadi",110,9},{"Sind",110,4},{"khudawadi",110,9}, | |
{"sind",110,4},{"Sinh",111,4},{"Sinhala",111,7},{"sinh",111,4},{"sinhala",111, | |
7},{"Sora",112,4},{"Sora_Sompeng",112,12},{"sora",112,4},{"sorasompeng",112,11}, | |
{"Sund",113,4},{"Sundanese",113,9},{"sund",113,4},{"sundanese",113,9},{"Sylo", | |
114,4},{"Syloti_Nagri",114,12},{"sylo",114,4},{"sylotinagri",114,11},{"Syrc", | |
115,4},{"Syriac",115,6},{"syrc",115,4},{"syriac",115,6},{"Tagb",116,4}, | |
{"Tagbanwa",116,8},{"tagb",116,4},{"tagbanwa",116,8},{"Takr",117,4},{"Takri", | |
117,5},{"takr",117,4},{"takri",117,5},{"Tai_Le",118,6},{"Tale",118,4},{"taile", | |
118,5},{"tale",118,4},{"New_Tai_Lue",119,11},{"Talu",119,4},{"newtailue",119,9}, | |
{"talu",119,4},{"Tamil",120,5},{"Taml",120,4},{"tamil",120,5},{"taml",120,4}, | |
{"Tang",121,4},{"Tangut",121,6},{"tang",121,4},{"tangut",121,6},{"Tai_Viet",122, | |
8},{"Tavt",122,4},{"taiviet",122,7},{"tavt",122,4},{"Telu",123,4},{"Telugu",123, | |
6},{"telu",123,4},{"telugu",123,6},{"Tfng",124,4},{"Tifinagh",124,8},{"tfng", | |
124,4},{"tifinagh",124,8},{"Tagalog",125,7},{"Tglg",125,4},{"tagalog",125,7}, | |
{"tglg",125,4},{"Thaa",126,4},{"Thaana",126,6},{"thaa",126,4},{"thaana",126,6}, | |
{"Thai",127,4},{"thai",127,4},{"Tibetan",128,7},{"Tibt",128,4},{"tibetan",128, | |
7},{"tibt",128,4},{"Tirh",129,4},{"Tirhuta",129,7},{"tirh",129,4},{"tirhuta", | |
129,7},{"Ugar",130,4},{"Ugaritic",130,8},{"ugar",130,4},{"ugaritic",130,8}, | |
{"Vai",131,3},{"Vaii",131,4},{"vai",131,3},{"vaii",131,4},{"Wara",132,4}, | |
{"Warang_Citi",132,11},{"wara",132,4},{"warangciti",132,10},{"Old_Persian",133, | |
11},{"Xpeo",133,4},{"oldpersian",133,10},{"xpeo",133,4},{"Cuneiform",134,9}, | |
{"Xsux",134,4},{"cuneiform",134,9},{"xsux",134,4},{"Yi",135,2},{"Yiii",135,4}, | |
{"yi",135,2},{"yiii",135,4},{"Inherited",136,9},{"Qaai",136,4},{"Zinh",136,4}, | |
{"inherited",136,9},{"qaai",136,4},{"zinh",136,4},{"Common",137,6},{"Zyyy",137, | |
4},{"common",137,6},{"zyyy",137,4},{"Unknown",138,7},{"Zzzz",138,4},{"unknown", | |
138,7},{"zzzz",138,4}}; | |
/* 101 Script */ | |
/* Alias table for the Sentence_Break property values.
 * Each entry is {name, pvaluecode, strlen}: an alias spelling, the integer
 * code it resolves to, and the byte length of the spelling (used as the hash
 * key length when the table is loaded by load_hash_3). Every alias appears in
 * its original spelling and in an all-lowercase spelling. */
MVMUnicodeNamedAlias Sentence_Break[55] = { | |
{"AT",0,2},{"ATerm",0,5},{"at",0,2},{"aterm",0,5},{"CL",1,2},{"Close",1,5}, | |
{"cl",1,2},{"close",1,5},{"CR",2,2},{"cr",2,2},{"EX",3,2},{"Extend",3,6},{"ex", | |
3,2},{"extend",3,6},{"FO",4,2},{"Format",4,6},{"fo",4,2},{"format",4,6},{"LE",5, | |
2},{"OLetter",5,7},{"le",5,2},{"oletter",5,7},{"LF",6,2},{"lf",6,2},{"LO",7,2}, | |
{"Lower",7,5},{"lo",7,2},{"lower",7,5},{"NU",8,2},{"Numeric",8,7},{"nu",8,2}, | |
{"numeric",8,7},{"SC",9,2},{"SContinue",9,9},{"sc",9,2},{"scontinue",9,9},{"SE", | |
10,2},{"Sep",10,3},{"se",10,2},{"sep",10,3},{"SP",11,2},{"Sp",11,2},{"sp",11,2}, | |
{"ST",12,2},{"STerm",12,5},{"st",12,2},{"sterm",12,5},{"UP",13,2},{"Upper",13, | |
5},{"up",13,2},{"upper",13,5},{"Other",14,5},{"XX",14,2},{"other",14,5},{"xx", | |
14,2}}; | |
/* 103 Sentence_Break */ | |
/* 104 Sentence_Terminal */ | |
/* 108 Soft_Dotted */ | |
/* 109 Terminal_Punctuation */ | |
/* 112 Unified_Ideograph */ | |
/* 113 Uppercase */ | |
/* 115 White_Space */ | |
/* 115 Variation_Selector */ | |
/* Alias table for the Word_Break property values.
 * Each entry is {name, pvaluecode, strlen}: an alias spelling, the integer
 * code it resolves to, and the byte length of the spelling (used as the hash
 * key length when the table is loaded by load_hash_3). Every alias appears in
 * its original spelling and in a lowercase spelling with underscores removed
 * (e.g. "Double_Quote" and "doublequote"). */
MVMUnicodeNamedAlias Word_Break[80] = { | |
{"CR",0,2},{"cr",0,2},{"DQ",1,2},{"Double_Quote",1,12},{"doublequote",1,11}, | |
{"dq",1,2},{"EB",2,2},{"E_Base",2,6},{"eb",2,2},{"ebase",2,5},{"EBG",3,3}, | |
{"E_Base_GAZ",3,10},{"ebasegaz",3,8},{"ebg",3,3},{"EM",4,2},{"E_Modifier",4,10}, | |
{"em",4,2},{"emodifier",4,9},{"EX",5,2},{"ExtendNumLet",5,12},{"ex",5,2}, | |
{"extendnumlet",5,12},{"Extend",6,6},{"extend",6,6},{"FO",7,2},{"Format",7,6}, | |
{"fo",7,2},{"format",7,6},{"GAZ",8,3},{"Glue_After_Zwj",8,14},{"gaz",8,3}, | |
{"glueafterzwj",8,12},{"HL",9,2},{"Hebrew_Letter",9,13},{"hebrewletter",9,12}, | |
{"hl",9,2},{"KA",10,2},{"Katakana",10,8},{"ka",10,2},{"katakana",10,8}, | |
{"ALetter",11,7},{"LE",11,2},{"aletter",11,7},{"le",11,2},{"LF",12,2},{"lf",12, | |
2},{"MB",13,2},{"MidNumLet",13,9},{"mb",13,2},{"midnumlet",13,9},{"ML",14,2}, | |
{"MidLetter",14,9},{"midletter",14,9},{"ml",14,2},{"MN",15,2},{"MidNum",15,6}, | |
{"midnum",15,6},{"mn",15,2},{"NL",16,2},{"Newline",16,7},{"newline",16,7},{"nl", | |
16,2},{"NU",17,2},{"Numeric",17,7},{"nu",17,2},{"numeric",17,7},{"RI",18,2}, | |
{"Regional_Indicator",18,18},{"regionalindicator",18,17},{"ri",18,2},{"SQ",19, | |
2},{"Single_Quote",19,12},{"singlequote",19,11},{"sq",19,2},{"Other",20,5}, | |
{"XX",20,2},{"other",20,5},{"xx",20,2},{"ZWJ",21,3},{"zwj",21,3}}; | |
/* 116 Word_Break */ | |
/* 117 XID_Continue */ | |
/* 118 XID_Start */ | |
hash_pre alias_names_hash = { | |
NULL, alias_names, 437 | |
}; | |
/* Maps a property code (the array index, noted in the trailing comment on
 * each line) to an index into the mapping[] array of property value tables.
 * lookup_pvalue reads pvalue_meta_c_array[propcode] and uses the result to
 * select the mapping[] entry to search.
 * Entries of -1 have no corresponding mapping[] element (presumably
 * properties without a named-value table -- confirm against the generator). */
int pvalue_meta_c_array[119] = { | |
-1/*0*/, | |
0/*1*/, | |
1/*2*/, | |
1/*3*/, | |
2/*4*/, | |
1/*5*/, | |
1/*6*/, | |
-1/*7*/, | |
-1/*8*/, | |
3/*9*/, | |
4/*10*/, | |
5/*11*/, | |
-1/*12*/, | |
1/*13*/, | |
1/*14*/, | |
1/*15*/, | |
1/*16*/, | |
1/*17*/, | |
1/*18*/, | |
1/*19*/, | |
1/*20*/, | |
1/*21*/, | |
1/*22*/, | |
-1/*23*/, | |
6/*24*/, | |
1/*25*/, | |
1/*26*/, | |
1/*27*/, | |
7/*28*/, | |
1/*29*/, | |
1/*30*/, | |
1/*31*/, | |
1/*32*/, | |
1/*33*/, | |
-1/*34*/, | |
1/*35*/, | |
8/*36*/, | |
1/*37*/, | |
9/*38*/, | |
1/*39*/, | |
1/*40*/, | |
10/*41*/, | |
1/*42*/, | |
1/*43*/, | |
1/*44*/, | |
1/*45*/, | |
1/*46*/, | |
1/*47*/, | |
1/*48*/, | |
11/*49*/, | |
12/*50*/, | |
-1/*51*/, | |
13/*52*/, | |
1/*53*/, | |
14/*54*/, | |
15/*55*/, | |
-1/*56*/, | |
-1/*57*/, | |
-1/*58*/, | |
-1/*59*/, | |
-1/*60*/, | |
-1/*61*/, | |
-1/*62*/, | |
-1/*63*/, | |
-1/*64*/, | |
-1/*65*/, | |
-1/*66*/, | |
-1/*67*/, | |
-1/*68*/, | |
-1/*69*/, | |
-1/*70*/, | |
16/*71*/, | |
1/*72*/, | |
1/*73*/, | |
-1/*74*/, | |
1/*75*/, | |
-1/*76*/, | |
-1/*77*/, | |
17/*78*/, | |
18/*79*/, | |
-1/*80*/, | |
17/*81*/, | |
18/*82*/, | |
1/*83*/, | |
19/*84*/, | |
-1/*85*/, | |
1/*86*/, | |
1/*87*/, | |
1/*88*/, | |
1/*89*/, | |
1/*90*/, | |
1/*91*/, | |
1/*92*/, | |
1/*93*/, | |
1/*94*/, | |
1/*95*/, | |
1/*96*/, | |
1/*97*/, | |
1/*98*/, | |
-1/*99*/, | |
-1/*100*/, | |
20/*101*/, | |
-1/*102*/, | |
21/*103*/, | |
1/*104*/, | |
-1/*105*/, | |
-1/*106*/, | |
-1/*107*/, | |
1/*108*/, | |
1/*109*/, | |
-1/*110*/, | |
-1/*111*/, | |
1/*112*/, | |
1/*113*/, | |
-1/*114*/, | |
1/*115*/, | |
22/*116*/, | |
1/*117*/, | |
1}; | |
/*118*/ | |
/* Property value tables, indexed by the values stored in
 * pvalue_meta_c_array. Each element is {hash, source, elems}: the hash
 * pointer starts as NULL and is filled in by load_pvalue_hash on first use,
 * source is the alias array to load from, and elems is the number of entries
 * in that array. The trailing comment on each element is its index in this
 * array. */
hash_pre mapping[23] = { | |
{NULL,Age,58/*0*/}, | |
{NULL,Alphabetic,16/*1*/}, | |
{NULL,Bidi_Class,92/*2*/}, | |
{NULL,Bidi_Paired_Bracket_Type, | |
9/*3*/}, | |
{NULL,Block,788/*4*/}, | |
{NULL,Canonical_Combining_Class,211/*5*/}, | |
{NULL,Decomposition_Type, | |
64/*6*/}, | |
{NULL,East_Asian_Width,24/*7*/}, | |
{NULL,General_Category,157/*8*/}, | |
{NULL,Grapheme_Cluster_Break, | |
56/*9*/}, | |
{NULL,Hangul_Syllable_Type,24/*10*/}, | |
{NULL,Indic_Positional_Category,28/*11*/}, | |
{NULL, | |
Indic_Syllabic_Category,70/*12*/}, | |
{NULL,Jamo_Short_Name,104/*13*/}, | |
{NULL,Joining_Group,180/*14*/}, | |
{NULL,Joining_Type,24/*15*/}, | |
{NULL,Line_Break,162/*16*/}, | |
{NULL,NFC_Quick_Check,12/*17*/}, | |
{NULL, | |
NFD_Quick_Check,8/*18*/}, | |
{NULL,Numeric_Type,14/*19*/}, | |
{NULL,Script,548/*20*/}, | |
{NULL,Sentence_Break, | |
55/*21*/}, | |
{NULL,Word_Break,80}}; | |
/*22*/ | |
/*"0" => $["Age"] | |
"1" => $["Alphabetic", "ASCII_Hex_Digit", "Bidi_Control", "Bidi_Mirrored", "Case_Ignorable", "Cased", "Changes_When_Casefolded", "Changes_When_Casemapped", "Changes_When_Lowercased", "Changes_When_NFKC_Casefolded", "Changes_When_Titlecased", "Changes_When_Uppercased", "Composition_Exclusion", "Dash", "Default_Ignorable_Code_Point", "Deprecated", "Diacritic", "Expands_On_NFC", "Expands_On_NFD", "Expands_On_NFKC", "Expands_On_NFKD", "Extender", "Full_Composition_Exclusion", "Grapheme_Base", "Grapheme_Extend", "Grapheme_Link", "Hex_Digit", "Hyphen", "Ideographic", "ID_Continue", "ID_Start", "IDS_Binary_Operator", "IDS_Trinary_Operator", "Join_Control", "Logical_Order_Exception", "Lowercase", "Math", "Noncharacter_Code_Point", "Other_Alphabetic", "Other_Default_Ignorable_Code_Point", "Other_Grapheme_Extend", "Other_ID_Continue", "Other_ID_Start", "Other_Lowercase", "Other_Math", "Other_Uppercase", "Pattern_Syntax", "Pattern_White_Space", "Prepended_Concatenation_Mark", "Quotation_Mark", "Radical", "Sentence_Terminal", "Soft_Dotted", "Terminal_Punctuation", "Unified_Ideograph", "Uppercase", "White_Space", "Variation_Selector", "XID_Continue", "XID_Start"] | |
"2" => $["Bidi_Class"] | |
"3" => $["Bidi_Paired_Bracket_Type"] | |
"4" => $["Block"] | |
"5" => $["Canonical_Combining_Class"] | |
"6" => $["Decomposition_Type"] | |
"7" => $["East_Asian_Width"] | |
"8" => $["General_Category"] | |
"9" => $["Grapheme_Cluster_Break"] | |
"10" => $["Hangul_Syllable_Type"] | |
"11" => $["Indic_Positional_Category"] | |
"12" => $["Indic_Syllabic_Category"] | |
"13" => $["Jamo_Short_Name"] | |
"14" => $["Joining_Group"] | |
"15" => $["Joining_Type"] | |
"16" => $["Line_Break"] | |
"17" => $["NFC_Quick_Check", "NFKC_Quick_Check"] | |
"18" => $["NFD_Quick_Check", "NFKD_Quick_Check"] | |
"19" => $["Numeric_Type"] | |
"20" => $["Script"] | |
"21" => $["Sentence_Break"] | |
"22" => $["Word_Break"] | |
*/ | |
/*
 * Build a uthash table from an array of alias records.
 *
 * thingy: array of at least `elems` records; each record's `name` pointer is
 *         stored in the table (not copied), so the array must outlive it.
 * elems:  number of records to load.
 *
 * Returns the head of the new table, or NULL when `elems` is not positive or
 * an allocation fails. On allocation failure every node created so far is
 * released, so the caller never sees a partially populated table.
 * The caller owns the returned nodes.
 */
MVMUnicodeNamedAlias_hash* load_hash_3 (MVMUnicodeNamedAlias *thingy, int elems) {
    MVMUnicodeNamedAlias_hash *users = NULL;
    int i;

    for (i = 0; i < elems; i++) {
        MVMUnicodeNamedAlias_hash *kv = malloc(sizeof *kv);
        if (!kv) {
            MVMUnicodeNamedAlias_hash *cur, *tmp;
            fprintf(stderr, "Out of memory after loading %i of %i elems\n", i, elems);
            /* Tear down what was built; a partial table would silently
             * report valid names as missing. */
            HASH_ITER(hh, users, cur, tmp) {
                HASH_DEL(users, cur);
                free(cur);
            }
            return NULL;
        }
        kv->name = thingy[i].name;
        kv->pvaluecode = thingy[i].pvaluecode;
        /* The key length comes from the precomputed strlen field. */
        HASH_ADD_KEYPTR(hh, users, kv->name, thingy[i].strlen, kv);
    }
    /* %p requires a void pointer argument. */
    fprintf(stderr, "Loaded %i elems into hash %p\n", elems, (void *)users);
    return users;
}
/*
 * Make sure the hash table of `pre` has been built, loading it from
 * pre->source (pre->elems entries) if it is still NULL.
 *
 * Returns 1 when pre->hash is non-NULL afterwards, 0 otherwise.
 */
int load_pvalue_hash (hash_pre *pre) {
    if (pre->hash == NULL) {
        pre->hash = load_hash_3(pre->source, pre->elems);
    }
    return pre->hash != NULL;
}
/*
 * Write a normalized copy of `input` to `output`: ASCII uppercase letters are
 * lowercased, ASCII lowercase letters are copied, and every other byte
 * (underscores, spaces, hyphens, digits, ...) is dropped.
 *
 * input:  NUL-terminated string; it is only read.
 * output: destination buffer; must have room for strlen(input) + 1 bytes,
 *         since in the worst case every byte is kept.
 *
 * Returns the number of characters written, not counting the terminating NUL.
 *
 * NOTE(review): digits are dropped along with punctuation -- confirm this is
 * intended for aliases that contain digits.
 */
int normalize (const char *input, char *output) {
    const char *p;
    int kept = 0;

    /* Single pass up to the terminator; no separate strlen() walk. */
    for (p = input; *p != '\0'; p++) {
        char c = *p;
        if ('A' <= c && c <= 'Z') {
            output[kept++] = (char)(c - 'A' + 'a');
        }
        else if ('a' <= c && c <= 'z') {
            output[kept++] = c;
        }
    }
    output[kept] = '\0';
    return kept;
}
/*
 * Look up `query` in `my_hash` and return its pvaluecode.
 *
 * The exact spelling is tried first. If that fails, the query is normalized
 * (letters only, lowercased -- see normalize) and looked up again.
 *
 * my_hash: table built by load_hash_3; may be NULL, in which case nothing is
 *          found.
 * query:   NUL-terminated name to look up.
 * text:    label printed in front of the result on stdout.
 *
 * Returns the pvaluecode on success, or -1 when the name is not found, when
 * `query` is NULL, or when the scratch buffer cannot be allocated.
 * Progress and failures are reported on stderr; the result line goes to
 * stdout.
 */
int find (MVMUnicodeNamedAlias_hash *my_hash, char *query, char *text) {
    MVMUnicodeNamedAlias_hash *kv = NULL;
    size_t query_len;

    if (!query) {
        return -1;
    }
    query_len = strlen(query);

    HASH_FIND(hh, my_hash, query, query_len, kv);
    if (!kv) {
        /* Normalization never lengthens the string, so query_len + 1 bytes
         * always suffice. A fixed-size buffer overflowed on long names. */
        char *normalized = malloc(query_len + 1);
        int slen;

        fprintf(stderr, "Couldn't find %s in %p\n", query, (void *)my_hash);
        if (!normalized) {
            fprintf(stderr, "Out of memory while normalizing %s\n", query);
            return -1;
        }
        slen = normalize(query, normalized);
        HASH_FIND(hh, my_hash, normalized, slen, kv);
        if (!kv) {
            fprintf(stderr, "Couldn't find %s in %p\n", normalized, (void *)my_hash);
            free(normalized);
            return -1;
        }
        fprintf(stderr, "Found using normalized version %s\n", normalized);
        free(normalized);
    }
    printf("%s %s %i\n", text, query, kv->pvaluecode);
    return kv->pvaluecode;
}
/*
 * Resolve a property name to its property code.
 *
 * query:            property name to look up.
 * alias_names_hash: lookup state for property names; its hash is built from
 *                   its source array on first use.
 *
 * Returns the code reported by find(), or -1 when the name is unknown.
 */
int lookup_propcode (char *query, hash_pre *alias_names_hash) {
    /* Builds the table only when it has not been loaded yet. */
    load_pvalue_hash(alias_names_hash);
    return find(alias_names_hash->hash, query, "propcode");
}
int lookup_pvalue (int propcode, char *query) { | |
if (propcode <= 0) { | |
fprintf(stderr, "Can't look up propcode '%i', 0 or below not allowed\n"); | |
return -1; | |
} | |
int offset = 0; | |
int offset2 = 0; | |
int final = pvalue_meta_c_array[propcode + offset] + offset2; | |
load_pvalue_hash(&mapping[ pvalue_meta_c_array[propcode + offset] + offset2 ]); | |
fprintf(stderr, "pvalue_meta_c_array[propcode + %i] = ", offset); | |
fprintf(stderr, "pvalue_meta_c_array[%i + %i = %i] = %i \n\t=> mapping[%i + %i] = mapping[%i]\n", | |
propcode, | |
offset, | |
propcode + offset, | |
pvalue_meta_c_array[propcode + offset], | |
pvalue_meta_c_array[propcode + offset], | |
offset2, | |
final | |
); | |
return find(mapping[ pvalue_meta_c_array[propcode + offset] + offset2 ].hash, query, "pvalue"); | |
} | |
int main (int argc, char *argv[]) { | |
MVMUnicodeNamedAlias_hash *kv; | |
char *query = "Glue_After_Zwj"; | |
char *property_name = "Grapheme_Cluster_Break"; | |
if (2 <= argc) { | |
property_name = argv[1]; | |
if (2 < argc) query = argv[2]; | |
} | |
int propcode = lookup_propcode(property_name, &alias_names_hash); | |
if (0 < propcode && 2 < argc) { | |
lookup_pvalue(propcode, query); | |
return 0; | |
} | |
return 1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment