Created
August 25, 2011 17:52
-
-
Save arcusfelis/1171290 to your computer and use it in GitHub Desktop.
CLDR Collation XML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8" ?> | |
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd"> | |
<ldml> | |
<!-- Identity of this data file: source revision, generation date, and the language it is declared for ("root"). -->
<identity> | |
<version number="$Revision: 5868 $"/> | |
<generation date="$Date: 2011-05-09 01:14:55 -0500 (Mon, 09 May 2011) $"/> | |
<language type="root" /> | |
</identity> | |
<collations validSubLocales="chr chr_US ee ee_GH ee_TG ga ga_IE id id_ID it it_CH it_IT ka ka_GE ky ky_KG ms ms_BN ms_MY nl nl_AW nl_BE nl_NL pt pt_AO pt_BR pt_GW pt_MZ pt_PT pt_ST st st_LS st_ZA sw sw_KE sw_TZ xh xh_ZA zu zu_ZA"> | |
<default type="standard"/> | |
<!-- The "standard" type (the declared default) carries no settings or rules in this file. -->
<collation type="standard"> | |
</collation> | |
<collation type="search" > | |
<settings normalization="on"/> | |
<!-- root search, suppress contractions for Thai, Lao --> | |
<suppress_contractions>[เ-ไ ເ-ໄ]</suppress_contractions> | |
<rules> | |
<!-- root search rules for Arabic, Hebrew --> | |
<!-- ALEF group: each <s> letter (madda/hamza variants) is a secondary difference after ALEF; each <t> presentation form is a tertiary difference after the entry that precedes it. -->
<reset>ا</reset> <!-- 0627 ARABIC LETTER ALEF --> | |
<t>ﺎ</t><t>ﺍ</t> <!-- FE8E, FE8D: FINAL FORM, ISOLATED FORM --> | |
<s>آ</s> <!-- 0622 ARABIC LETTER ALEF WITH MADDA ABOVE --> | |
<t>ﺂ</t><t>ﺁ</t> <!-- FE82, FE81: FINAL FORM, ISOLATED FORM --> | |
<s>أ</s> <!-- 0623 ARABIC LETTER ALEF WITH HAMZA ABOVE --> | |
<t>ﺄ</t><t>ﺃ</t> <!-- FE84, FE83: FINAL FORM, ISOLATED FORM --> | |
<s>إ</s> <!-- 0625 ARABIC LETTER ALEF WITH HAMZA BELOW --> | |
<t>ﺈ</t><t>ﺇ</t> <!-- FE88, FE87: FINAL FORM, ISOLATED FORM --> | |
<reset>و</reset> <!-- 0648 ARABIC LETTER WAW --> | |
<t>ۥ</t> <!-- 06E5: ARABIC SMALL WAW --> | |
<t>ﻮ</t><t>ﻭ</t> <!-- FEEE, FEED: FINAL FORM, ISOLATED FORM --> | |
<s>ؤ</s> <!-- 0624 ARABIC LETTER WAW WITH HAMZA ABOVE --> | |
<t>ﺆ</t><t>ﺅ</t> <!-- FE86, FE85: FINAL FORM, ISOLATED FORM --> | |
<reset>ي</reset> <!-- 064A ARABIC LETTER YEH --> | |
<t>ۦ</t> <!-- 06E6: ARABIC SMALL YEH --> | |
<t>ﻳ</t><t>ﻴ</t><t>ﻲ</t><t>ﻱ</t> <!-- FEF3, FEF4, FEF2, FEF1: INITIAL FORM, MEDIAL FORM, FINAL FORM, ISOLATED FORM --> | |
<s>ئ</s> <!-- 0626 ARABIC LETTER YEH WITH HAMZA ABOVE --> | |
<t>ﺋ</t><t>ﺌ</t><t>ﺊ</t><t>ﺉ</t> <!-- FE8B, FE8C, FE8A, FE89: INITIAL FORM, MEDIAL FORM, FINAL FORM, ISOLATED FORM --> | |
<s>ى</s> <!-- 0649 ARABIC LETTER ALEF MAKSURA --> | |
<t>ﯨ</t><t>ﯩ</t> <!-- FBE8, FBE9: UIGHUR KAZAKH KIRGHIZ ALEF MAKSURA INITIAL FORM, MEDIAL FORM --> | |
<t>ﻰ</t><t>ﻯ</t> <!-- FEF0, FEEF: FINAL FORM, ISOLATED FORM --> | |
<reset>ه</reset> <!-- 0647 ARABIC LETTER HEH --> | |
<t>ﻫ</t><t>ﻬ</t><t>ﻪ</t><t>ﻩ</t> <!-- FEEB, FEEC, FEEA, FEE9: INITIAL FORM, MEDIAL FORM, FINAL FORM, ISOLATED FORM --> | |
<s>ة</s> <!-- 0629 ARABIC LETTER TEH MARBUTA --> | |
<t>ﺔ</t><t>ﺓ</t> <!-- FE94, FE93: FINAL FORM, ISOLATED FORM --> | |
<reset><last_primary_ignorable/></reset> | |
<s>׳</s> <!-- 05F3 HEBREW PUNCTUATION GERESH --> | |
<s>״</s> <!-- 05F4 HEBREW PUNCTUATION GERSHAYIM --> | |
<s>ـ</s> <!-- 0640 ARABIC TATWEEL --> | |
<!-- Don't need explicit entries for 064B - 0652 ARABIC FATHATAN - ARABIC SUKUN; | |
these are already ignorable at level 1, and are not involved in contractions --> | |
<s>ฺ</s> <!-- 0E3A THAI CHARACTER PHINTHU --> | |
<!-- root search rules for modern Korean jamos --> | |
<!-- Korean modern complex consonants, decompose: x(L) = CHOSEONG x, x(T) = JONGSEONG x --> | |
<reset>ᄀ</reset> <!-- 1100 KIYEOK(L) = 11A8 KIYEOK(T) --> | |
<i>ᆨ</i> | |
<reset>ᄀᄀ</reset> <!-- 1100 KIYEOK(L) + 1100 KIYEOK(L) = 1101 SSANGKIYEOK(L) = 11A9 SSANGKIYEOK(T) --> | |
<i>ᄁ</i> | |
<i>ᆩ</i> | |
<reset>ᄀᄉ</reset> <!-- 1100 KIYEOK(L) + 1109 SIOS(L) = 11AA KIYEOK-SIOS(T) --> | |
<i>ᆪ</i> | |
<reset>ᄂ</reset> <!-- 1102 NIEUN(L) = 11AB NIEUN(T) --> | |
<i>ᆫ</i> | |
<reset>ᄂᄌ</reset> <!-- 1102 NIEUN(L) + 110C CIEUC(L) = 11AC NIEUN-CIEUC(T), also archaic 115C NIEUN-CIEUC(L) --> | |
<i>ᆬ</i> | |
<reset>ᄂᄒ</reset> <!-- 1102 NIEUN(L) + 1112 HIEUH(L) = 11AD NIEUN-HIEUH(T), also archaic 115D NIEUN-HIEUH(L) --> | |
<i>ᆭ</i> | |
<reset>ᄃ</reset> <!-- 1103 TIKEUT(L) = 11AE TIKEUT(T) --> | |
<i>ᆮ</i> | |
<reset>ᄃᄃ</reset> <!-- 1103 TIKEUT(L) + 1103 TIKEUT(L) = 1104 SSANGTIKEUT(L) --> | |
<i>ᄄ</i> | |
<reset>ᄅ</reset> <!-- 1105 RIEUL(L) = 11AF RIEUL(T) --> | |
<i>ᆯ</i> | |
<reset>ᄅᄀ</reset> <!-- 1105 RIEUL(L) + 1100 KIYEOK(L) = 11B0 RIEUL-KIYEOK(T) --> | |
<i>ᆰ</i> | |
<reset>ᄅᄆ</reset> <!-- 1105 RIEUL(L) + 1106 MIEUM(L) = 11B1 RIEUL-MIEUM(T) --> | |
<i>ᆱ</i> | |
<reset>ᄅᄇ</reset> <!-- 1105 RIEUL(L) + 1107 PIEUP(L) = 11B2 RIEUL-PIEUP(T) --> | |
<i>ᆲ</i> | |
<reset>ᄅᄉ</reset> <!-- 1105 RIEUL(L) + 1109 SIOS(L) = 11B3 RIEUL-SIOS(T) --> | |
<i>ᆳ</i> | |
<reset>ᄅᄐ</reset> <!-- 1105 RIEUL(L) + 1110 THIEUTH(L) = 11B4 RIEUL-THIEUTH(T) --> | |
<i>ᆴ</i> | |
<reset>ᄅᄑ</reset> <!-- 1105 RIEUL(L) + 1111 PHIEUPH(L) = 11B5 RIEUL-PHIEUPH(T) --> | |
<i>ᆵ</i> | |
<reset>ᄅᄒ</reset> <!-- 1105 RIEUL(L) + 1112 HIEUH(L) = 11B6 RIEUL-HIEUH(T), also archaic 111A RIEUL-HIEUH(L) --> | |
<i>ᆶ</i> | |
<reset>ᄆ</reset> <!-- 1106 MIEUM(L) = 11B7 MIEUM(T) --> | |
<i>ᆷ</i> | |
<reset>ᄇ</reset> <!-- 1107 PIEUP(L) = 11B8 PIEUP(T) --> | |
<i>ᆸ</i> | |
<reset>ᄇᄇ</reset> <!-- 1107 PIEUP(L) + 1107 PIEUP(L) = 1108 SSANGPIEUP(L) --> | |
<i>ᄈ</i> | |
<reset>ᄇᄉ</reset> <!-- 1107 PIEUP(L) + 1109 SIOS(L) = 11B9 PIEUP-SIOS(T), also archaic 1121 PIEUP-SIOS(L) --> | |
<i>ᆹ</i> | |
<reset>ᄉ</reset> <!-- 1109 SIOS(L) = 11BA SIOS(T) --> | |
<i>ᆺ</i> | |
<reset>ᄉᄉ</reset> <!-- 1109 SIOS(L) + 1109 SIOS(L) = 110A SSANGSIOS(L) = 11BB SSANGSIOS(T) --> | |
<i>ᄊ</i> | |
<i>ᆻ</i> | |
<reset>ᄋ</reset> <!-- 110B IEUNG(L) = 11BC IEUNG(T) --> | |
<i>ᆼ</i> | |
<reset>ᄌ</reset> <!-- 110C CIEUC(L) = 11BD CIEUC(T) --> | |
<i>ᆽ</i> | |
<reset>ᄌᄌ</reset> <!-- 110C CIEUC(L) + 110C CIEUC(L) = 110D SSANGCIEUC(L) --> | |
<i>ᄍ</i> | |
<reset>ᄎ</reset> <!-- 110E CHIEUCH(L) = 11BE CHIEUCH(T) --> | |
<i>ᆾ</i> | |
<reset>ᄏ</reset> <!-- 110F KHIEUKH(L) = 11BF KHIEUKH(T) --> | |
<i>ᆿ</i> | |
<reset>ᄐ</reset> <!-- 1110 THIEUTH(L) = 11C0 THIEUTH(T) --> | |
<i>ᇀ</i> | |
<reset>ᄑ</reset> <!-- 1111 PHIEUPH(L) = 11C1 PHIEUPH(T) --> | |
<i>ᇁ</i> | |
<reset>ᄒ</reset> <!-- 1112 HIEUH(L) = 11C2 HIEUH(T) --> | |
<i>ᇂ</i> | |
<!-- Korean modern complex vowels, decompose --> | |
<reset>ᅡᅵ</reset> <!-- 1161 A(V) + 1175 I(V) = 1162 AE(V) --> | |
<i>ᅢ</i> | |
<reset>ᅣᅵ</reset> <!-- 1163 YA(V) + 1175 I(V) = 1164 YAE(V) --> | |
<i>ᅤ</i> | |
<reset>ᅥᅵ</reset> <!-- 1165 EO(V) + 1175 I(V) = 1166 E(V) --> | |
<i>ᅦ</i> | |
<reset>ᅧᅵ</reset> <!-- 1167 YEO(V) + 1175 I(V) = 1168 YE(V) --> | |
<i>ᅨ</i> | |
<reset>ᅩᅡ</reset> <!-- 1169 O(V) + 1161 A(V) = 116A WA(V) --> | |
<i>ᅪ</i> | |
<reset>ᅩᅡᅵ</reset> <!-- 1169 O(V) + 1161 A(V) + 1175 I(V) = 116B WAE(V) --> | |
<i>ᅫ</i> | |
<reset>ᅩᅵ</reset> <!-- 1169 O(V) + 1175 I(V) = 116C OE(V) --> | |
<i>ᅬ</i> | |
<!-- NOTE(review): WEO (116F) and WE (1170) are graphically built on U + EO (1165), yet these two rules reset on U + YI (1174). The data and the comments agree with each other, so this may be deliberate upstream; confirm against the current CLDR root search rules before changing. -->
<reset>ᅮᅴ</reset> <!-- 116E U(V) + 1174 YI(V) = 116F WEO(V) --> | |
<i>ᅯ</i> | |
<reset>ᅮᅴᅵ</reset> <!-- 116E U(V) + 1174 YI(V) + 1175 I(V) = 1170 WE(V) --> | |
<i>ᅰ</i> | |
<reset>ᅮᅵ</reset> <!-- 116E U(V) + 1175 I(V) = 1171 WI(V) --> | |
<i>ᅱ</i> | |
</rules> | |
</collation> | |
<collation type="ducet"> | |
<settings normalization="on" variableTop="𝍱"/> <!--U+1D371 COUNTING ROD TENS DIGIT NINE--> | |
<rules> | |
<!-- Spacing accent marks placed after U+1680. <p> starts a new primary level, <t> adds a tertiary (width) variant of the entry before it, and <i> marks an identical-level equivalent.
     The invisible U+1680 and the fullwidth forms are written as numeric character references so that whitespace or width normalization cannot silently turn them into a plain space or their ASCII/Latin-1 look-alikes (which would make a rule relate a character to itself). -->
<reset>&#x1680;</reset> <!--U+1680 OGHAM SPACE MARK--> | |
<p>`</p> <!--U+0060 GRAVE ACCENT--> | |
<t>&#xFF40;</t> <!--U+FF40 FULLWIDTH GRAVE ACCENT--> | |
<p>´</p> <!--U+00B4 ACUTE ACCENT--> | |
<i>΄</i> <!--U+0384 GREEK TONOS--> | |
<p>˜</p> <!--U+02DC SMALL TILDE--> | |
<p>^</p> <!--U+005E CIRCUMFLEX ACCENT--> | |
<t>&#xFF3E;</t> <!--U+FF3E FULLWIDTH CIRCUMFLEX ACCENT--> | |
<p>¯</p> <!--U+00AF MACRON--> | |
<t>&#xFFE3;</t> <!--U+FFE3 FULLWIDTH MACRON--> | |
<reset>﹌</reset> <!--U+FE4C DOUBLE WAVY OVERLINE--> | |
<p>˘</p> <!--U+02D8 BREVE--> | |
<p>˙</p> <!--U+02D9 DOT ABOVE--> | |
<p>¨</p> <!--U+00A8 DIAERESIS--> | |
<p>˚</p> <!--U+02DA RING ABOVE--> | |
<p>˝</p> <!--U+02DD DOUBLE ACUTE ACCENT--> | |
<p>᾽</p> <!--U+1FBD GREEK KORONIS--> | |
<i>᾿</i> <!--U+1FBF GREEK PSILI--> | |
<p>῾</p> <!--U+1FFE GREEK DASIA--> | |
<p>¸</p> <!--U+00B8 CEDILLA--> | |
<p>˛</p> <!--U+02DB OGONEK--> | |
<reset>‗</reset> <!--U+2017 DOUBLE LOW LINE--> | |
<p>῀</p> <!--U+1FC0 GREEK PERISPOMENI--> | |
<p>﮲</p> <!--U+FBB2 ARABIC SYMBOL DOT ABOVE--> | |
<p>﮳</p> <!--U+FBB3 ARABIC SYMBOL DOT BELOW--> | |
<p>﮴</p> <!--U+FBB4 ARABIC SYMBOL TWO DOTS ABOVE--> | |
<p>﮵</p> <!--U+FBB5 ARABIC SYMBOL TWO DOTS BELOW--> | |
<p>﮶</p> <!--U+FBB6 ARABIC SYMBOL THREE DOTS ABOVE--> | |
<p>﮷</p> <!--U+FBB7 ARABIC SYMBOL THREE DOTS BELOW--> | |
<p>﮸</p> <!--U+FBB8 ARABIC SYMBOL THREE DOTS POINTING DOWNWARDS ABOVE--> | |
<p>﮹</p> <!--U+FBB9 ARABIC SYMBOL THREE DOTS POINTING DOWNWARDS BELOW--> | |
<p>﮺</p> <!--U+FBBA ARABIC SYMBOL FOUR DOTS ABOVE--> | |
<p>﮻</p> <!--U+FBBB ARABIC SYMBOL FOUR DOTS BELOW--> | |
<p>﮼</p> <!--U+FBBC ARABIC SYMBOL DOUBLE VERTICAL BAR BELOW--> | |
<p>﮽</p> <!--U+FBBD ARABIC SYMBOL TWO DOTS VERTICALLY ABOVE--> | |
<p>﮾</p> <!--U+FBBE ARABIC SYMBOL TWO DOTS VERTICALLY BELOW--> | |
<p>﮿</p> <!--U+FBBF ARABIC SYMBOL RING--> | |
<p>﯀</p> <!--U+FBC0 ARABIC SYMBOL SMALL TAH ABOVE--> | |
<p>﯁</p> <!--U+FBC1 ARABIC SYMBOL SMALL TAH BELOW--> | |
<p>゛</p> <!--U+309B KATAKANA-HIRAGANA VOICED SOUND MARK--> | |
<p>゜</p> <!--U+309C KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK--> | |
<reset>᠅</reset> <!--U+1805 MONGOLIAN FOUR DOTS--> | |
<p>༔</p> <!--U+0F14 TIBETAN MARK GTER TSHEG--> | |
<reset>჻</reset> <!--U+10FB GEORGIAN PARAGRAPH SEPARATOR--> | |
<p>፠</p> <!--U+1360 ETHIOPIC SECTION MARK--> | |
<reset>𐄁</reset> <!--U+10101 AEGEAN WORD SEPARATOR DOT--> | |
<p>𐄂</p> <!--U+10102 AEGEAN CHECK MARK--> | |
<reset>﴿</reset> <!--U+FD3F ORNATE RIGHT PARENTHESIS--> | |
<p>§</p> <!--U+00A7 SECTION SIGN--> | |
<p>¶</p> <!--U+00B6 PILCROW SIGN--> | |
<reset>⁋</reset> <!--U+204B REVERSED PILCROW SIGN--> | |
<p>©</p> <!--U+00A9 COPYRIGHT SIGN--> | |
<p>®</p> <!--U+00AE REGISTERED SIGN--> | |
<!-- FRACTION SLASH gets its own primary level after FULLWIDTH SOLIDUS. The reset target is written as a character reference so width folding cannot replace it with U+002F. -->
<reset>&#xFF0F;</reset> <!--U+FF0F FULLWIDTH SOLIDUS--> | |
<p>⁄</p> <!--U+2044 FRACTION SLASH--> | |
<reset>﹠</reset> <!--U+FE60 SMALL AMPERSAND--> | |
<p>⅋</p> <!--U+214B TURNED AMPERSAND--> | |
<reset>؊</reset> <!--U+060A ARABIC-INDIC PER TEN THOUSAND SIGN--> | |
<p>⁒</p> <!--U+2052 COMMERCIAL MINUS SIGN--> | |
<reset>᰿</reset> <!--U+1C3F LEPCHA PUNCTUATION TSHOOK--> | |
<p>᥀</p> <!--U+1940 LIMBU SIGN LOO--> | |
<reset>၏</reset> <!--U+104F MYANMAR SYMBOL GENITIVE--> | |
<p>႞</p> <!--U+109E MYANMAR SYMBOL SHAN ONE--> | |
<p>႟</p> <!--U+109F MYANMAR SYMBOL SHAN EXCLAMATION--> | |
<p>꩷</p> <!--U+AA77 MYANMAR SYMBOL AITON EXCLAMATION--> | |
<p>꩸</p> <!--U+AA78 MYANMAR SYMBOL AITON ONE--> | |
<p>꩹</p> <!--U+AA79 MYANMAR SYMBOL AITON TWO--> | |
<p>ៗ</p> <!--U+17D7 KHMER SIGN LEK TOO--> | |
<reset>꠹</reset> <!--U+A839 NORTH INDIC QUANTITY MARK--> | |
<p>๏</p> <!--U+0E4F THAI CHARACTER FONGMAN--> | |
<!-- DOUBLE VERTICAL LINE gets its own primary level after FULLWIDTH BROKEN BAR. The reset target is written as a character reference so width folding cannot replace it with U+00A6. -->
<reset>&#xFFE4;</reset> <!--U+FFE4 FULLWIDTH BROKEN BAR--> | |
<p>‖</p> <!--U+2016 DOUBLE VERTICAL LINE--> | |
<reset>❧</reset> <!--U+2767 ROTATED FLORAL HEART BULLET--> | |
<p>❨</p> <!--U+2768 MEDIUM LEFT PARENTHESIS ORNAMENT--> | |
<p>❩</p> <!--U+2769 MEDIUM RIGHT PARENTHESIS ORNAMENT--> | |
<p>❪</p> <!--U+276A MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT--> | |
<p>❫</p> <!--U+276B MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT--> | |
<p>❬</p> <!--U+276C MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT--> | |
<p>❭</p> <!--U+276D MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT--> | |
<p>❮</p> <!--U+276E HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT--> | |
<p>❯</p> <!--U+276F HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT--> | |
<p>❰</p> <!--U+2770 HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT--> | |
<p>❱</p> <!--U+2771 HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT--> | |
<p>❲</p> <!--U+2772 LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT--> | |
<p>❳</p> <!--U+2773 LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT--> | |
<p>❴</p> <!--U+2774 MEDIUM LEFT CURLY BRACKET ORNAMENT--> | |
<p>❵</p> <!--U+2775 MEDIUM RIGHT CURLY BRACKET ORNAMENT--> | |
<reset>⟄</reset> <!--U+27C4 OPEN SUPERSET--> | |
<p>⟅</p> <!--U+27C5 LEFT S-SHAPED BAG DELIMITER--> | |
<p>⟆</p> <!--U+27C6 RIGHT S-SHAPED BAG DELIMITER--> | |
<reset>⟥</reset> <!--U+27E5 WHITE SQUARE WITH RIGHTWARDS TICK--> | |
<p>⟦</p> <!--U+27E6 MATHEMATICAL LEFT WHITE SQUARE BRACKET--> | |
<p>⟧</p> <!--U+27E7 MATHEMATICAL RIGHT WHITE SQUARE BRACKET--> | |
<p>⟨</p> <!--U+27E8 MATHEMATICAL LEFT ANGLE BRACKET--> | |
<p>⟩</p> <!--U+27E9 MATHEMATICAL RIGHT ANGLE BRACKET--> | |
<p>⟪</p> <!--U+27EA MATHEMATICAL LEFT DOUBLE ANGLE BRACKET--> | |
<p>⟫</p> <!--U+27EB MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET--> | |
<reset>⧗</reset> <!--U+29D7 BLACK HOURGLASS--> | |
<p>⧘</p> <!--U+29D8 LEFT WIGGLY FENCE--> | |
<p>⧙</p> <!--U+29D9 RIGHT WIGGLY FENCE--> | |
<p>⧚</p> <!--U+29DA LEFT DOUBLE WIGGLY FENCE--> | |
<p>⧛</p> <!--U+29DB RIGHT DOUBLE WIGGLY FENCE--> | |
<reset>𐩾</reset> <!--U+10A7E OLD SOUTH ARABIAN NUMBER FIFTY--> | |
<p>𐩿</p> <!--U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR--> | |
<reset>Ꞧ</reset> <!--U+A7A6 LATIN CAPITAL LETTER R WITH OBLIQUE STROKE / 20EB--> | |
<x><t>₨</t><extend>s</extend></x> <!--U+20A8 RUPEE SIGN / 0073--> | |
<reset>ﷶ</reset> <!--U+FDF6 ARABIC LIGATURE RASOUL ISOLATED FORM / 0633 0648 0644--> | |
<x><t>﷼</t><extend>یال</extend></x> <!--U+FDFC RIAL SIGN / 06CC 0627 0644--> | |
</rules> | |
</collation> | |
</collations> | |
<!-- for UCA rules please see: http://www.unicode.org/reports/tr10/ --> | |
</ldml> | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment