Skip to content

Instantly share code, notes, and snippets.

@arcusfelis
Created August 25, 2011 17:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arcusfelis/1171290 to your computer and use it in GitHub Desktop.
Save arcusfelis/1171290 to your computer and use it in GitHub Desktop.
CLDR Collation XML
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<ldml>
<identity>
  <!-- Revision/date keyword stamps for this file, then the locale it describes (root). -->
  <version number="$Revision: 5868 $"/>
  <generation date="$Date: 2011-05-09 01:14:55 -0500 (Mon, 09 May 2011) $"/>
  <language type="root"/>
</identity>
<collations validSubLocales="chr chr_US ee ee_GH ee_TG ga ga_IE id id_ID it it_CH it_IT ka ka_GE ky ky_KG ms ms_BN ms_MY nl nl_AW nl_BE nl_NL pt pt_AO pt_BR pt_GW pt_MZ pt_PT pt_ST st st_LS st_ZA sw sw_KE sw_TZ xh xh_ZA zu zu_ZA">
<!-- "standard" is the default collation type; root declares it with no settings or rules of its own. -->
<default type="standard"/>
<collation type="standard"/>
<collation type="search" >
<settings normalization="on"/>
<!-- root search, suppress contractions for Thai, Lao -->
<suppress_contractions>[เ-ไ ເ-ໄ]</suppress_contractions>
<rules>
<!-- root search rules for Arabic, Hebrew -->
<!-- ALEF chain: the presentation forms follow their base letter as tertiary (<t>) variants;
     ALEF with madda/hamza follow plain ALEF as secondary (<s>) variants. -->
<reset>ا</reset> <!-- 0627 ARABIC LETTER ALEF -->
<t>ﺎ</t><t>ﺍ</t> <!-- FE8E, FE8D: FINAL FORM, ISOLATED FORM -->
<s>آ</s> <!-- 0622 ARABIC LETTER ALEF WITH MADDA ABOVE -->
<t>ﺂ</t><t>ﺁ</t> <!-- FE82, FE81: FINAL FORM, ISOLATED FORM -->
<s>أ</s> <!-- 0623 ARABIC LETTER ALEF WITH HAMZA ABOVE -->
<t>ﺄ</t><t>ﺃ</t> <!-- FE84, FE83: FINAL FORM, ISOLATED FORM -->
<s>إ</s> <!-- 0625 ARABIC LETTER ALEF WITH HAMZA BELOW -->
<t>ﺈ</t><t>ﺇ</t> <!-- FE88, FE87: FINAL FORM, ISOLATED FORM -->
<!-- WAW chain: small waw and the presentation forms are tertiary variants of WAW;
     WAW WITH HAMZA ABOVE is a secondary variant. -->
<reset>و</reset> <!-- 0648 ARABIC LETTER WAW -->
<t>ۥ</t> <!-- 06E5: ARABIC SMALL WAW -->
<t>ﻮ</t><t>ﻭ</t> <!-- FEEE, FEED: FINAL FORM, ISOLATED FORM -->
<s>ؤ</s> <!-- 0624 ARABIC LETTER WAW WITH HAMZA ABOVE -->
<t>ﺆ</t><t>ﺅ</t> <!-- FE86, FE85: FINAL FORM, ISOLATED FORM -->
<!-- YEH chain: small yeh and the presentation forms are tertiary variants of YEH;
     YEH WITH HAMZA ABOVE and ALEF MAKSURA are secondary variants, each with its own forms. -->
<reset>ي</reset> <!-- 064A ARABIC LETTER YEH -->
<t>ۦ</t> <!-- 06E6: ARABIC SMALL YEH -->
<t>ﻳ</t><t>ﻴ</t><t>ﻲ</t><t>ﻱ</t> <!-- FEF3, FEF4, FEF2, FEF1: INITIAL FORM, MEDIAL FORM, FINAL FORM, ISOLATED FORM -->
<s>ئ</s> <!-- 0626 ARABIC LETTER YEH WITH HAMZA ABOVE -->
<t>ﺋ</t><t>ﺌ</t><t>ﺊ</t><t>ﺉ</t> <!-- FE8B, FE8C, FE8A, FE89: INITIAL FORM, MEDIAL FORM, FINAL FORM, ISOLATED FORM -->
<s>ى</s> <!-- 0649 ARABIC LETTER ALEF MAKSURA -->
<t>ﯨ</t><t>ﯩ</t> <!-- FBE8, FBE9: UIGHUR KAZAKH KIRGHIZ ALEF MAKSURA INITIAL FORM, MEDIAL FORM -->
<t>ﻰ</t><t>ﻯ</t> <!-- FEF0, FEEF: FINAL FORM, ISOLATED FORM -->
<!-- HEH chain: the presentation forms are tertiary variants of HEH;
     TEH MARBUTA is a secondary variant with its own forms. -->
<reset>ه</reset> <!-- 0647 ARABIC LETTER HEH -->
<t>ﻫ</t><t>ﻬ</t><t>ﻪ</t><t>ﻩ</t> <!-- FEEB, FEEC, FEEA, FEE9: INITIAL FORM, MEDIAL FORM, FINAL FORM, ISOLATED FORM -->
<s>ة</s> <!-- 0629 ARABIC LETTER TEH MARBUTA -->
<t>ﺔ</t><t>ﺓ</t> <!-- FE94, FE93: FINAL FORM, ISOLATED FORM -->
<!-- Punctuation/marks placed as secondary (<s>) relations after the last primary ignorable. -->
<reset><last_primary_ignorable/></reset>
<s>׳</s> <!-- 05F3 HEBREW PUNCTUATION GERESH -->
<s>״</s> <!-- 05F4 HEBREW PUNCTUATION GERSHAYIM -->
<s>ـ</s> <!-- 0640 ARABIC TATWEEL -->
<!-- No explicit entries are needed for 064B - 0652 (ARABIC FATHATAN - ARABIC SUKUN):
these are already ignorable at level 1 and are not involved in contractions. -->
<s>ฺ</s> <!-- 0E3A THAI CHARACTER PHINTHU -->
<!-- root search rules for modern Korean jamos -->
<!-- Korean modern complex consonants, decompose: x(L) = CHOSEONG x, x(T) = JONGSEONG x.
     Each <reset> is a leading-consonant (L) jamo or sequence; the <i> entries that follow
     (double/complex leading jamo and trailing-consonant (T) jamo) are made identical to it. -->
<reset>ᄀ</reset> <!-- 1100 KIYEOK(L) = 11A8 KIYEOK(T) -->
<i>ᆨ</i>
<reset>ᄀᄀ</reset> <!-- 1100 KIYEOK(L) + 1100 KIYEOK(L) = 1101 SSANGKIYEOK(L) = 11A9 SSANGKIYEOK(T) -->
<i>ᄁ</i>
<i>ᆩ</i>
<reset>ᄀᄉ</reset> <!-- 1100 KIYEOK(L) + 1109 SIOS(L) = 11AA KIYEOK-SIOS(T) -->
<i>ᆪ</i>
<reset>ᄂ</reset> <!-- 1102 NIEUN(L) = 11AB NIEUN(T) -->
<i>ᆫ</i>
<reset>ᄂᄌ</reset> <!-- 1102 NIEUN(L) + 110C CIEUC(L) = 11AC NIEUN-CIEUC(T), also archaic 115C NIEUN-CIEUC(L) -->
<i>ᆬ</i>
<reset>ᄂᄒ</reset> <!-- 1102 NIEUN(L) + 1112 HIEUH(L) = 11AD NIEUN-HIEUH(T), also archaic 115D NIEUN-HIEUH(L) -->
<i>ᆭ</i>
<reset>ᄃ</reset> <!-- 1103 TIKEUT(L) = 11AE TIKEUT(T) -->
<i>ᆮ</i>
<reset>ᄃᄃ</reset> <!-- 1103 TIKEUT(L) + 1103 TIKEUT(L) = 1104 SSANGTIKEUT(L) -->
<i>ᄄ</i>
<reset>ᄅ</reset> <!-- 1105 RIEUL(L) = 11AF RIEUL(T) -->
<i>ᆯ</i>
<reset>ᄅᄀ</reset> <!-- 1105 RIEUL(L) + 1100 KIYEOK(L) = 11B0 RIEUL-KIYEOK(T) -->
<i>ᆰ</i>
<reset>ᄅᄆ</reset> <!-- 1105 RIEUL(L) + 1106 MIEUM(L) = 11B1 RIEUL-MIEUM(T) -->
<i>ᆱ</i>
<reset>ᄅᄇ</reset> <!-- 1105 RIEUL(L) + 1107 PIEUP(L) = 11B2 RIEUL-PIEUP(T) -->
<i>ᆲ</i>
<reset>ᄅᄉ</reset> <!-- 1105 RIEUL(L) + 1109 SIOS(L) = 11B3 RIEUL-SIOS(T) -->
<i>ᆳ</i>
<reset>ᄅᄐ</reset> <!-- 1105 RIEUL(L) + 1110 THIEUTH(L) = 11B4 RIEUL-THIEUTH(T) -->
<i>ᆴ</i>
<reset>ᄅᄑ</reset> <!-- 1105 RIEUL(L) + 1111 PHIEUPH(L) = 11B5 RIEUL-PHIEUPH(T) -->
<i>ᆵ</i>
<reset>ᄅᄒ</reset> <!-- 1105 RIEUL(L) + 1112 HIEUH(L) = 11B6 RIEUL-HIEUH(T), also archaic 111A RIEUL-HIEUH(L) -->
<i>ᆶ</i>
<reset>ᄆ</reset> <!-- 1106 MIEUM(L) = 11B7 MIEUM(T) -->
<i>ᆷ</i>
<reset>ᄇ</reset> <!-- 1107 PIEUP(L) = 11B8 PIEUP(T) -->
<i>ᆸ</i>
<reset>ᄇᄇ</reset> <!-- 1107 PIEUP(L) + 1107 PIEUP(L) = 1108 SSANGPIEUP(L) -->
<i>ᄈ</i>
<reset>ᄇᄉ</reset> <!-- 1107 PIEUP(L) + 1109 SIOS(L) = 11B9 PIEUP-SIOS(T), also archaic 1121 PIEUP-SIOS(L) -->
<i>ᆹ</i>
<reset>ᄉ</reset> <!-- 1109 SIOS(L) = 11BA SIOS(T) -->
<i>ᆺ</i>
<reset>ᄉᄉ</reset> <!-- 1109 SIOS(L) + 1109 SIOS(L) = 110A SSANGSIOS(L) = 11BB SSANGSIOS(T) -->
<i>ᄊ</i>
<i>ᆻ</i>
<reset>ᄋ</reset> <!-- 110B IEUNG(L) = 11BC IEUNG(T) -->
<i>ᆼ</i>
<reset>ᄌ</reset> <!-- 110C CIEUC(L) = 11BD CIEUC(T) -->
<i>ᆽ</i>
<reset>ᄌᄌ</reset> <!-- 110C CIEUC(L) + 110C CIEUC(L) = 110D SSANGCIEUC(L) -->
<i>ᄍ</i>
<reset>ᄎ</reset> <!-- 110E CHIEUCH(L) = 11BE CHIEUCH(T) -->
<i>ᆾ</i>
<reset>ᄏ</reset> <!-- 110F KHIEUKH(L) = 11BF KHIEUKH(T) -->
<i>ᆿ</i>
<reset>ᄐ</reset> <!-- 1110 THIEUTH(L) = 11C0 THIEUTH(T) -->
<i>ᇀ</i>
<reset>ᄑ</reset> <!-- 1111 PHIEUPH(L) = 11C1 PHIEUPH(T) -->
<i>ᇁ</i>
<reset>ᄒ</reset> <!-- 1112 HIEUH(L) = 11C2 HIEUH(T) -->
<i>ᇂ</i>
<!-- Korean modern complex vowels, decompose: each complex vowel (V) jamo is made identical
     (<i>) to the sequence of simple vowel jamo in the preceding <reset>. -->
<reset>ᅡᅵ</reset> <!-- 1161 A(V) + 1175 I(V) = 1162 AE(V) -->
<i>ᅢ</i>
<reset>ᅣᅵ</reset> <!-- 1163 YA(V) + 1175 I(V) = 1164 YAE(V) -->
<i>ᅤ</i>
<reset>ᅥᅵ</reset> <!-- 1165 EO(V) + 1175 I(V) = 1166 E(V) -->
<i>ᅦ</i>
<reset>ᅧᅵ</reset> <!-- 1167 YEO(V) + 1175 I(V) = 1168 YE(V) -->
<i>ᅨ</i>
<reset>ᅩᅡ</reset> <!-- 1169 O(V) + 1161 A(V) = 116A WA(V) -->
<i>ᅪ</i>
<reset>ᅩᅡᅵ</reset> <!-- 1169 O(V) + 1161 A(V) + 1175 I(V) = 116B WAE(V) -->
<i>ᅫ</i>
<reset>ᅩᅵ</reset> <!-- 1169 O(V) + 1175 I(V) = 116C OE(V) -->
<i>ᅬ</i>
<!-- NOTE(review): WEO and WE were previously anchored on 116E U + 1174 YI, which does not
     spell these vowels. WEO is U + EO, and WE is U + E, i.e. U + EO + I (E = EO + I above).
     Confirm against upstream CLDR before relying on exact parity with it. -->
<reset>ᅮᅥ</reset> <!-- 116E U(V) + 1165 EO(V) = 116F WEO(V) -->
<i>ᅯ</i>
<reset>ᅮᅥᅵ</reset> <!-- 116E U(V) + 1165 EO(V) + 1175 I(V) = 1170 WE(V) -->
<i>ᅰ</i>
<reset>ᅮᅵ</reset> <!-- 116E U(V) + 1175 I(V) = 1171 WI(V) -->
<i>ᅱ</i>
</rules>
</collation>
<collation type="ducet">
  <!-- Collation type "ducet": normalization is on and variableTop is U+1D371.
       Each <reset> names an anchor; the relations indented beneath it follow that anchor
       (<p> primary, <t> tertiary, <i> identical; <x>/<extend> attaches an expansion). -->
  <settings normalization="on" variableTop="&#x1D371;"/> <!-- U+1D371 COUNTING ROD TENS DIGIT NINE -->
  <rules>
    <reset>&#x1680;</reset> <!-- U+1680 OGHAM SPACE MARK -->
      <p>`</p> <!-- U+0060 GRAVE ACCENT -->
      <t>&#xFF40;</t> <!-- U+FF40 FULLWIDTH GRAVE ACCENT -->
      <p>&#xB4;</p> <!-- U+00B4 ACUTE ACCENT -->
      <i>&#x384;</i> <!-- U+0384 GREEK TONOS -->
      <p>&#x2DC;</p> <!-- U+02DC SMALL TILDE -->
      <p>^</p> <!-- U+005E CIRCUMFLEX ACCENT -->
      <t>&#xFF3E;</t> <!-- U+FF3E FULLWIDTH CIRCUMFLEX ACCENT -->
      <p>&#xAF;</p> <!-- U+00AF MACRON -->
      <t>&#xFFE3;</t> <!-- U+FFE3 FULLWIDTH MACRON -->
    <reset>&#xFE4C;</reset> <!-- U+FE4C DOUBLE WAVY OVERLINE -->
      <p>&#x2D8;</p> <!-- U+02D8 BREVE -->
      <p>&#x2D9;</p> <!-- U+02D9 DOT ABOVE -->
      <p>&#xA8;</p> <!-- U+00A8 DIAERESIS -->
      <p>&#x2DA;</p> <!-- U+02DA RING ABOVE -->
      <p>&#x2DD;</p> <!-- U+02DD DOUBLE ACUTE ACCENT -->
      <p>&#x1FBD;</p> <!-- U+1FBD GREEK KORONIS -->
      <i>&#x1FBF;</i> <!-- U+1FBF GREEK PSILI -->
      <p>&#x1FFE;</p> <!-- U+1FFE GREEK DASIA -->
      <p>&#xB8;</p> <!-- U+00B8 CEDILLA -->
      <p>&#x2DB;</p> <!-- U+02DB OGONEK -->
    <reset>&#x2017;</reset> <!-- U+2017 DOUBLE LOW LINE -->
      <p>&#x1FC0;</p> <!-- U+1FC0 GREEK PERISPOMENI -->
      <p>&#xFBB2;</p> <!-- U+FBB2 ARABIC SYMBOL DOT ABOVE -->
      <p>&#xFBB3;</p> <!-- U+FBB3 ARABIC SYMBOL DOT BELOW -->
      <p>&#xFBB4;</p> <!-- U+FBB4 ARABIC SYMBOL TWO DOTS ABOVE -->
      <p>&#xFBB5;</p> <!-- U+FBB5 ARABIC SYMBOL TWO DOTS BELOW -->
      <p>&#xFBB6;</p> <!-- U+FBB6 ARABIC SYMBOL THREE DOTS ABOVE -->
      <p>&#xFBB7;</p> <!-- U+FBB7 ARABIC SYMBOL THREE DOTS BELOW -->
      <p>&#xFBB8;</p> <!-- U+FBB8 ARABIC SYMBOL THREE DOTS POINTING DOWNWARDS ABOVE -->
      <p>&#xFBB9;</p> <!-- U+FBB9 ARABIC SYMBOL THREE DOTS POINTING DOWNWARDS BELOW -->
      <p>&#xFBBA;</p> <!-- U+FBBA ARABIC SYMBOL FOUR DOTS ABOVE -->
      <p>&#xFBBB;</p> <!-- U+FBBB ARABIC SYMBOL FOUR DOTS BELOW -->
      <p>&#xFBBC;</p> <!-- U+FBBC ARABIC SYMBOL DOUBLE VERTICAL BAR BELOW -->
      <p>&#xFBBD;</p> <!-- U+FBBD ARABIC SYMBOL TWO DOTS VERTICALLY ABOVE -->
      <p>&#xFBBE;</p> <!-- U+FBBE ARABIC SYMBOL TWO DOTS VERTICALLY BELOW -->
      <p>&#xFBBF;</p> <!-- U+FBBF ARABIC SYMBOL RING -->
      <p>&#xFBC0;</p> <!-- U+FBC0 ARABIC SYMBOL SMALL TAH ABOVE -->
      <p>&#xFBC1;</p> <!-- U+FBC1 ARABIC SYMBOL SMALL TAH BELOW -->
      <p>&#x309B;</p> <!-- U+309B KATAKANA-HIRAGANA VOICED SOUND MARK -->
      <p>&#x309C;</p> <!-- U+309C KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK -->
    <reset>&#x1805;</reset> <!-- U+1805 MONGOLIAN FOUR DOTS -->
      <p>&#xF14;</p> <!-- U+0F14 TIBETAN MARK GTER TSHEG -->
    <reset>&#x10FB;</reset> <!-- U+10FB GEORGIAN PARAGRAPH SEPARATOR -->
      <p>&#x1360;</p> <!-- U+1360 ETHIOPIC SECTION MARK -->
    <reset>&#x10101;</reset> <!-- U+10101 AEGEAN WORD SEPARATOR DOT -->
      <p>&#x10102;</p> <!-- U+10102 AEGEAN CHECK MARK -->
    <reset>&#xFD3F;</reset> <!-- U+FD3F ORNATE RIGHT PARENTHESIS -->
      <p>&#xA7;</p> <!-- U+00A7 SECTION SIGN -->
      <p>&#xB6;</p> <!-- U+00B6 PILCROW SIGN -->
    <reset>&#x204B;</reset> <!-- U+204B REVERSED PILCROW SIGN -->
      <p>&#xA9;</p> <!-- U+00A9 COPYRIGHT SIGN -->
      <p>&#xAE;</p> <!-- U+00AE REGISTERED SIGN -->
    <reset>&#xFF0F;</reset> <!-- U+FF0F FULLWIDTH SOLIDUS -->
      <p>&#x2044;</p> <!-- U+2044 FRACTION SLASH -->
    <reset>&#xFE60;</reset> <!-- U+FE60 SMALL AMPERSAND -->
      <p>&#x214B;</p> <!-- U+214B TURNED AMPERSAND -->
    <reset>&#x60A;</reset> <!-- U+060A ARABIC-INDIC PER TEN THOUSAND SIGN -->
      <p>&#x2052;</p> <!-- U+2052 COMMERCIAL MINUS SIGN -->
    <reset>&#x1C3F;</reset> <!-- U+1C3F LEPCHA PUNCTUATION TSHOOK -->
      <p>&#x1940;</p> <!-- U+1940 LIMBU SIGN LOO -->
    <reset>&#x104F;</reset> <!-- U+104F MYANMAR SYMBOL GENITIVE -->
      <p>&#x109E;</p> <!-- U+109E MYANMAR SYMBOL SHAN ONE -->
      <p>&#x109F;</p> <!-- U+109F MYANMAR SYMBOL SHAN EXCLAMATION -->
      <p>&#xAA77;</p> <!-- U+AA77 MYANMAR SYMBOL AITON EXCLAMATION -->
      <p>&#xAA78;</p> <!-- U+AA78 MYANMAR SYMBOL AITON ONE -->
      <p>&#xAA79;</p> <!-- U+AA79 MYANMAR SYMBOL AITON TWO -->
      <p>&#x17D7;</p> <!-- U+17D7 KHMER SIGN LEK TOO -->
    <reset>&#xA839;</reset> <!-- U+A839 NORTH INDIC QUANTITY MARK -->
      <p>&#xE4F;</p> <!-- U+0E4F THAI CHARACTER FONGMAN -->
    <reset>&#xFFE4;</reset> <!-- U+FFE4 FULLWIDTH BROKEN BAR -->
      <p>&#x2016;</p> <!-- U+2016 DOUBLE VERTICAL LINE -->
    <reset>&#x2767;</reset> <!-- U+2767 ROTATED FLORAL HEART BULLET -->
      <p>&#x2768;</p> <!-- U+2768 MEDIUM LEFT PARENTHESIS ORNAMENT -->
      <p>&#x2769;</p> <!-- U+2769 MEDIUM RIGHT PARENTHESIS ORNAMENT -->
      <p>&#x276A;</p> <!-- U+276A MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT -->
      <p>&#x276B;</p> <!-- U+276B MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT -->
      <p>&#x276C;</p> <!-- U+276C MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT -->
      <p>&#x276D;</p> <!-- U+276D MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT -->
      <p>&#x276E;</p> <!-- U+276E HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT -->
      <p>&#x276F;</p> <!-- U+276F HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT -->
      <p>&#x2770;</p> <!-- U+2770 HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT -->
      <p>&#x2771;</p> <!-- U+2771 HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT -->
      <p>&#x2772;</p> <!-- U+2772 LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT -->
      <p>&#x2773;</p> <!-- U+2773 LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT -->
      <p>&#x2774;</p> <!-- U+2774 MEDIUM LEFT CURLY BRACKET ORNAMENT -->
      <p>&#x2775;</p> <!-- U+2775 MEDIUM RIGHT CURLY BRACKET ORNAMENT -->
    <reset>&#x27C4;</reset> <!-- U+27C4 OPEN SUPERSET -->
      <p>&#x27C5;</p> <!-- U+27C5 LEFT S-SHAPED BAG DELIMITER -->
      <p>&#x27C6;</p> <!-- U+27C6 RIGHT S-SHAPED BAG DELIMITER -->
    <reset>&#x27E5;</reset> <!-- U+27E5 WHITE SQUARE WITH RIGHTWARDS TICK -->
      <p>&#x27E6;</p> <!-- U+27E6 MATHEMATICAL LEFT WHITE SQUARE BRACKET -->
      <p>&#x27E7;</p> <!-- U+27E7 MATHEMATICAL RIGHT WHITE SQUARE BRACKET -->
      <p>&#x27E8;</p> <!-- U+27E8 MATHEMATICAL LEFT ANGLE BRACKET -->
      <p>&#x27E9;</p> <!-- U+27E9 MATHEMATICAL RIGHT ANGLE BRACKET -->
      <p>&#x27EA;</p> <!-- U+27EA MATHEMATICAL LEFT DOUBLE ANGLE BRACKET -->
      <p>&#x27EB;</p> <!-- U+27EB MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET -->
    <reset>&#x29D7;</reset> <!-- U+29D7 BLACK HOURGLASS -->
      <p>&#x29D8;</p> <!-- U+29D8 LEFT WIGGLY FENCE -->
      <p>&#x29D9;</p> <!-- U+29D9 RIGHT WIGGLY FENCE -->
      <p>&#x29DA;</p> <!-- U+29DA LEFT DOUBLE WIGGLY FENCE -->
      <p>&#x29DB;</p> <!-- U+29DB RIGHT DOUBLE WIGGLY FENCE -->
    <reset>&#x10A7E;</reset> <!-- U+10A7E OLD SOUTH ARABIAN NUMBER FIFTY -->
      <p>&#x10A7F;</p> <!-- U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR -->
    <reset>&#xA7A6;</reset> <!-- U+A7A6 LATIN CAPITAL LETTER R WITH OBLIQUE STROKE / 20EB -->
      <x><t>&#x20A8;</t><extend>s</extend></x> <!-- U+20A8 RUPEE SIGN / 0073 -->
    <reset>&#xFDF6;</reset> <!-- U+FDF6 ARABIC LIGATURE RASOUL ISOLATED FORM / 0633 0648 0644 -->
      <x><t>&#xFDFC;</t><extend>&#x6CC;&#x627;&#x644;</extend></x> <!-- U+FDFC RIAL SIGN / 06CC 0627 0644 -->
  </rules>
</collation>
</collations>
<!-- For the UCA (Unicode Collation Algorithm) rules, see http://www.unicode.org/reports/tr10/ -->
</ldml>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment