Skip to content

Instantly share code, notes, and snippets.

@hippietrail
Created February 18, 2012 11:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hippietrail/1858881 to your computer and use it in GitHub Desktop.
Save hippietrail/1858881 to your computer and use it in GitHub Desktop.
Sort and normalize Georgian requested entries page of English Wiktionary
<html>
<head>
<!-- Page styles (currently empty), the jQuery library, and the inline script that drives this page -->
<style type="text/css">
</style>
<script type="text/javascript" src="http://code.jquery.com/jquery-latest.min.js"></script>
<script type="text/javascript">
// Page entry point. Once the DOM is ready, fetch the wikitext of
// "Wiktionary:Requested_entries_(Georgian)" through the MediaWiki API (JSONP).
// For every section whose heading looks like "==X [ipa]==", report whether its
// entries are already sorted and already use {{l}} link templates; when they
// are not, show the corrected entry lines in a <pre> below the section.
$(document).ready(function () {
  // Return the wikitext of the first page in an API "query" response, or null
  // when the response carries no revision text (e.g. the page is missing).
  function firstRevisionText(data) {
    var pages = data && data.query && data.query.pages;
    if (!pages) {
      return null;
    }
    var ids = Object.keys(pages);
    var revisions = ids.length > 0 && pages[ids[0]] ? pages[ids[0]].revisions : null;
    if (!revisions || revisions.length === 0 || typeof revisions[0]["*"] !== "string") {
      return null;
    }
    return revisions[0]["*"];
  }

  // Rewrite plain [[wiki links]] in a section body as {{l|lang|...}} templates.
  // The link text may not contain "[", "]" or a newline, so one match can never
  // swallow several links on the same line.
  function normalizeLinks(body) {
    var text = body;
    // [[kaka]] (latin) -> {{l|ka|kaka|tr=latin}}: a Georgian link followed by a
    // parenthetical containing at least one Latin letter. NOTE(review): the
    // parenthetical is assumed to be a transliteration - confirm against the page.
    text = text.replace(/\[\[([^\[\]\n]*[\u10a0-\u10fb][^\[\]\n]*)\]\]\s*\((.*?[a-zA-Z].*?)\)/g, "{{l|ka|$1|tr=$2}}");
    // [[kaka]] -> {{l|ka|kaka}} when at least one character is Georgian
    text = text.replace(/\[\[([^\[\]\n]*[\u10a0-\u10fb][^\[\]\n]*)\]\]/g, "{{l|ka|$1}}");
    // [[ruru]] -> {{l|ru|ruru}} when at least one character is Cyrillic
    text = text.replace(/\[\[([^\[\]\n]*[\u0400-\u04ff][^\[\]\n]*)\]\]/g, "{{l|ru|$1}}");
    // [[hyhy]] -> {{l|hy|hyhy}} when at least one character is Armenian
    text = text.replace(/\[\[([^\[\]\n]*[\u0531-\u058a][^\[\]\n]*)\]\]/g, "{{l|hy|$1}}");
    // {{l|ka|kaka}} -> {{l|ka|kaka|tr=trtr}} for templates that have no further
    // parameters. The term may contain anything except "|" and "}" on either
    // side of the required Georgian character.
    text = text.replace(/\{\{l\|ka\|([^|}]*[\u10a0-\u10fb][^|}]*)\}\}/g, replacetr);
    return text;
  }

  // Split a section body into entry records: { key, index, text }.
  // An entry is a "*" line that starts with a [[link]] or a {{l|lang|term}}
  // template; following "*:" lines are kept attached to the entry above them.
  // Every entry gets its own record, so two entries with the same sort key no
  // longer overwrite each other. Other lines are not part of the output.
  function collectEntries(body) {
    var entries = [];
    var current = null;
    $.each(body.split(/\r?\n/), function (_, line) {
      var m = line.match(/^\*\s*\[\[(.*?)\]\]/) ||
          line.match(/^\*\s*\{\{l\|[^|}]*\|([^|}]*?)[|}]/);
      if (m) {
        // Hyphens and spaces are ignored when sorting.
        current = { key: m[1].replace(/[- ]/g, ""), index: entries.length, text: line };
        entries.push(current);
      } else if (current !== null && /^\*:/.test(line)) {
        current.text += "\n" + line;
      }
    });
    return entries;
  }

  // Analyse one section and append its report to #app. All fetched text is
  // inserted with .text()/text nodes so wikitext is never parsed as HTML.
  function renderSection(name, ipa, body) {
    var normalized = normalizeLinks(body);
    var unchanged = (normalized === body);
    var entries = collectEntries(normalized);
    // Sort by key in code-unit order (same order as the default Array sort on
    // strings); ties keep their original relative order.
    var ordered = entries.slice(0).sort(function (a, b) {
      if (a.key < b.key) {
        return -1;
      }
      if (a.key > b.key) {
        return 1;
      }
      return a.index - b.index;
    });
    var keyOf = function (entry) { return entry.key; };
    var textOf = function (entry) { return entry.text; };
    var sorted = arraycompare(ordered.map(keyOf), entries.map(keyOf));

    var section = $("<div>").attr("id", name);
    $("<h2>").text(name + " ").append($("<tt>").text("[" + ipa + "]")).appendTo(section);
    $("<dl>").appendTo(section);
    section.append(document.createTextNode(
        (sorted ? "sorted" : "NOT SORTED") + "\n" + (unchanged ? "unchanged" : "CHANGED")));
    section.appendTo("#app");

    if (!sorted || !unchanged) {
      $("<pre>").text(ordered.map(textOf).join("\n")).appendTo("#app");
    }
  }

  $.getJSON("http://en.wiktionary.org/w/api.php?callback=?",
    {
      action: "query",
      format: "json",
      titles: "Wiktionary:Requested_entries_(Georgian)",
      prop: "revisions",
      rvprop: "content"
    },
    function (data) {
      var src = firstRevisionText(data);
      if (src === null) {
        $("<p>").text("Could not read the page text from the API response.").appendTo("#app");
        return;
      }
      // String.prototype.match returns null when nothing matches.
      var heads = src.match(/^==.*==$/mg) || [];
      // bodies[0] is the text before the first heading, so the body that
      // belongs to heads[i] is bodies[i + 1].
      var bodies = src.split(/^==.*==$/mg);
      for (var i = 0; i < heads.length; ++i) {
        var x = heads[i].match(/^==([^ ]*)\s+\[(.*)\]==$/);
        if (x) {
          renderSection(x[1], x[2], bodies[i + 1]);
        }
      }
    }
  );
});
/**
 * Report whether two arrays hold the same elements in the same order.
 *
 * @param {Array} a - First array.
 * @param {Array} b - Second array.
 * @returns {boolean} true when both arrays have the same length and every
 *     element of `a` is strictly equal to the element at the same index of `b`.
 */
function arraycompare(a, b) {
  // Without this check a shorter `a` that is a prefix of `b` compared as equal.
  if (a.length !== b.length) {
    return false;
  }
  for (var i = 0; i < a.length; ++i) {
    if (a[i] !== b[i]) {
      return false;
    }
  }
  return true;
}
/**
 * Replacement callback for String.prototype.replace: rebuild a {{l|ka|...}}
 * link template with a "tr=" parameter produced by transliterate('ka', 'tr', ...).
 *
 * @param {string} m - The whole matched text (unused).
 * @param {string} c - First capture group: the linked term.
 * @returns {string} "{{l|ka|" + c + "|tr=" + transliteration + "}}".
 */
function replacetr(m, c) {
  var tr = transliterate('ka', 'tr', c);
  // Return the result directly; the original assigned it to an undeclared
  // variable, which leaks a global `r` (and throws in strict mode).
  return "{{l|ka|" + c + "|tr=" + tr + "}}";
}
// Lookup table: Georgian letter -> Latin rendering.
// A string value is the single rendering for that letter. An array value lists
// several renderings; transliterate() only ever emits the first element.
var ka_tr = {
  "ა": "a",
  "ბ": "b",
  "გ": "g",
  "დ": "d",
  "ე": "e",
  "ვ": "v",
  "ზ": "z",
  "თ": ["t'", "t", "T"],
  "ი": "i",
  "კ": ["k", "k'"],
  "ლ": "l",
  "მ": "m",
  "ნ": "n",
  "ო": "o",
  "პ": ["p", "p'"],
  "ჟ": ["ž", "zh", "j", "J"],
  "რ": "r",
  "ს": "s",
  "ტ": ["t", "t'"],
  "უ": "u",
  "ფ": ["p'", "p"],
  "ქ": ["k'", "k", "q"],
  "ღ": ["ḡ", "gh", "R"],
  "ყ": ["q", "y", "q"],
  "შ": ["š", "sh", "S"],
  "ჩ": ["č'", "ch", "C"],
  "ც": ["c'", "c", "ts"],
  "ძ": ["j", "dz", "Z"],
  "წ": ["c", "c'", "ts", "w", "ts'"],
  "ჭ": ["č", "ch", "W", "ch'"],
  "ხ": ["x", "kh"],
  "ჯ": ["ǰ", "j"],
  "ჰ": "h"
};
// Lookup table: Latin rendering (one or two characters) -> Georgian letter.
// A trailing comment names another Georgian letter for which ka_tr also lists
// the same Latin spelling, i.e. the reverse mapping is ambiguous there.
var tr_ka = {
  "a": "ა",
  "b": "ბ",
  "g": "გ",
  "d": "დ",
  "e": "ე",
  "v": "ვ",
  "z": "ზ",
  "t'": "თ",  // also ტ
  "i": "ი",
  "k": "კ",   // also ქ
  "l": "ლ",
  "m": "მ",
  "n": "ნ",
  "o": "ო",
  "p": "პ",   // also ფ
  "ž": "ჟ",
  "r": "რ",
  "s": "ს",
  "t": "ტ",   // also თ
  "u": "უ",
  "p'": "ფ",  // also პ
  "k'": "ქ",  // also კ
  "ḡ": "ღ",
  "q": "ყ",   // also ქ
  "š": "შ",
  "č'": "ჩ",
  "c'": "ც",
  "j": "ძ",
  "c": "წ",
  "č": "ჭ",
  "x": "ხ",
  "ǰ": "ჯ",
  "h": "ჰ"
};
// J̌ ǰ
/**
 * Convert text with one of the file's lookup tables, scanning left to right.
 * At each position a two-character sequence is tried first, then the single
 * character; anything without a table entry is copied through unchanged.
 *
 * @param {string} src - Source scheme: 'ka' selects ka_tr, any other value
 *     selects tr_ka.
 * @param {string} dst - Destination scheme. Currently unused: the table is
 *     chosen from `src` alone.
 * @param {string} srctxt - Text to convert.
 * @returns {string} The converted text. When a table entry is an array of
 *     alternatives, its first element is used.
 */
function transliterate(src, dst, srctxt) {
  var table = src === 'ka' ? ka_tr : tr_ka;
  // Own-property lookups only, so names inherited from Object.prototype can
  // never be mistaken for table entries.
  var hasOwn = Object.prototype.hasOwnProperty;
  var out = '';
  for (var i = 0; i < srctxt.length; ++i) {
    var single = srctxt.charAt(i);
    var pair = srctxt.slice(i, i + 2);
    var hit;
    if (pair.length === 2 && hasOwn.call(table, pair)) {
      hit = table[pair];
      ++i; // the pair consumed the next character as well
    } else if (hasOwn.call(table, single)) {
      hit = table[single];
    } else {
      hit = single;
    }
    // Array.isArray replaces jQuery's deprecated $.isArray, so this pure
    // string function no longer needs jQuery.
    out += Array.isArray(hit) ? hit[0] : hit;
  }
  return out;
}
</script>
</head>
<body>
<div id="app">
</div>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment