Skip to content

Instantly share code, notes, and snippets.

@hrishikeshrt
Last active December 20, 2021 12:06
Show Gist options
  • Save hrishikeshrt/d38536b640d79dccb6d52fd8baeaac1a to your computer and use it in GitHub Desktop.
Save hrishikeshrt/d38536b640d79dccb6d52fd8baeaac1a to your computer and use it in GitHub Desktop.
// ==UserScript==
// @name Transliterate DCS
// @namespace http://www.gist.github.com/
// @version 1.1
// @description Transliterate IAST text from DCS into Devanagari
// @author Hrishikesh Terdalkar
// @match http://www.sanskrit-linguistics.org/dcs/index.php?contents=texte&IDTextDisplay=*
// @require https://raw.githubusercontent.com/sanskrit/sanscript.js/master/sanscript.js
// ==/UserScript==
// Entry point, executed once when the userscript is injected:
//   1. rewrites the label of every <option> inside the "text_id" element
//      (lower-cased first, then IAST -> Devanagari), and
//   2. inserts a "Transliterate" button right after that element; clicking it
//      transliterates the sentences and the lemma links on the page.
(() => {
  'use strict';

  const corpusSelect = document.getElementById("text_id");

  for (const option of corpusSelect.getElementsByTagName("option")) {
    const label = option.innerHTML.toLowerCase();
    option.innerHTML = Sanscript.t(label, 'iast', 'devanagari');
  }

  const trigger = document.createElement('button');
  trigger.innerHTML = "Transliterate";
  // Inserting before the next sibling places the button directly after the selector.
  corpusSelect.parentNode.insertBefore(trigger, corpusSelect.nextSibling);

  trigger.addEventListener("click", () => {
    transliterate_sentences();
    transliterate_analysis();
  });
})();
/**
 * Transliterate the visible text of every sentence from IAST to Devanagari.
 *
 * Looks up the element with id "sentences" and processes each descendant
 * carrying the class "sentence_div". The element's HTML is split into parts
 * with split_text_html(); only parts that contain none of "&", "[" or "<"
 * (i.e. plain text) are passed to Sanscript. Processing of a sentence stops at
 * the first "&nbsp;" part: it and everything after it is kept unchanged.
 */
function transliterate_sentences() {
  const container = document.getElementById("sentences");

  for (const sentence of container.getElementsByClassName("sentence_div")) {
    const pieces = split_text_html(sentence.innerHTML);

    // Index of the first "&nbsp;" part; parts from there on are left untouched.
    const nbspAt = pieces.indexOf("&nbsp;");
    const limit = nbspAt === -1 ? pieces.length : nbspAt;

    const rendered = pieces.map((piece, position) => {
      const isPlainText = piece.match(/^[^&\[<]*$/g) !== null;
      if (position < limit && isPlainText) {
        return Sanscript.t(piece, 'iast', 'devanagari');
      }
      return piece;
    });

    sentence.innerHTML = rendered.join("");
  }
}
/**
 * Transliterate every lemma link from IAST to Devanagari.
 *
 * Inside the element with id "sentences", each descendant with the class
 * "text-lemma-link" has its whole innerHTML run through Sanscript and
 * written back in place.
 */
function transliterate_analysis() {
  const container = document.getElementById("sentences");

  for (const lemma of container.getElementsByClassName("text-lemma-link")) {
    const original = lemma.innerHTML;
    lemma.innerHTML = Sanscript.t(original, 'iast', 'devanagari');
  }
}
/**
 * Split an HTML string into an ordered list of parts so that plain text can be
 * processed separately from markup.
 *
 * At each position the alternatives are tried in this order:
 *   1. an HTML entity-like token  "&...;"  (no space or ";" inside),
 *   2. an HTML tag                "<...>",
 *   3. text in square brackets    "[...]",
 *   4. a non-empty run of text containing none of "&", "[", "<",
 *   5. a single stray "&", "[" or "<" that does not start any of 1-3.
 *
 * Alternative 5 guarantees that no character is dropped: joining the returned
 * parts with "" always reproduces `s` exactly. (Without it, an "&" that is not
 * followed by a ";" before the next space, or an unclosed "[" / "<", would be
 * omitted from the result.)
 *
 * @param {string} s - HTML source to split.
 * @returns {string[]} The parts in document order; an empty array when `s` is
 *   empty. No part is an empty string.
 */
function split_text_html(s) {
  const patterns = [
    "&[^; ]*;",       // html entity-like token
    "<[^>]*>",        // html tag
    "\\[[^\\]]*\\]",  // text in brackets
    "[^&\\[<]+",      // any text until the start of the next tag, token or bracket
    "[&\\[<]"         // stray special character that opens nothing complete
  ];
  const re = new RegExp(patterns.join("|"), "g");
  // String.prototype.match returns null when nothing matches (empty input).
  return s.match(re) || [];
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment