Skip to content

Instantly share code, notes, and snippets.

@mmahalwy
Last active August 29, 2015 14:19
Show Gist options
  • Save mmahalwy/8f59267fd1162e94690a to your computer and use it in GitHub Desktop.
Save mmahalwy/8f59267fd1162e94690a to your computer and use it in GitHub Desktop.
Transliteration algorithm
# Fuzzy matching helpers for searching transliterated ayah text.
#
# compare_text follows the PHP TransliterationUtilities#compareText
# reference implementation; do_transliteration_search has not been ported yet.
class TransliterationUtils
  # Page size of the most recent search (mirrors the PHP $resultsPerPage
  # property). Nothing in this class assigns it yet.
  attr_accessor :results_per_page

  # Placeholder for the full search entry point.
  #
  # NOTE(review): still a stub. The PHP version walks every sura, scores each
  # ayah with compare_text and pages the sorted hits; that needs table and
  # result-storage collaborators that are not defined here.
  #
  # Returns nil.
  def do_transliteration_search(search_string, page, results_per_page)
  end

  # Returns a new Array with one ayah count per sura (114 entries); index 0
  # holds the count for sura 1.
  def get_num_ayahs_ayah
    [7, 286, 200, 176, 120, 165, 206, 75, 129, 109, 123, 111,
     43, 52, 99, 128, 111, 110, 98, 135, 112, 78, 118, 64, 77,
     227, 93, 88, 69, 60, 34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
     54, 53, 89, 59, 37, 35, 38, 29, 18, 45, 60, 49, 62, 55, 78, 96,
     29, 22, 24, 13, 14, 11, 11, 18, 12, 12, 30, 52, 52, 44, 28, 28,
     20, 56, 40, 31, 50, 40, 46, 42, 29, 19, 36, 25, 22, 17, 19, 26,
     30, 20, 15, 21, 11, 8, 8, 19, 5, 8, 8, 11, 11, 8, 3, 9, 5, 4, 7, 3,
     6, 3, 5, 4, 5, 6]
  end

  # Scores how well search_str matches somewhere inside ayah_text.
  #
  # Every occurrence of the query's first character is tried as a candidate
  # start; the window of query length beginning there is compared to the
  # query by edit distance and turned into a percentage.
  #
  # ayah_text  - normalized ayah text to search in (anything with #to_s).
  # search_str - normalized query (anything with #to_s).
  #
  # Returns 0 when nothing scores above the minimum percentage (or when
  # either input is empty); otherwise the best percentage found as a Float.
  # The scan stops early as soon as a window scores above the cutoff.
  def compare_text(ayah_text, search_str)
    remaining = ayah_text.to_s
    query = search_str.to_s
    # Empty input cannot match, and would otherwise divide by zero below.
    return 0 if remaining.empty? || query.empty?

    min_percent = 70
    best = 0
    cutoff = 80

    # Relax the minimum percentage for short queries against long ayahs.
    # Float division on purpose: integer division would truncate the ratio.
    factor = remaining.length.to_f / query.length
    min_percent -= factor / 2 if factor > 10

    first_char = query[0]
    skip = 0
    loop do
      return best if remaining.length == 1

      # After the first pass, drop the previous candidate's first character
      # so the same position is not matched again.
      remaining = remaining[skip..-1]
      skip = 1

      start = remaining.index(first_char)
      return best if start.nil?

      remaining = remaining[start..-1]
      window = remaining[0, query.length]
      distance = levenshtein(query, window)
      percent = 100 * (1.0 - distance.to_f / window.length)

      next unless percent > min_percent

      best = percent if percent > best
      return best if best > cutoff
    end
  end

  private

  # Character-level Levenshtein edit distance between a and b, computed with
  # two rolling rows (Ruby's standard library has no built-in equivalent).
  def levenshtein(a, b)
    return b.length if a.empty?
    return a.length if b.empty?

    prev = (0..b.length).to_a
    a.each_char.with_index(1) do |char_a, i|
      curr = [i]
      b.each_char.with_index(1) do |char_b, j|
        cost = char_a == char_b ? 0 : 1
        curr << [prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + cost].min
      end
      prev = curr
    end
    prev.last
  end
end
<?php
/**
 * Fuzzy search over transliterated ayah text.
 *
 * Relies on two collaborators defined elsewhere: TransliterationTable
 * (supplies the per-sura rows) and SearchResults (collects and pages hits).
 */
class TransliterationUtilities {
    /** @var int|null Page size of the most recent search. */
    public $resultsPerPage;

    /**
     * Scores every ayah against the query and returns one page of results.
     *
     * @param string $searchString    Raw user query.
     * @param int    $page            Page to return (passed to SearchResults::sortResults).
     * @param int    $resultsPerPage  Page size (passed to SearchResults::sortResults).
     * @return array Empty array when the query normalizes to an empty string;
     *               otherwise whatever SearchResults::sortResults() returns.
     */
    function doTransliterationSearch($searchString, $page, $resultsPerPage){
        $ans = array();
        $normalized = $this->prepareText($searchString);
        if (strlen($normalized) == 0) {
            return $ans;
        }
        // NOTE(review): presumably guards levenshtein()'s 255-character limit
        // in PHP < 8; the cut to 200 (rather than 255) is kept as-is.
        if (strlen($normalized) > 255) {
            $normalized = substr($normalized, 0, 200);
        }

        $this->resultsPerPage = $resultsPerPage;
        $resStorage = new SearchResults($this);

        $startSura = 1;
        $endSura = 114;
        $ayahsPerSuraArr = $this->getNumAyahsArray();
        for ($sura = $startSura; $sura <= $endSura; $sura++){
            $numayahs = $ayahsPerSuraArr[$sura - 1];
            $ayahsArray = TransliterationTable::getEntireSuraText($sura);
            for ($ayah = 1; $ayah <= $numayahs; $ayah++){
                $row = $ayahsArray[$ayah - 1];
                $ayahtext = $row['ayahtext'];
                $searchtext = $row['searchtext'];
                $relevance = $this->compareText($searchtext, $normalized);
                // Only ayahs with a positive score are kept.
                if ($relevance > 0) {
                    $resStorage->addResult($sura, $ayah, $ayahtext, $relevance);
                }
            }
        }

        $ans = $resStorage->sortResults($page, $resultsPerPage);
        return $ans;
    }

    /**
     * Number of ayahs in each sura.
     *
     * @return int[] 114 counts; index 0 holds the count for sura 1.
     */
    function getNumAyahsArray(){
        return array(7,286,200,176,120,165,206,75,129,109,123,111,
            43,52,99,128,111,110,98,135,112,78,118,64,77,
            227,93,88,69,60,34,30,73,54,45,83,182,88,75,85,
            54,53,89,59,37,35,38,29,18,45,60,49,62,55,78,96,
            29,22,24,13,14,11,11,18,12,12,30,52,52,44,28,28,
            20,56,40,31,50,40,46,42,29,19,36,25,22,17,19,26,
            30,20,15,21,11,8,8,19,5,8,8,11,11,8,3,9,5,4,7,3,
            6,3,5,4,5,6);
    }

    /**
     * Scores how well $searchstr matches somewhere inside $ayahtext.
     *
     * Every occurrence of the query's first character is tried as a candidate
     * start; the window of query length beginning there is compared to the
     * query with levenshtein() and turned into a percentage.
     *
     * @param string $ayahtext   Normalized ayah text to search in.
     * @param string $searchstr  Normalized query.
     * @return int|float 0 when nothing scores above the minimum percentage
     *                   (or either input is empty); otherwise the best
     *                   percentage found. The scan stops early as soon as a
     *                   window scores above the cutoff.
     */
    function compareText($ayahtext, $searchstr){
        $ayahtext = (string)$ayahtext;
        $searchstr = (string)$searchstr;
        // Empty input cannot match, and would otherwise divide by zero and
        // read an out-of-range string offset below.
        if ($ayahtext === '' || $searchstr === '') {
            return 0;
        }

        $mp = 70;      // minimum percentage for a window to count at all
        $best = 0;
        $cutoff = 80;  // a score above this ends the scan immediately

        // adjust percentage for short queries against long ayahs
        $factor = strlen($ayahtext) / strlen($searchstr);
        if ($factor > 10){ $mp = $mp - ($factor / 2); }

        $firstChar = $searchstr[0];
        $val = 0;
        while (1){
            // Out of text: report the best score seen so far instead of
            // throwing an earlier qualifying match away.
            if (strlen($ayahtext) == 1) {
                return $best;
            }

            // After the first pass, drop the previous candidate's first
            // character so the same position is not matched again.
            $ayahtext = substr($ayahtext, $val);
            if ($val == 0){ $val = 1; }

            // Strict comparison: position 0 is a valid hit, only false means
            // the character does not occur any more.
            $pos = strpos($ayahtext, $firstChar);
            if ($pos === false) {
                return $best;
            }

            $ayahtext = substr($ayahtext, $pos);
            $len = min(strlen($ayahtext), strlen($searchstr));
            $tstr = substr($ayahtext, 0, $len);
            // abs() kept from the original; presumably it neutralizes the -1
            // that levenshtein() returns for over-long arguments in PHP < 8.
            $tmp = abs(levenshtein($searchstr, $tstr));
            $percent = 100 * (1.00 - ((float)$tmp / strlen($tstr)));

            if ($percent > $mp){
                // keep track of best result so far
                if ($percent > $best){ $best = $percent; }
                // if best result over cutoff, return
                if ($best > $cutoff){ return $best; }
            }
        }
    }

    /**
     * Normalizes a transliterated string before comparison.
     *
     * Steps, in order: "oo" becomes "u" (case-sensitive, done before
     * lowercasing), "-" and "/" are removed, the text is lowercased,
     * "aa" becomes "a", "ia" and "i a" become "i", and finally all spaces
     * are removed.
     *
     * @param string $str Raw text.
     * @return string Normalized text.
     */
    function prepareText($str){
        // Disabled rules kept for reference:
        // $str = str_replace("AA", "a", $str);
        // $str = str_replace("iA", "i", $str);
        // $str = str_replace("i A", "i", $str);
        // fix the zeena bug from sheikh jamaal's class
        // $str = str_replace("th", "z", $str);
        $str = str_replace("oo", "u", $str);
        $str = str_replace(array("-", "/"), "", $str);
        $str = strtolower($str);
        // Array pairs are applied one after another, left to right.
        $str = str_replace(array('aa', 'ia', 'i a'), array('a', 'i', 'i'), $str);
        $str = str_replace(" ", "", $str);
        return $str;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment