Last active
December 31, 2019 18:26
-
-
Save giuseppeM99/a5e708b192cecbfd1b1296241eee223b to your computer and use it in GitHub Desktop.
PHP TG Entity parser (LEGACY)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Renders a message text plus its entity list as an HTML string.
 *
 * Entity offsets and lengths are interpreted as UTF-16 code units (the text is
 * sliced from a UTF-16LE copy, two bytes per unit). Every text fragment is passed
 * through htmlspecialchars(); formatting entities are wrapped in tags:
 *
 *   bold => <b>, italic => <i>, code => <code>, pre => <pre>,
 *   strikethrough => <s>, underline => <u>,
 *   text_link / text_mention => <a href='...'>
 *
 * Any other entity type contributes no markup; its text is only escaped.
 * Entities may be nested; a partially overlapping entity is truncated at the end
 * of its enclosing entity so the emitted tags stay balanced.
 *
 * @param string     $text     UTF-8 message text.
 * @param array|null $entities List of arrays with 'type', 'offset', 'length' and,
 *                             depending on the type, 'url' or 'user' => ['id' => ...].
 *
 * @return string HTML-escaped text with the formatting tags applied.
 */
function parseHTML($text, $entities)
{
    if (empty($entities)) {
        return htmlspecialchars($text);
    }

    // Work on a UTF-16LE copy so that unit offsets map to byte offsets (* 2).
    $utf16 = mb_convert_encoding($text, 'UTF-16LE', 'UTF-8');
    $totalUnits = (int) (strlen($utf16) / 2);

    // Returns the HTML-escaped UTF-8 text between two UTF-16 unit positions.
    $escapeRange = function ($from, $to) use ($utf16) {
        if ($to <= $from) {
            return '';
        }
        $chunk = substr($utf16, $from * 2, ($to - $from) * 2);

        return htmlspecialchars(mb_convert_encoding($chunk, 'UTF-8', 'UTF-16LE'));
    };

    $simpleTags = [
        'bold' => 'b',
        'italic' => 'i',
        'code' => 'code',
        'pre' => 'pre',
        'strikethrough' => 's',
        'underline' => 'u',
    ];

    // Normalise every entity to [start, end, openTag, closeTag, inputIndex].
    $spans = [];
    foreach (array_values($entities) as $index => $entity) {
        $rawStart = (int) $entity['offset'];
        $start = max(0, min($totalUnits, $rawStart));
        $end = max($start, min($totalUnits, $rawStart + (int) $entity['length']));

        $type = $entity['type'];
        $open = '';
        $close = '';
        if (isset($simpleTags[$type])) {
            $open = '<' . $simpleTags[$type] . '>';
            $close = '</' . $simpleTags[$type] . '>';
        } elseif ($type === 'text_link' || $type === 'text_mention') {
            $url = $type === 'text_mention'
                ? 'tg://user?id=' . $entity['user']['id']
                : $entity['url'];
            // The attribute is single-quoted, so quotes must be entity-escaped;
            // backslash escaping (addslashes) has no meaning inside HTML.
            $open = "<a href='" . htmlspecialchars($url, ENT_QUOTES) . "'>";
            $close = '</a>';
        }

        $spans[] = [$start, $end, $open, $close, $index];
    }

    // Outer entities first: by start ascending, then by end descending.
    // The input index is the final tie-breaker so the order is deterministic.
    usort($spans, function ($a, $b) {
        if ($a[0] !== $b[0]) {
            return $a[0] < $b[0] ? -1 : 1;
        }
        if ($a[1] !== $b[1]) {
            return $a[1] > $b[1] ? -1 : 1;
        }

        return $a[4] < $b[4] ? -1 : 1;
    });

    $html = '';
    $cursor = 0;   // first UTF-16 unit not yet written to $html
    $stack = [];   // currently open entities as [end, closeTag], innermost last

    foreach ($spans as $span) {
        list($start, $end, $open, $close) = $span;

        // Close every open entity that ends at or before this one starts.
        while ($stack) {
            $top = $stack[count($stack) - 1];
            if ($top[0] > $start) {
                break;
            }
            $html .= $escapeRange($cursor, $top[0]) . $top[1];
            $cursor = max($cursor, $top[0]);
            array_pop($stack);
        }

        $html .= $escapeRange($cursor, $start);
        $cursor = $start;

        // Keep tags balanced: a child may not outlive its parent.
        if ($stack) {
            $parentEnd = $stack[count($stack) - 1][0];
            if ($end > $parentEnd) {
                $end = $parentEnd;
            }
        }

        $html .= $open;
        $stack[] = [$end, $close];
    }

    // Flush whatever is still open, innermost first.
    while ($stack) {
        $top = array_pop($stack);
        $html .= $escapeRange($cursor, $top[0]) . $top[1];
        $cursor = max($cursor, $top[0]);
    }

    return $html . $escapeRange($cursor, $totalUnits);
}
/**
 * Renders a message text plus its entity list as a Markdown string.
 *
 * Entity offsets and lengths are interpreted as UTF-16 code units (the text is
 * sliced from a UTF-16LE copy, two bytes per unit). Supported entities:
 *
 *   bold => *text*, italic => _text_, code => `text`, pre => ```text```,
 *   text_link / text_mention => [text](url)
 *
 * Every other entity type (including strikethrough and underline) is ignored and
 * its text is emitted unchanged. The text itself is NOT escaped: Markdown control
 * characters already present in $text are passed through as-is.
 *
 * Entities are processed in offset order. A supported entity that starts inside a
 * previously emitted one is skipped, because this function has no representation
 * for nested formatting.
 *
 * @param string     $text     UTF-8 message text.
 * @param array|null $entities List of arrays with 'type', 'offset', 'length' and,
 *                             depending on the type, 'url' or 'user' => ['id' => ...].
 *
 * @return string The text with Markdown markers inserted.
 */
function parseMD($text, $entities)
{
    if (empty($entities)) {
        return (string) $text;
    }

    // Work on a UTF-16LE copy so that unit offsets map to byte offsets (* 2).
    $utf16 = mb_convert_encoding($text, 'UTF-16LE', 'UTF-8');
    $totalUnits = (int) (strlen($utf16) / 2);

    // Returns the UTF-8 text between two UTF-16 unit positions.
    $slice = function ($from, $to) use ($utf16) {
        if ($to <= $from) {
            return '';
        }

        return mb_convert_encoding(substr($utf16, $from * 2, ($to - $from) * 2), 'UTF-8', 'UTF-16LE');
    };

    $markers = [
        'bold' => ['*', '*'],
        'italic' => ['_', '_'],
        'code' => ['`', '`'],
        'pre' => ['```', '```'],
    ];

    // Keep only entities that produce markup, as [start, end, open, close, inputIndex].
    $spans = [];
    foreach (array_values($entities) as $index => $entity) {
        $type = $entity['type'];
        if (isset($markers[$type])) {
            list($open, $close) = $markers[$type];
        } elseif ($type === 'text_link' || $type === 'text_mention') {
            $url = $type === 'text_mention'
                ? 'tg://user?id=' . $entity['user']['id']
                : $entity['url'];
            // NOTE(review): addslashes() is kept from the original implementation;
            // confirm that backslash-escaped quotes are what the consumer expects in URLs.
            $open = '[';
            $close = '](' . addslashes($url) . ')';
        } else {
            continue;
        }

        $rawStart = (int) $entity['offset'];
        $start = max(0, min($totalUnits, $rawStart));
        $end = max($start, min($totalUnits, $rawStart + (int) $entity['length']));
        $spans[] = [$start, $end, $open, $close, $index];
    }

    // Process left to right regardless of the order the caller supplied.
    usort($spans, function ($a, $b) {
        if ($a[0] !== $b[0]) {
            return $a[0] < $b[0] ? -1 : 1;
        }
        if ($a[1] !== $b[1]) {
            return $a[1] > $b[1] ? -1 : 1;
        }

        return $a[4] < $b[4] ? -1 : 1;
    });

    $markdown = '';
    $cursor = 0; // first UTF-16 unit not yet written to $markdown

    foreach ($spans as $span) {
        list($start, $end, $open, $close) = $span;
        if ($start < $cursor) {
            // Overlaps an entity that was already emitted; skip to avoid corrupting it.
            continue;
        }

        // Plain concatenation: the URL may contain '%', so it must never be
        // part of a sprintf() format string.
        $markdown .= $slice($cursor, $start) . $open . $slice($start, $end) . $close;
        $cursor = $end;
    }

    return $markdown . $slice($cursor, $totalUnits);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment