Skip to content

Instantly share code, notes, and snippets.

@giuseppeM99
Last active December 31, 2019 18:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save giuseppeM99/a5e708b192cecbfd1b1296241eee223b to your computer and use it in GitHub Desktop.
Save giuseppeM99/a5e708b192cecbfd1b1296241eee223b to your computer and use it in GitHub Desktop.
PHP TG Entity parser (LEGACY)
<?php
/**
 * Render a Telegram message as HTML from its plain text and entity list.
 *
 * Entity offsets and lengths are counted in UTF-16 code units, so the text is
 * sliced in UTF-16LE (two bytes per unit). Every slice is converted back to
 * UTF-8 and HTML-escaped before it is emitted, and the tags themselves are
 * appended as plain UTF-8 strings.
 *
 * Entities are expected in ascending offset order, with an enclosing entity
 * listed before the entities nested inside it. Nested entities are rendered
 * as nested tags; an entity that would run past the end of its enclosing
 * entity is cut at that end so the generated tags stay properly nested.
 * Entity types without a tag mapping contribute their (escaped) text only.
 *
 * @param string     $text     Message text, UTF-8.
 * @param array|null $entities Entities as arrays with 'type', 'offset' and
 *                             'length', plus 'url' for text_link or
 *                             'user' => ['id' => ...] for text_mention.
 *
 * @return string HTML markup, UTF-8.
 */
function parseHTML($text, $entities)
{
    if (empty($entities)) {
        return htmlspecialchars($text);
    }

    // Entity types that map to a simple opening/closing tag pair.
    $tags = [
        'bold'          => ['<b>', '</b>'],
        'italic'        => ['<i>', '</i>'],
        'code'          => ['<code>', '</code>'],
        'pre'           => ['<pre>', '</pre>'],
        'strikethrough' => ['<s>', '</s>'],
        'underline'     => ['<u>', '</u>'],
    ];

    $utf16 = mb_convert_encoding($text, 'UTF-16LE', 'UTF-8');
    $total = strlen($utf16);

    // Returns the HTML-escaped UTF-8 text for the byte range [$from, $to) of $utf16.
    $escapeSlice = function ($from, $to) use ($utf16) {
        if ($to <= $from) {
            return '';
        }
        $chunk = mb_convert_encoding(substr($utf16, $from, $to - $from), 'UTF-8', 'UTF-16LE');

        return htmlspecialchars($chunk);
    };

    $html = '';
    $cursor = 0;  // byte position in $utf16 up to which text has been emitted
    $open = [];   // stack of [end byte, closing tag] for entities still open

    foreach ($entities as $entity) {
        // Clamp the range into the text and never move behind the cursor, so a
        // malformed or out-of-order entity cannot duplicate or corrupt output.
        $start = max($cursor, min($entity['offset'] * 2, $total));
        $end = max($start, min(($entity['offset'] + $entity['length']) * 2, $total));

        // Close every open entity that ends at or before the start of this one.
        while ($open && $open[count($open) - 1][0] <= $start) {
            list($closeAt, $closeTag) = array_pop($open);
            $html .= $escapeSlice($cursor, $closeAt) . $closeTag;
            $cursor = max($cursor, $closeAt);
        }

        // Still inside an open entity: do not let this one outlive its parent.
        if ($open) {
            $end = min($end, $open[count($open) - 1][0]);
        }

        $html .= $escapeSlice($cursor, $start);
        $cursor = $start;

        $type = isset($entity['type']) ? $entity['type'] : null;
        if (is_string($type) && isset($tags[$type])) {
            list($openTag, $closeTag) = $tags[$type];
        } elseif ($type === 'text_mention' || $type === 'text_link') {
            if ($type === 'text_mention') {
                $url = 'tg://user?id=' . $entity['user']['id'];
            } else {
                $url = isset($entity['url']) ? $entity['url'] : '';
            }
            // The URL lands inside a single-quoted attribute, so quotes and
            // ampersands must be HTML-escaped (addslashes() does not do that).
            // The tag is concatenated, never used as a sprintf() format, so a
            // '%' in the URL is emitted verbatim.
            $href = htmlspecialchars((string) $url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            $openTag = "<a href='" . $href . "'>";
            $closeTag = '</a>';
        } else {
            // Unknown entity type: no markup, only the escaped text.
            $openTag = '';
            $closeTag = '';
        }

        $html .= $openTag;
        $open[] = [$end, $closeTag];
    }

    // Close whatever is still open, innermost first.
    while ($open) {
        list($closeAt, $closeTag) = array_pop($open);
        $html .= $escapeSlice($cursor, $closeAt) . $closeTag;
        $cursor = max($cursor, $closeAt);
    }

    return $html . $escapeSlice($cursor, $total);
}
/**
 * Render a Telegram message as Markdown-style text from its plain text and
 * entity list.
 *
 * Entity offsets and lengths are counted in UTF-16 code units, so the text is
 * sliced in UTF-16LE (two bytes per unit) and converted back to UTF-8 once at
 * the end.
 *
 * bold, italic, code and pre are wrapped in their markers; text_link and
 * text_mention become "[text](url)". strikethrough, underline and any other
 * entity type are emitted as plain text. Entities are expected in ascending
 * offset order; an entity that starts inside an already emitted one is
 * skipped, because the output is built as flat, non-nested markers. The
 * message text itself is not escaped.
 *
 * @param string     $text     Message text, UTF-8.
 * @param array|null $entities Entities as arrays with 'type', 'offset' and
 *                             'length', plus 'url' for text_link or
 *                             'user' => ['id' => ...] for text_mention.
 *
 * @return string Marked-up text, UTF-8.
 */
function parseMD($text, $entities)
{
    // Treat null / missing entities as "no entities" instead of warning in foreach.
    if (empty($entities)) {
        $entities = [];
    }

    // Entity types that map to a simple opening/closing marker pair.
    $markers = [
        'bold'   => ['*', '*'],
        'italic' => ['_', '_'],
        'code'   => ['`', '`'],
        'pre'    => ['```', '```'],
    ];

    $utf16 = mb_convert_encoding($text, 'UTF-16LE', 'UTF-8');
    $total = strlen($utf16);

    $toUtf16 = function ($string) {
        return mb_convert_encoding($string, 'UTF-16LE', 'UTF-8');
    };

    $out = '';    // result, built in UTF-16LE
    $cursor = 0;  // byte position in $utf16 up to which text has been copied

    foreach ($entities as $entity) {
        $type = isset($entity['type']) ? $entity['type'] : null;

        if (is_string($type) && isset($markers[$type])) {
            list($before, $after) = $markers[$type];
        } elseif ($type === 'text_mention' || $type === 'text_link') {
            if ($type === 'text_mention') {
                $url = 'tg://user?id=' . $entity['user']['id'];
            } else {
                $url = isset($entity['url']) ? $entity['url'] : '';
            }
            // NOTE(review): addslashes() is kept for backward compatibility.
            // It is not a Markdown escape and inserts backslashes into URLs
            // that contain quotes or backslashes - confirm whether it should go.
            $before = '[';
            $after = '](' . addslashes((string) $url) . ')';
        } else {
            // No markup for this type; leave the text for later entities.
            continue;
        }

        $start = min($entity['offset'] * 2, $total);
        if ($start < $cursor) {
            // Starts inside an entity that was already emitted: skip it.
            continue;
        }
        $end = max($start, min($start + $entity['length'] * 2, $total));

        // Plain concatenation: the URL must never be part of a sprintf()
        // format string, otherwise a '%' in it is read as a directive.
        $out .= (string) substr($utf16, $cursor, $start - $cursor)
            . $toUtf16($before)
            . (string) substr($utf16, $start, $end - $start)
            . $toUtf16($after);
        $cursor = $end;
    }

    $out .= (string) substr($utf16, $cursor);

    return mb_convert_encoding($out, 'UTF-8', 'UTF-16LE');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment