Skip to content

Instantly share code, notes, and snippets.

@BenMorel
Created January 11, 2017 00:00
Show Gist options
  • Save BenMorel/5ab7d8cd55613e304c5d0a7a302e4569 to your computer and use it in GitHub Desktop.
Save BenMorel/5ab7d8cd55613e304c5d0a7a302e4569 to your computer and use it in GitHub Desktop.
Converts plain text to HTML
<?php
/**
 * Converts a text message to HTML.
 *
 * The following transformations are applied:
 * - Special characters are encoded to HTML entities,
 * - Lines of text separated by an empty line are converted to paragraphs,
 * - Newline characters are converted to `<br>`,
 * - Links and e-mail addresses are converted to HTML links,
 * - Tab-separated data is converted to tables.
 */
class TextHtmlConverter
{
    /**
     * URL schemes that are never turned into links.
     *
     * A `javascript:` href runs script when clicked; `vbscript:` and `data:` are excluded for the
     * same class of reasons. Text using these schemes is still output, but only as encoded plain text.
     */
    private const UNSAFE_SCHEMES = ['javascript', 'vbscript', 'data'];

    /**
     * Whether line breaks are emitted as `<br />` (true) or `<br>` (false).
     *
     * @var bool
     */
    private $isXhtml;

    /**
     * Whether each emitted fragment is placed on its own, indented line.
     *
     * @var bool
     */
    private $prettyPrint;

    /**
     * Current nesting depth, used only when pretty-printing.
     *
     * @var int
     */
    private $indentLevel = 0;

    /**
     * Output buffer for the conversion in progress.
     *
     * @var string
     */
    private $html = '';

    /**
     * Regular expression matching URLs (named group `url`) and e-mail addresses (named group `email`).
     *
     * @var string
     */
    private $linkPattern;

    /**
     * Class constructor.
     *
     * @param bool $isXhtml     Whether to output `<br />` instead of `<br>`.
     * @param bool $prettyPrint Whether to output one indented fragment per line.
     */
    public function __construct(bool $isXhtml = true, bool $prettyPrint = false)
    {
        $this->isXhtml = $isXhtml;
        $this->prettyPrint = $prettyPrint;

        // The URL alternative comes first so that a token containing both "://" and "@"
        // (e.g. "https://example.com/@user/post.html") is linked as a URL, not as an e-mail address.
        $this->linkPattern = $this->buildPattern([
            'url'   => '*://*.*',
            'email' => '*@*.*',
        ]);
    }

    /**
     * Converts the given plain text to HTML.
     *
     * Consecutive non-empty lines form a group. A group in which at least one line contains a tab
     * becomes a table (one row per line, one cell per tab-separated value); any other group becomes
     * a paragraph.
     *
     * @param string $text The plain text.
     *
     * @return string The HTML.
     */
    public function convert(string $text) : string
    {
        $this->html = '';
        $this->indentLevel = 0;

        foreach ($this->groupLines($this->splitLines($text)) as $lines) {
            if ($this->containsTab($lines)) {
                $rows = [];

                foreach ($lines as $line) {
                    $rows[] = explode("\t", $line);
                }

                $this->convertTable($rows);
            } else {
                $this->convertParagraph($lines);
            }
        }

        return $this->html;
    }

    /**
     * Splits a string into an array of lines.
     *
     * CRLF, CR and LF are all treated as line terminators.
     *
     * @param string $text
     *
     * @return array
     */
    private function splitLines(string $text) : array
    {
        $text = str_replace("\r\n", "\n", $text);
        $text = str_replace("\r", "\n", $text);

        return explode("\n", $text);
    }

    /**
     * Groups consecutive non-empty lines together; empty lines act as separators and are dropped.
     *
     * @param array $lines The lines of text.
     *
     * @return array A list of groups, each group being a non-empty list of lines.
     */
    private function groupLines(array $lines) : array
    {
        $groups = [];
        $group = [];

        foreach ($lines as $line) {
            if ($line !== '') {
                $group[] = $line;
            } elseif ($group !== []) {
                $groups[] = $group;
                $group = [];
            }
        }

        if ($group !== []) {
            $groups[] = $group;
        }

        return $groups;
    }

    /**
     * Returns whether at least one of the given lines contains a tab character.
     *
     * @param array $lines
     *
     * @return bool
     */
    private function containsTab(array $lines) : bool
    {
        foreach ($lines as $line) {
            if (strpos($line, "\t") !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Appends an HTML fragment to the output buffer.
     *
     * When pretty-printing, the fragment is written on its own line at the current indentation.
     * A negative level de-indents before writing (closing tag), a positive level indents after
     * writing (opening tag). Without pretty-printing the fragment is appended verbatim.
     *
     * @param string $html  The HTML fragment.
     * @param int    $level Positive for an opening tag, negative for a closing tag, zero otherwise.
     *
     * @return void
     */
    private function append(string $html, int $level = 0) : void
    {
        if (! $this->prettyPrint) {
            $this->html .= $html;

            return;
        }

        if ($level < 0) {
            $this->indentLevel--;
        }

        $this->html .= str_repeat(' ', 4 * $this->indentLevel) . $html . PHP_EOL;

        if ($level > 0) {
            $this->indentLevel++;
        }
    }

    /**
     * Converts an array of table data into an HTML table.
     *
     * @param array $rows A list of rows, each row being a list of cell strings.
     *
     * @return void
     */
    private function convertTable(array $rows) : void
    {
        $this->append('<table>', +1);

        foreach ($rows as $row) {
            $this->append('<tr>', +1);

            foreach ($row as $column) {
                $this->append('<td>' . $this->encodeHtmlWithLinks($column) . '</td>');
            }

            $this->append('</tr>', -1);
        }

        $this->append('</table>', -1);
    }

    /**
     * Converts a group of lines into an HTML paragraph, separating the lines with line breaks.
     *
     * @param array $lines
     *
     * @return void
     */
    private function convertParagraph(array $lines) : void
    {
        $br = $this->isXhtml ? '<br />' : '<br>';
        $last = count($lines) - 1;

        $this->append('<p>', +1);

        foreach ($lines as $index => $line) {
            $line = $this->encodeHtmlWithLinks($line);

            // No line break after the last line of the paragraph.
            if ($index !== $last) {
                $line .= $br;
            }

            $this->append($line);
        }

        $this->append('</p>', -1);
    }

    /**
     * Encodes a plain text string in HTML.
     *
     * @param string $text The text to encode.
     *
     * @return string The encoded text.
     */
    private function encodeHtml(string $text) : string
    {
        return htmlspecialchars($text, ENT_COMPAT | ENT_XHTML, 'UTF-8');
    }

    /**
     * Creates an HTML version of a plain text message.
     *
     * * Encodes special characters with html entities.
     * * Adds anchors around emails and hyperlinks.
     *
     * Links are detected in a single pass over the encoded text, so markup generated for one
     * link is never re-scanned and wrapped a second time.
     *
     * @param string $text The plain text message.
     *
     * @return string The HTML message.
     */
    private function encodeHtmlWithLinks(string $text) : string
    {
        $html = $this->encodeHtml($text);

        $linked = preg_replace_callback(
            $this->linkPattern,
            function (array $match) : string {
                $link = $match[0];

                // The "email" group is only set and non-empty when the e-mail alternative matched.
                if (($match['email'] ?? '') !== '') {
                    return '<a href="mailto:' . $link . '">' . $link . '</a>';
                }

                if ($this->hasUnsafeScheme($link)) {
                    return $link;
                }

                return '<a href="' . $link . '" target="_blank">' . $link . '</a>';
            },
            $html
        );

        // preg_replace_callback() returns null on failure (e.g. backtrack limit reached);
        // fall back to the encoded text without links rather than losing the content.
        return $linked === null ? $html : $linked;
    }

    /**
     * Returns whether the given URL starts with one of the schemes listed in UNSAFE_SCHEMES.
     *
     * @param string $url The matched URL.
     *
     * @return bool
     */
    private function hasUnsafeScheme(string $url) : bool
    {
        if (preg_match('/^([a-z][a-z0-9+.\-]*):/i', $url, $match) !== 1) {
            return false;
        }

        return in_array(strtolower($match[1]), self::UNSAFE_SCHEMES, true);
    }

    /**
     * Builds a regular expression out of named glob-like patterns.
     *
     * In each glob, `*` stands for one or more link characters and everything else is matched
     * literally. The globs are combined into an alternation of named groups, tried in the given order.
     *
     * @param array $globs The glob patterns, indexed by group name.
     *
     * @return string The delimited regular expression.
     */
    private function buildPattern(array $globs) : string
    {
        // Non-blank ASCII chars excluding <> HTML tag characters.
        $chars = '[^\<\>\x00-\x20\x7f-\xff]+';

        $alternatives = [];

        foreach ($globs as $name => $glob) {
            $regex = str_replace('\\*', $chars, preg_quote($glob, '/'));
            $alternatives[] = '(?<' . $name . '>' . $regex . ')';
        }

        return '/' . implode('|', $alternatives) . '/';
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment