Skip to content

Instantly share code, notes, and snippets.

@macik
Last active August 29, 2018 08:58
Show Gist options
  • Save macik/93809b6bf85c5b2a95b4 to your computer and use it in GitHub Desktop.
Save macik/93809b6bf85c5b2a95b4 to your computer and use it in GitHub Desktop.
Function to truncate a string, optionally treating it as HTML code.
<?php
/**
 * Truncates text to at most $length visible characters.
 *
 * In HTML mode tags are copied to the output without being counted, HTML
 * entities and runs of two or more whitespace characters count as a single
 * character, the content of <pre>/<plaintext> is counted verbatim (markup
 * included), and every element still open at the cut position is closed again.
 * Word-boundary trimming and $cuttext are applied only when something was
 * actually cut off.
 *
 * @param string $text String to truncate.
 * @param integer $length Maximum number of visible characters to keep; $cuttext is appended on top of this.
 * @param boolean $considerhtml If true, HTML tags are handled correctly
 * @param boolean $exact If false, $text will not be cut mid-word
 * @param string $cuttext Text appended when the string was truncated
 * @return string trimmed string.
 */
function cot_string_truncate($text, $length = 100, $considerhtml = true, $exact = false, $cuttext = '')
{
    // State shared by the HTML and the plain path; initialised up front so that
    // neither path ever reads an undefined variable.
    $open_tags = array();
    $plain_mode = false;          // 'pre' / 'plaintext' while inside such an element, false otherwise
    $truncated_by_space = false;  // true when the cut happened right in front of whitespace
    $was_cut = false;             // true once text has been (or may have been) dropped
    $truncate = '';

    if ($considerhtml)
    {
        // if the plain text is shorter than the maximum length, return the whole text
        if (!preg_match('/<\s*(pre|plaintext)/', $text) && mb_strlen(preg_replace('/<.*?>/', '', $text)) <= $length)
        {
            return $text;
        }
        // splits all html-tags to scanable lines: [0] = tag + text, [1] = tag, [2] = text up to the next tag
        preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);
        $total_length = 0;
        $tag = false;
        $plain_tag = false;
        // entities and whitespace runs, each counted as one visible character
        $entity_pattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};|[\r\n\s]{2,}/i';
        foreach ($lines as $line_matchings)
        {
            // if there is any html-tag in this line, handle it and add it (uncounted) to the output
            if ($line_matchings[1] !== '')
            {
                // if it's an "empty element" with or without xhtml-conform closing slash (f.e. <br/>)
                if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $line_matchings[1]))
                {
                    // do nothing
                }
                // if tag is a closing tag (f.e. </b>)
                elseif (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $line_matchings[1], $tag_matchings))
                {
                    $tag = false;
                    // open tags are stored lowercased, so compare the closing tag lowercased as well
                    $closing_tag = strtolower($tag_matchings[1]);
                    if ($plain_mode !== false && $closing_tag === $plain_mode)
                    {
                        $plain_mode = false;
                    }
                    else
                    {
                        // delete tag from $open_tags list
                        $pos = array_search($closing_tag, $open_tags, true);
                        if ($pos !== false)
                        {
                            unset($open_tags[$pos]);
                        }
                    }
                }
                // if tag is an opening tag (f.e. <b>)
                elseif (preg_match('/^<\s*([^\s>!]+).*?>$/s', $line_matchings[1], $tag_matchings))
                {
                    $tag = strtolower($tag_matchings[1]);
                    $plain_tag = in_array($tag, array('pre', 'plaintext'), true) ? $tag : false;
                    // add tag to the beginning of $open_tags list
                    if (!$plain_mode && !$plain_tag)
                    {
                        array_unshift($open_tags, $tag);
                    }
                }
                // add html-tag to $truncate'd text
                if (!$plain_mode)
                {
                    $truncate .= $line_matchings[1];
                }
            }
            // the number of characters which are left
            $left = $length - $total_length;
            if ($plain_mode || ($plain_tag && $tag))
            {
                // treats text as plain in <pre>, <plaintext> tags
                $content = $plain_mode ? $line_matchings[0] : $line_matchings[2];
                $content_length = mb_strlen($content);
                if ($content_length <= $left)
                {
                    $truncate .= $content;
                    $total_length += $content_length;
                }
                else
                {
                    $truncate .= mb_substr($content, 0, $left);
                    $total_length += $left;
                }
                if ($plain_tag && !$plain_mode)
                {
                    $plain_mode = $plain_tag;
                }
            }
            else
            {
                // calculate the length of the plain text part of the line; handle entities as one character
                $content_length = mb_strlen(preg_replace($entity_pattern, ' ', $line_matchings[2]));
                if ($total_length + $content_length > $length)
                {
                    $entities_length = 0;
                    // search for html entities and spaces
                    if (preg_match_all($entity_pattern, $line_matchings[2], $entities, PREG_OFFSET_CAPTURE))
                    {
                        // calculate the real length of all entities in the legal range
                        foreach ($entities[0] as $entity)
                        {
                            // PREG_OFFSET_CAPTURE reports byte offsets; convert to a character
                            // offset so it is comparable with the mb_* based counters
                            $entity_pos = mb_strlen(substr($line_matchings[2], 0, $entity[1]));
                            if ($entity_pos + 1 - $entities_length <= $left)
                            {
                                $left--;
                                $entities_length += mb_strlen($entity[0]);
                            }
                            else
                            {
                                // no more characters left
                                break;
                            }
                        }
                    }
                    $truncate .= mb_substr($line_matchings[2], 0, $left + $entities_length);
                    // remember whether the cut landed directly in front of whitespace (i.e. on a word end)
                    $truncated_by_space = (bool) preg_match('/[\r\n\s]/', mb_substr($line_matchings[2], $left + $entities_length, 1));
                    // maximum length is reached, so get off the loop
                    $was_cut = true;
                    break;
                }
                else
                {
                    $truncate .= $line_matchings[2];
                    $total_length += $content_length;
                }
            }
            // if the maximum length is reached, get off the loop
            if ($total_length >= $length)
            {
                // more input may follow, so this is conservatively treated as a cut
                $was_cut = true;
                break;
            }
        }
    }
    else
    {
        if (mb_strlen($text) <= $length)
        {
            return $text;
        }
        $truncate = mb_substr($text, 0, $length);
        $was_cut = true;
    }

    // Only touch the tail when something was really cut; text that was consumed
    // completely must not lose its last word or gain $cuttext.
    if ($was_cut)
    {
        if (!$exact && !$truncated_by_space && !$plain_mode)
        {
            // ...search the last occurence of a space...
            $pos1 = mb_strrpos($truncate, ' ');
            if ($pos1 !== false && $pos1 > 0)
            {
                // never cut inside a tag: if the last '>' comes after the space, cut right behind it
                $pos2 = mb_strrpos($truncate, '>');
                $spos = ($pos2 === false || $pos2 < $pos1) ? $pos1 : ($pos2 + 1);
                // ...and cut the text in this position
                $truncate = mb_substr($truncate, 0, $spos);
            }
        }
        $truncate .= $cuttext;
    }

    if ($considerhtml)
    {
        // <pre> is not tracked in $open_tags, so close it explicitly when the cut
        // happened inside it (<plaintext> has no end tag and is left alone)
        if ($plain_mode === 'pre')
        {
            $truncate .= '</pre>';
        }
        // close all unclosed html-tags
        foreach ($open_tags as $tag)
        {
            $truncate .= '</' . $tag . '>';
        }
    }
    return $truncate;
}
// Demo: truncate a sample with nested markup to 8 visible characters and print the result.
$test_str = '<p>' . "\n"
    . '<span class="Underline"><span class="Bold">Test to be cut</span></span>' . "\n"
    . '</p><p>Some text</p>';
echo cot_string_truncate($test_str, 8);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment