-
-
Save andykirk/b304a3c84594515677e6 to your computer and use it in GitHub Desktop.
/**
 * truncate_html()
 *
 * Truncates an HTML string to a given length of _visible_ (content) characters.
 * E.g.
 * "This is some <b>bold</b> text" has a visible/content length of 22 characters,
 * though the total string length is 29 characters.
 * This function limits the visible/content length whilst preserving any HTML formatting:
 * tags are copied through untouched, HTML entities count as a single character, the text
 * is cut back to the last complete word, and every element still open at the cut is closed.
 *
 * The length of $ending is counted against $length. $ending is placed before the closing
 * tag of the innermost open non-inline element, or at the very end of the result when
 * only inline elements (or no elements at all) are open.
 *
 * Character counting relies on the mbstring internal encoding (UTF-8 by default).
 *
 * @param string $html   HTML fragment to shorten.
 * @param int    $length Maximum number of visible characters, including $ending.
 * @param string $ending Marker appended to show that the content was truncated.
 * @return string|false  The (possibly) truncated HTML; false if $html is not a string.
 * @access public
 */
function truncate_html($html, $length = 100, $ending = '...')
{
    if (!is_string($html)) {
        trigger_error('Function \'truncate_html\' expects argument 1 to be an string', E_USER_ERROR);
        return false;
    }
    if (mb_strlen(strip_tags($html)) <= $length) {
        return $html;
    }

    $original = $html;
    $entity_pattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
    // The ending counts towards the visible length, so start the tally with it.
    $total = mb_strlen($ending);
    $open_tags = array();
    $return = '';
    $truncated = false;
    $final_segment = '';
    $self_closing_elements = array(
        'area',
        'base',
        'br',
        'col',
        'embed',
        'frame',
        'hr',
        'img',
        'input',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr'
    );
    $inline_containers = array(
        'a',
        'b',
        'abbr',
        'cite',
        'em',
        'i',
        'kbd',
        'span',
        'strong',
        'sub',
        'sup'
    );

    // Every branch below consumes at least one byte of $html, so the loop always terminates.
    while ($html !== '') {
        if (preg_match('/^<(\w[\w:-]*)[^>]*>/', $html, $matches)) {
            // Opening tag: remember it unless it can never have a closing tag.
            $tag = strtolower($matches[1]);
            if (!in_array($tag, $self_closing_elements, true) && substr($matches[0], -2) !== '/>') {
                $open_tags[] = $tag;
            }
            $consumed = $matches[0];
        } elseif (preg_match('/^<\/(\w[\w:-]*)\s*>/', $html, $matches)) {
            // Closing tag: it closes the most recently opened element of that name,
            // so search the stack from the end (keys are preserved for unset()).
            $key = array_search(strtolower($matches[1]), array_reverse($open_tags, true), true);
            if ($key !== false) {
                unset($open_tags[$key]);
            }
            $consumed = $matches[0];
        } elseif (preg_match('/^<!--.*?-->|^<[!?\/][^>]*>/s', $html, $matches)) {
            // Comments, doctypes, processing instructions and malformed closing tags
            // carry no visible characters: copy them through unchanged.
            $consumed = $matches[0];
        } else {
            // Text up to the next '<'. A leading '<' that did not start any recognised
            // markup is treated as literal text so that it cannot stall the loop.
            preg_match('/^<?[^<]*/', $html, $matches);
            $segment = $matches[0];
            // Each HTML entity is displayed as a single character.
            $segment_length = mb_strlen(preg_replace($entity_pattern, ' ', $segment));
            if ($segment_length + $total > $length) {
                // Visible characters still available; never negative, otherwise
                // mb_substr() would count from the end of the segment.
                $remainder = max(0, $length - $total);
                $entities_length = 0;
                if (preg_match_all($entity_pattern, $segment, $entities, PREG_OFFSET_CAPTURE)) {
                    foreach ($entities[0] as $entity) {
                        // PREG_OFFSET_CAPTURE reports byte offsets; convert to characters.
                        $char_offset = mb_strlen(substr($segment, 0, $entity[1]));
                        if ($char_offset + 1 - $entities_length <= $remainder) {
                            // The entity starts inside the budget: keep it whole and
                            // charge it as one visible character.
                            $remainder--;
                            $entities_length += mb_strlen($entity[0]);
                        } else {
                            break;
                        }
                    }
                }
                $final_segment = mb_substr($segment, 0, $remainder + $entities_length);
                $truncated = true;
                break;
            }
            $total += $segment_length;
            $consumed = $segment;
        }
        $return .= $consumed;
        $html = (string) substr($html, strlen($consumed));
    }

    if (!$truncated) {
        // Everything fitted once entities were counted as single characters.
        return $original;
    }

    // Cut the final segment back to the last complete word.
    $last_space = mb_strrpos($final_segment, ' ');
    if ($last_space !== false) {
        $return .= mb_substr($final_segment, 0, $last_space);
    } elseif (preg_match('/<(\w[\w:-]*)[^>]*>$/', $return, $tag_match)) {
        // No complete word fits and $return ends in an opening tag: drop that tag
        // so no empty element is left behind, and forget it on the stack.
        $return = (string) substr($return, 0, -strlen($tag_match[0]));
        $key = array_search(strtolower($tag_match[1]), array_reverse($open_tags, true), true);
        if ($key !== false) {
            unset($open_tags[$key]);
        }
    }

    $return = trim($return);
    // Drop one trailing punctuation character (e.g. a comma) before the ending, but never
    // part of a tag or entity, and never a single byte of a multibyte character.
    if (!preg_match('/(?:>|&#?[0-9a-z]+;)$/i', $return)) {
        $cleaned = preg_replace('/[^\p{L}\p{N}]$/u', '', $return);
        if ($cleaned !== null) { // null means $return was not valid UTF-8; leave it alone
            $return = $cleaned;
        }
    }

    // Close the elements that are still open, innermost first.
    $ending_added = false;
    foreach (array_reverse($open_tags) as $tag) {
        if (!$ending_added && !in_array($tag, $inline_containers, true)) {
            $return .= $ending;
            $ending_added = true;
        }
        $return .= '</' . $tag . '>';
    }
    if (!$ending_added) {
        // No block-level container was open: the ending goes at the very end.
        $return .= $ending;
    }
    return $return;
}
line 109. $finshed instead of $finished. That just brought down our servers :-)
- Searching in open tags should be in reverse (in both places I suppose):
$key = array_search($matches[3], array_reverse($open_tags, true));
- What is this for? If last char is ">" it gets removed...
if (!preg_match('/[a-zA-Z0-9]/', $last_char)) {
$return = substr_replace($return, '', $len - 1, 1);
}
Also our servers went down with this function. Thx @digitalbase (and also @andykirk ).
Thanks!
I have a string like this: "Example text Example text Example text Example text" and used $length = 5.
I got "Uninitialized string offset: -1" on the line: $last_char = $return[$len - 1];
Also change every strlen call to mb_strlen;
otherwise the UTF-8 string length is wrong.
Likewise change strpos to mb_strpos.
It has some problem with multibyte characters.
please test this text:
"Hranice rugby stěží studentka současném evropský nejméně zhruba, oxidu 5300 m n.m. explozi ony specialistkou drahého po krize. Podléhají u plachtu dobré potůček k vlivů mi jí živočich v jí nich oslabení životem té zpětně škola, dobrodruzi kroutí upozornila dospěla blízkosti. Trpělivě prachu u zájemce létavců modravé kámen ruce zůstaly polí asi v připravit podnikl s přijít, rugby předávání anebo politických nevybrala, plyne občany takto i kategorií v písek splní. Žil nahlíží pohybovaly. Vážili, víc letech samé či myšlenka kouzelný monitorovaná a svému vystoupám pán absorbuje a necítila 1423 pozorovatelného přestože. Barvu loni o nad, EU testy od hornina k brzy bez pád ve potřeli."
with: echo truncate_html($text, 400);
I see some "question mark" characters, which means that a multibyte character has been broken.
Tested on PHP >= 7.0
Thanks.
Used with defaults, and had to change the return at the end to:
Otherwise the ending '...' isn't displayed.