Skip to content

Instantly share code, notes, and snippets.

@WenLiangTseng
Created August 21, 2013 03:42
Show Gist options
  • Save WenLiangTseng/6290123 to your computer and use it in GitHub Desktop.
Save WenLiangTseng/6290123 to your computer and use it in GitHub Desktop.
中文的Wordpress摘要,含有HTML的Tag時,可保留HTML標籤且避免砍到HTML標籤的完整寫法
<?php // 參考資料來源 http://stackoverflow.com/questions/1193500/php-truncate-html-ignoring-tags
/**
 * Truncate an HTML fragment to a number of visible characters without
 * cutting through an HTML tag or an HTML entity.
 *
 * Tags count as zero visible characters and an entity (e.g. "&amp;") counts
 * as one. All positions are measured in UTF-8 characters, not bytes, so the
 * result is correct for multibyte text such as Chinese.
 *
 * Note: the tag pattern treats any "<" up to the next ">" (or the end of the
 * string) as a tag, so a literal "<" in text is handled as if it opened one.
 *
 * @param string $str           HTML fragment, expected to be UTF-8.
 * @param int    $len           Maximum number of visible characters to keep
 *                              (negative values are treated as 0).
 * @param bool   $closeOpenTags When true, closing tags for elements still
 *                              open at the cut point are appended.
 * @return string The original string if it fits, otherwise the truncated one.
 */
function memo_desc_excerpt($str, $len = 100, $closeOpenTags = false) {
    $encoding = 'UTF-8';
    $str = (string) $str;
    $len = max(0, (int) $len);

    // Elements that never have a closing tag; ignored when tracking open tags.
    $voidTags = array('br', 'img', 'hr', 'input', 'param', 'link');

    // Matches HTML tags (opening, closing, self-closing) and HTML entities.
    // For a tag: [1] = '<' or '</', [2] = tag name, [3] = attributes/rest.
    // For an entity only [0] is present.
    $tagPattern = '/(<\/?)([\w]*)(\s*[^>]*)>?|&[\w#]+;/i';
    $found = preg_match_all($tagPattern, $str, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
    if ($found === false || !is_array($matches)) {
        $matches = array();
    }

    $openTags = array();
    foreach ($matches as $match) {
        $token = $match[0][0];

        // PREG_OFFSET_CAPTURE reports byte offsets; convert to a character
        // offset so it is comparable with the character-based limit.
        $charOffset = mb_strlen(substr($str, 0, $match[0][1]), $encoding);
        if ($charOffset >= $len) {
            break;
        }

        // Extend the cut position by the markup's length so the markup is
        // kept whole and does not consume the visible-character budget.
        $len += mb_strlen($token, $encoding);

        if ($token[0] === '&') {
            // An entity renders as a single visible character.
            $len -= 1;
            continue;
        }

        $name = isset($match[2][0]) ? strtolower($match[2][0]) : '';
        if ($name === '' || in_array($name, $voidTags, true)) {
            continue;
        }

        $isClosing = ($match[1][0] === '</');
        $isSelfClosing = isset($match[3][0]) && substr($match[3][0], -1) === '/';

        if ($isClosing) {
            // Only pop when the closing tag matches the innermost open tag;
            // mismatched markup is left untouched.
            if (!empty($openTags) && end($openTags) === $name) {
                array_pop($openTags);
            }
        } elseif (!$isSelfClosing) {
            $openTags[] = $name;
        }
    }

    if (mb_strlen($str, $encoding) <= $len) {
        return $str;
    }

    $truncated_html = mb_substr($str, 0, $len, $encoding);

    if ($closeOpenTags) {
        // Close the innermost element first.
        foreach (array_reverse($openTags) as $tag) {
            $truncated_html .= '</' . $tag . '>';
        }
    }

    return $truncated_html;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment