WenLiangTseng/chinese_excerpt_solution.php

## chinese_excerpt_solution.php
<?php  // 參考資料來源 http://stackoverflow.com/questions/1193500/php-truncate-html-ignoring-tags
/**
 * Build an HTML-aware excerpt of a string that may contain multi-byte
 * (e.g. Chinese) text.
 *
 * The string is cut after $len visible characters. HTML tags that start
 * inside the excerpt do not count towards the limit and are always kept
 * whole; an HTML entity such as "&amp;" counts as a single character.
 *
 * @param string $str           HTML fragment, expected to be UTF-8 encoded.
 * @param int    $len           Maximum number of visible characters (default 100).
 * @param bool   $closeOpenTags When true and the string was truncated, closing
 *                              tags for every element still open at the cut
 *                              point are appended (innermost first).
 *
 * @return string The original string when it already fits, otherwise the excerpt.
 */
function memo_desc_excerpt($str, $len = 100, $closeOpenTags = false) {

    $str   = (string) $str;
    $limit = max(0, (int) $len);

    // Void elements never take a closing tag, so they are not tracked as open.
    $voidTags = array('br', 'img', 'hr', 'input', 'param', 'link');
    $openTags = array();

    // Match HTML tags and entities. For every match:
    //   $match[0] = the whole tag/entity (the only group present for entities)
    //   $match[1] = start of the tag, either '<' or '</'
    //   $match[2] = the tag name
    //   $match[3] = the remainder of the tag before '>'
    // and each group is array(text, byte offset).
    $tagPattern = '/(<\/?)([\w]*)(\s*[^>]*)>?|&[\w#]+;/i';
    if (!preg_match_all($tagPattern, $str, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
        $matches = array();
    }

    $bytePos = 0; // byte offset of the most recently visited match
    $charPos = 0; // the same position expressed in UTF-8 characters

    foreach ($matches as $match) {
        $token = $match[0][0];

        // PCRE reports byte offsets while the limit is applied in characters,
        // so convert incrementally from the previous match position.
        $charPos += mb_strlen(substr($str, $bytePos, $match[0][1] - $bytePos), 'UTF-8');
        $bytePos  = $match[0][1];

        if ($charPos >= $limit) {
            break;
        }

        $tokenChars = mb_strlen($token, 'UTF-8');

        if ($token[0] === '&') {
            // An entity renders as one character: extend by its length minus one.
            $limit += $tokenChars - 1;
            continue;
        }

        // Markup is invisible: extend the limit so the whole tag is kept.
        $limit += $tokenChars;

        $name = isset($match[2][0]) ? strtolower($match[2][0]) : '';
        if ($name === '' || in_array($name, $voidTags, true)) {
            continue;
        }

        $isClosing     = $match[1][0] === '</';
        $isSelfClosing = isset($match[3][0]) && substr($match[3][0], -1) === '/';

        if ($isClosing) {
            // Only pop on a properly nested closing tag; mismatches are ignored.
            if (end($openTags) === $name) {
                array_pop($openTags);
            }
        } elseif (!$isSelfClosing) {
            $openTags[] = $name;
        }
    }

    if (mb_strlen($str, 'UTF-8') <= $limit) {
        return $str;
    }

    $truncated_html = mb_substr($str, 0, $limit, 'UTF-8');

    if ($closeOpenTags) {
        // Restore any tags left open by the cut, innermost first.
        foreach (array_reverse($openTags) as $tag) {
            $truncated_html .= '</' . $tag . '>';
        }
    }

    return $truncated_html;
}
	// Reference: http://stackoverflow.com/questions/1193500/php-truncate-html-ignoring-tags
	//
	// Guarded definition: only declared when memo_desc_excerpt() does not exist
	// yet, so this copy can never trigger a "cannot redeclare function" error.
	if (!function_exists('memo_desc_excerpt')) {

	    /**
	     * Build an HTML-aware excerpt of a string that may contain multi-byte
	     * (e.g. Chinese) text.
	     *
	     * The string is cut after $len visible characters. HTML tags that start
	     * inside the excerpt do not count towards the limit and are always kept
	     * whole; an HTML entity such as "&amp;" counts as a single character.
	     *
	     * @param string $str           HTML fragment, expected to be UTF-8 encoded.
	     * @param int    $len           Maximum number of visible characters (default 100).
	     * @param bool   $closeOpenTags When true and the string was truncated, closing
	     *                              tags for every element still open at the cut
	     *                              point are appended (innermost first).
	     *
	     * @return string The original string when it already fits, otherwise the excerpt.
	     */
	    function memo_desc_excerpt($str, $len = 100, $closeOpenTags = false) {

	        $str   = (string) $str;
	        $limit = max(0, (int) $len);

	        // Void elements never take a closing tag, so they are not tracked as open.
	        $voidTags = array('br', 'img', 'hr', 'input', 'param', 'link');
	        $openTags = array();

	        // Match HTML tags and entities. For every match:
	        //   $match[0] = the whole tag/entity (the only group present for entities)
	        //   $match[1] = start of the tag, either '<' or '</'
	        //   $match[2] = the tag name
	        //   $match[3] = the remainder of the tag before '>'
	        // and each group is array(text, byte offset).
	        $tagPattern = '/(<\/?)([\w]*)(\s*[^>]*)>?|&[\w#]+;/i';
	        if (!preg_match_all($tagPattern, $str, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
	            $matches = array();
	        }

	        $bytePos = 0; // byte offset of the most recently visited match
	        $charPos = 0; // the same position expressed in UTF-8 characters

	        foreach ($matches as $match) {
	            $token = $match[0][0];

	            // PCRE reports byte offsets while the limit is applied in characters,
	            // so convert incrementally from the previous match position.
	            $charPos += mb_strlen(substr($str, $bytePos, $match[0][1] - $bytePos), 'UTF-8');
	            $bytePos  = $match[0][1];

	            if ($charPos >= $limit) {
	                break;
	            }

	            $tokenChars = mb_strlen($token, 'UTF-8');

	            if ($token[0] === '&') {
	                // An entity renders as one character: extend by its length minus one.
	                $limit += $tokenChars - 1;
	                continue;
	            }

	            // Markup is invisible: extend the limit so the whole tag is kept.
	            $limit += $tokenChars;

	            $name = isset($match[2][0]) ? strtolower($match[2][0]) : '';
	            if ($name === '' || in_array($name, $voidTags, true)) {
	                continue;
	            }

	            $isClosing     = $match[1][0] === '</';
	            $isSelfClosing = isset($match[3][0]) && substr($match[3][0], -1) === '/';

	            if ($isClosing) {
	                // Only pop on a properly nested closing tag; mismatches are ignored.
	                if (end($openTags) === $name) {
	                    array_pop($openTags);
	                }
	            } elseif (!$isSelfClosing) {
	                $openTags[] = $name;
	            }
	        }

	        if (mb_strlen($str, 'UTF-8') <= $limit) {
	            return $str;
	        }

	        $truncated_html = mb_substr($str, 0, $limit, 'UTF-8');

	        if ($closeOpenTags) {
	            // Restore any tags left open by the cut, innermost first.
	            foreach (array_reverse($openTags) as $tag) {
	                $truncated_html .= '</' . $tag . '>';
	            }
	        }

	        return $truncated_html;
	    }
	}