Created
June 25, 2013 02:35
-
-
Save zubinJiang/5855474 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Adds encyclopedia ("baike") links to the terms found in an article body.
 *
 * @author ol
 *
 * @example
 * $trans = new AddBaikeLink($text, $tagArr, $baikeLink);
 * $new_text = $trans->addLink();
 *
 */

// NOTE(review): $message, $nameArr and $linkArr are not defined in this file;
// presumably the including script provides them - confirm against the caller.
$trans = new AddBaikeLink($message, $nameArr, $linkArr);
$message = $trans->addLink();
/**
 * Links the first occurrence of each encyclopedia ("baike") term in a GBK
 * HTML fragment to that term's page on baike.onlylady.com.
 *
 * addLink() runs three passes over the text:
 *  1. addSpace()         - terms that already sit inside an <a> (href, title,
 *                          text) or an <img alt> get a '#' inserted after their
 *                          first character so the next pass cannot match them;
 *  2. replaceTagLink()   - the first remaining occurrence of every term inside
 *                          <body> is wrapped in a link;
 *  3. restoreSpaceTags() - the '#' markers are removed again.
 *
 * Known limitations kept from the original design: matching is a byte-wise
 * search on the serialised document, so a term occurring inside other markup
 * (an attribute of a tag other than <a>/<img>, or a link inserted for an
 * earlier term) can still be linked, and text that already contains a term
 * with '#' after its first character loses that '#'.
 */
class AddBaikeLink
{
    /** Encoding of the incoming text and of every term. */
    const TEXT_ENCODING = 'GBK';

    /** Encoding the DOM values are converted from / to. */
    const DOM_ENCODING = 'UTF-8';

    /** @var string HTML fragment being processed (GBK). */
    private $text;

    /** @var array Non-empty terms to link, in processing order. */
    private $tagArr;

    /** @var array Map of term => path segment of its baike page. */
    private $baikeLink;

    /**
     * @param string $_text      HTML fragment to process.
     * @param array  $_tagArr    Terms to link; empty and non-scalar entries are ignored.
     * @param array  $_baikeLink Map of term => path segment used in the link URL.
     */
    public function __construct($_text, $_tagArr, $_baikeLink)
    {
        $this->text = (string) $_text;
        $this->baikeLink = is_array($_baikeLink) ? $_baikeLink : array();

        // An empty term would match at every position, so it is dropped here.
        $this->tagArr = array();
        if (is_array($_tagArr)) {
            foreach ($_tagArr as $tag) {
                if (is_scalar($tag) && (string) $tag !== '') {
                    $this->tagArr[] = (string) $tag;
                }
            }
        }
    }

    /**
     * Runs the three passes and strips the document wrapper added by saveHTML().
     *
     * @return string The text with one link inserted per term that was found.
     */
    public function addLink()
    {
        // Without terms there is nothing to parse; skipping the DOM round trip
        // also avoids handing an empty string to loadHTML().
        if (count($this->tagArr) > 0) {
            $this->addSpace();
            $this->replaceTagLink();
            $this->restoreSpaceTags();
        }

        // Wrapper pieces are removed in this order, exactly as serialised.
        $wrapper = array(
            '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
            '<html><head><meta http-equiv="Content-Type" content="text/html; charset=gbk"></head><body><p>',
            "</p></body></html>",
        );
        $this->text = str_replace($wrapper, '', $this->text);

        return $this->text;
    }

    /**
     * Pass 1: marks terms inside existing links and image alt texts with '#'.
     */
    private function addSpace()
    {
        // The meta tag makes the parser read the fragment as GBK.
        $meta = '<meta http-equiv="Content-Type" content="text/html; charset=gbk" />';
        $doc = $this->loadDocument($meta . $this->text);
        $xpath = new DOMXPath($doc);
        $anchors = $xpath->query('//a');
        $images = $xpath->query('//img');

        foreach ($this->tagArr as $tag) {
            $spaceTag = $this->getSpaceTag($tag);

            foreach ($anchors as $anchor) {
                $this->replaceInAttribute($anchor, 'href', $tag, $spaceTag);
                $this->replaceInAttribute($anchor, 'title', $tag, $spaceTag);

                // Edit the text nodes one by one: assigning nodeValue on the
                // <a> element itself would replace all of its children with a
                // single text node and drop nested markup.
                foreach ($xpath->query('.//text()', $anchor) as $textNode) {
                    $content = mb_convert_encoding($textNode->nodeValue, self::TEXT_ENCODING, self::DOM_ENCODING);
                    $marked = str_replace($tag, $spaceTag, $content);
                    if ($marked !== $content) {
                        $textNode->nodeValue = mb_convert_encoding($marked, self::DOM_ENCODING, self::TEXT_ENCODING);
                    }
                }
            }

            foreach ($images as $image) {
                $this->replaceInAttribute($image, 'alt', $tag, $spaceTag);
            }
        }

        $this->text = $doc->saveHTML();
    }

    /**
     * Pass 2: wraps the first occurrence of every term inside <body> in a link.
     */
    private function replaceTagLink()
    {
        foreach ($this->tagArr as $tag) {
            // Search only between <body> and </body> so a term such as "html"
            // cannot be linked inside the DOCTYPE/head wrapper. The bounds are
            // recomputed per term because the text grows with every link.
            $start = strpos($this->text, '<body>');
            $start = ($start === false) ? 0 : $start + strlen('<body>');
            $end = strrpos($this->text, '</body>');
            if ($end === false || $end < $start) {
                $end = strlen($this->text);
            }

            // Plain byte search instead of a regex: the term needs no quoting
            // and the link text is never read as a back-reference.
            $pos = strpos($this->text, $tag, $start);
            if ($pos === false || $pos + strlen($tag) > $end) {
                continue;
            }

            $this->text = substr_replace($this->text, $this->buildLink($tag), $pos, strlen($tag));
        }
    }

    /**
     * Pass 3: removes the '#' markers written by addSpace().
     */
    private function restoreSpaceTags()
    {
        $doc = $this->loadDocument($this->text);
        $xpath = new DOMXPath($doc);
        $anchors = $xpath->query('//a');
        $images = $xpath->query('//img');
        $spaceTags = array();

        foreach ($this->tagArr as $tag) {
            $spaceTag = $this->getSpaceTag($tag);
            $spaceTags[] = $spaceTag;
            // Attribute values are compared against the URL-decoded marker.
            $needle = urldecode($spaceTag);

            foreach ($anchors as $anchor) {
                $this->restoreHref($anchor, $needle, $tag);
                $this->replaceInAttribute($anchor, 'title', $needle, $tag);
            }

            foreach ($images as $image) {
                $this->replaceInAttribute($image, 'alt', $needle, $tag);
            }
        }

        // Markers left in text nodes are restored on the serialised document.
        $this->text = str_replace($spaceTags, $this->tagArr, $doc->saveHTML());
    }

    /**
     * Parses HTML into a DOMDocument, collecting parser warnings instead of
     * emitting them.
     *
     * @param string $html Non-empty HTML source.
     * @return DOMDocument
     */
    private function loadDocument($html)
    {
        $doc = new DOMDocument();
        $previous = libxml_use_internal_errors(true);
        $doc->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        return $doc;
    }

    /**
     * Replaces $search with $replace inside one attribute; the attribute is
     * left untouched when nothing changes.
     *
     * @param DOMElement $node    Element carrying the attribute.
     * @param string     $name    Attribute name.
     * @param string     $search  Byte sequence to look for (GBK).
     * @param string     $replace Replacement (GBK).
     */
    private function replaceInAttribute(DOMElement $node, $name, $search, $replace)
    {
        $value = mb_convert_encoding($node->getAttribute($name), self::TEXT_ENCODING, self::DOM_ENCODING);
        $updated = str_replace($search, $replace, $value);
        if ($updated === $value) {
            return;
        }

        // Remove-then-set is kept from the original code; the rewritten
        // attribute is expected to end up last on the element as before.
        $node->removeAttribute($name);
        $node->setAttribute($name, mb_convert_encoding($updated, self::DOM_ENCODING, self::TEXT_ENCODING));
    }

    /**
     * Restores a marked term inside an href, comparing on the URL-decoded value.
     *
     * @param DOMElement $anchor Link element to inspect.
     * @param string     $needle URL-decoded marker form of the term.
     * @param string     $tag    Original term.
     */
    private function restoreHref(DOMElement $anchor, $needle, $tag)
    {
        $href = mb_convert_encoding(urldecode($anchor->getAttribute('href')), self::TEXT_ENCODING, self::DOM_ENCODING);
        $restored = str_replace($needle, $tag, $href);
        if ($restored === $href) {
            return;
        }

        $anchor->removeAttribute('href');
        $newLink = mb_convert_encoding($restored, self::DOM_ENCODING, self::TEXT_ENCODING);
        $anchor->setAttributeNodeNS(new DOMAttr('href', urldecode($newLink)));
    }

    /**
     * Builds the anchor markup for one term.
     *
     * @param string $tag Term to link.
     * @return string Anchor element; the path segment is empty when the term
     *                has no entry in the link map.
     */
    private function buildLink($tag)
    {
        $path = isset($this->baikeLink[$tag]) ? $this->baikeLink[$tag] : '';

        return '<a href="http://baike.onlylady.com/' . $path . '/" target="_blank" class="dottedline">' . $tag . '</a>';
    }

    /**
     * Returns the marker form of a term: '#' inserted after its first character.
     *
     * @param string $tag Term (GBK).
     * @return string
     */
    private function getSpaceTag($tag)
    {
        $length = mb_strlen($tag, self::TEXT_ENCODING);
        $head = mb_substr($tag, 0, 1, self::TEXT_ENCODING);
        $tail = mb_substr($tag, 1, $length, self::TEXT_ENCODING);

        return $head . '#' . $tail;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment