Skip to content

Instantly share code, notes, and snippets.

@zubinJiang
Created June 25, 2013 02:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zubinJiang/5855474 to your computer and use it in GitHub Desktop.
Save zubinJiang/5855474 to your computer and use it in GitHub Desktop.
<?php
/**
* Adds encyclopedia (baike) entry links to body text.
*
* @author ol
*
* @example
* $trans = new AddBaikeLink($text, $tagArr, $baikeLink);
* $new_text = $trans->addLink();
*
*/
// NOTE(review): $message, $nameArr and $linkArr are not defined anywhere in
// this file; presumably the including script sets them first — confirm.
// $message is overwritten with the linked version of the text.
$trans = new AddBaikeLink($message,$nameArr,$linkArr);
$message = $trans->addLink();
/**
 * Links glossary ("baike") terms found in an HTML fragment.
 *
 * For every tag, the first occurrence that is not already part of an existing
 * link (href, title or text of an <a>) or of an image alt text is wrapped in
 * <a href="http://baike.onlylady.com/{slug}/" target="_blank" class="dottedline">.
 *
 * Pipeline used by addLink():
 *  1. addSpace()         - mask tags inside existing <a>/<img> by inserting
 *                          '#' after their first character, so step 2 cannot
 *                          match them;
 *  2. replaceTagLink()   - link the first remaining occurrence of each tag;
 *  3. restoreSpaceTags() - remove the masks again;
 *  4. strip the document wrapper that DOMDocument::saveHTML() adds.
 *
 * The fragment and the tags are handled as GBK-encoded byte strings (the
 * document is parsed with a charset=gbk meta and attribute values are
 * converted between UTF-8 and GBK).
 *
 * Known limitation: matching in step 2 runs on the serialized HTML, so a tag
 * that coincides with markup outside <a>/<img> (element names, other
 * attributes) is not protected.
 */
class AddBaikeLink {
    /** @var string|null HTML fragment being processed. */
    private $text;

    /** @var array Unique, non-empty tags in the order given by the caller. */
    private $tagArr;

    /** @var array Map of tag => URL slug on baike.onlylady.com. */
    private $baikeLink;

    /**
     * @param string $_text      HTML fragment to process.
     * @param array  $_tagArr    Tags to link; empty, non-scalar and duplicate
     *                           entries are ignored.
     * @param array  $_baikeLink Map of tag => slug used to build the link URL.
     */
    public function __construct($_text, $_tagArr, $_baikeLink) {
        $this->text = $_text;
        $this->tagArr = array();
        $this->baikeLink = is_array($_baikeLink) ? $_baikeLink : array();

        if (!is_array($_tagArr)) {
            return;
        }
        $seen = array();
        foreach ($_tagArr as $tag) {
            if (!is_scalar($tag)) {
                continue;
            }
            $tag = (string) $tag;
            // An empty tag would match at every position; a duplicate would
            // link a second occurrence of the same term.
            if ($tag === '' || isset($seen[$tag])) {
                continue;
            }
            $seen[$tag] = true;
            $this->tagArr[] = $tag;
        }
    }

    /**
     * Runs the whole pipeline and returns the processed fragment.
     *
     * @return string Fragment with the links added.
     */
    public function addLink() {
        $this->addSpace();
        $this->replaceTagLink();
        $this->restoreSpaceTags();
        $this->text = self::stripDocumentWrapper($this->text);
        return $this->text;
    }

    /**
     * Removes the doctype/html/head/body scaffolding added by saveHTML().
     *
     * The "<body><p>" / "</p></body></html>" forms are tried first (libxml
     * wraps leading bare text in a <p>); the plain "<body>" forms cover
     * fragments that start with an element and therefore get no <p> wrapper.
     *
     * @param string|null $html Serialized document.
     * @return string Fragment without the wrapper.
     */
    private static function stripDocumentWrapper($html) {
        $head = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=gbk"></head>';
        $wrappers = array(
            '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
            $head . '<body><p>',
            '</p></body></html>',
            $head . '<body>',
            '</body></html>',
        );
        return str_replace($wrappers, '', $html);
    }

    /**
     * Parses HTML into a DOMDocument without emitting parser warnings.
     *
     * @param string $html Markup to parse (must not be empty).
     * @return DOMDocument
     */
    private static function loadHtmlDocument($html) {
        // Collect libxml warnings for malformed markup instead of printing them.
        $previous = libxml_use_internal_errors(true);
        $doc = new DOMDocument();
        $doc->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        return $doc;
    }

    /**
     * Replaces $search with $replace inside one attribute of an element.
     *
     * The DOM value (UTF-8) is converted to GBK for the comparison with the
     * GBK tag and converted back before it is written.
     *
     * @param DOMElement $element Element owning the attribute.
     * @param string     $name    Attribute name.
     * @param string     $search  GBK string to look for.
     * @param string     $replace GBK string to put in its place.
     */
    private static function replaceInAttribute($element, $name, $search, $replace) {
        $value = mb_convert_encoding($element->getAttribute($name), 'gbk', 'utf-8');
        // Compare with false explicitly: stristr() may return the falsy string "0".
        if (stristr($value, $search) === false) {
            return;
        }
        $updated = str_replace($search, $replace, $value);
        $element->removeAttribute($name);
        $element->setAttribute($name, mb_convert_encoding($updated, 'utf-8', 'gbk'));
    }

    /**
     * Removes the '#' masks that addSpace() inserted.
     *
     * href/title of <a> and alt of <img> are fixed through the DOM; everything
     * else (e.g. link text) is restored by a plain string replacement on the
     * serialized document.
     */
    private function restoreSpaceTags() {
        if (!$this->tagArr || $this->text === null || $this->text === '') {
            return;
        }
        $doc = self::loadHtmlDocument($this->text);
        $xpath = new DOMXPath($doc);
        $links = $xpath->query('//a');
        $images = $xpath->query('//img');

        $spaceTags = array();
        foreach ($this->tagArr as $tag) {
            $spaceTag = self::getSpaceTag($tag);
            $spaceTags[] = $spaceTag;
            $needle = urldecode($spaceTag);

            foreach ($links as $node) {
                // The href is URL-decoded before comparing - presumably because
                // saveHTML() percent-escapes non-ASCII bytes in URLs (confirm).
                $href = mb_convert_encoding(urldecode($node->getAttribute('href')), 'gbk', 'utf-8');
                if (stristr($href, $needle) !== false) {
                    $restored = mb_convert_encoding(str_replace($needle, $tag, $href), 'utf-8', 'gbk');
                    $node->removeAttribute('href');
                    $node->setAttributeNodeNS(new DOMAttr('href', urldecode($restored)));
                }
                self::replaceInAttribute($node, 'title', $needle, $tag);
            }
            foreach ($images as $item) {
                self::replaceInAttribute($item, 'alt', $needle, $tag);
            }
        }

        $this->text = str_replace($spaceTags, $this->tagArr, $doc->saveHTML());
    }

    /**
     * Links the first unmasked occurrence of every tag.
     *
     * Each hit is first swapped for a control-character placeholder and the
     * anchors are inserted only after all tags were searched. That way a later
     * tag can never match inside the markup generated for an earlier one
     * (no nested links), and '$'/'\' in a tag or slug are not interpreted as
     * regex back-references.
     */
    private function replaceTagLink() {
        if ($this->text === null) {
            return;
        }
        $anchors = array();
        $index = 0;
        foreach ($this->tagArr as $tag) {
            // Placeholder made only of bytes 0x01-0x04, which cannot be part
            // of a printable or GBK multi-byte tag.
            $placeholder = "\x01" . strtr(decbin($index++), '01', "\x03\x04") . "\x02";
            $count = 0;
            $replaced = preg_replace(self::getTagPattern($tag), $placeholder, $this->text, 1, $count);
            if ($replaced === null || $count === 0) {
                continue; // PCRE failure or tag not present: leave the text as is
            }
            $this->text = $replaced;
            $anchors[$placeholder] = $this->getTagReplacement($tag);
        }
        if ($anchors) {
            $this->text = strtr($this->text, $anchors);
        }
    }

    /**
     * Builds the regex that matches a tag literally.
     *
     * @param string $tag Tag text.
     * @return string PCRE pattern with '/' delimiters.
     */
    private static function getTagPattern($tag) {
        // Quote the tag: it is data, and GBK trail bytes can be regex
        // metacharacters such as '\', '|' or '['.
        return '/(' . preg_quote($tag, '/') . ')/';
    }

    /**
     * Builds the anchor markup for a tag.
     *
     * @param string $tag Tag text, used as the link text.
     * @return string Anchor HTML; the slug is empty when the tag has no entry
     *                in the slug map.
     */
    private function getTagReplacement($tag) {
        $slug = isset($this->baikeLink[$tag]) ? $this->baikeLink[$tag] : '';
        return '<a href="http://baike.onlylady.com/' . $slug . '/" target="_blank" class="dottedline">' . $tag . '</a>';
    }

    /**
     * Returns the masked form of a tag: '#' inserted after its first
     * (GBK) character, e.g. "world" => "w#orld".
     *
     * @param string $tag Tag text.
     * @return string Masked tag.
     */
    private static function getSpaceTag($tag) {
        $length = mb_strlen($tag, 'GBK');
        return mb_substr($tag, 0, 1, 'GBK') . '#' . mb_substr($tag, 1, $length, 'GBK');
    }

    /**
     * Masks every tag that occurs inside an existing link (href, title, text)
     * or an image alt text so that replaceTagLink() skips it.
     *
     * Does nothing when there are no tags, leaving the text untouched.
     */
    private function addSpace() {
        if (!$this->tagArr) {
            return;
        }
        // The meta tells libxml to read (and later serialize) the text as GBK.
        $meta = '<meta http-equiv="Content-Type" content="text/html; charset=gbk" />';
        $doc = self::loadHtmlDocument($meta . $this->text);
        $xpath = new DOMXPath($doc);
        $links = $xpath->query('//a');
        // Work on the text nodes rather than on the <a> element's nodeValue:
        // assigning nodeValue to an element drops nested markup and
        // mis-handles '&'.
        $linkTexts = $xpath->query('//a//text()');
        $images = $xpath->query('//img');

        foreach ($this->tagArr as $tag) {
            $spaceTag = self::getSpaceTag($tag);

            foreach ($links as $node) {
                self::replaceInAttribute($node, 'href', $tag, $spaceTag);
                self::replaceInAttribute($node, 'title', $tag, $spaceTag);
            }
            foreach ($linkTexts as $textNode) {
                $value = mb_convert_encoding($textNode->nodeValue, 'gbk', 'utf-8');
                if (stristr($value, $tag) !== false) {
                    $textNode->nodeValue = mb_convert_encoding(str_replace($tag, $spaceTag, $value), 'utf-8', 'gbk');
                }
            }
            foreach ($images as $item) {
                self::replaceInAttribute($item, 'alt', $tag, $spaceTag);
            }
        }

        $this->text = $doc->saveHTML();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment