Skip to content

Instantly share code, notes, and snippets.

@ycrao
Created September 14, 2014 02:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ycrao/8dbc8763272dfa49eecc to your computer and use it in GitHub Desktop.
Save ycrao/8dbc8763272dfa49eecc to your computer and use it in GitHub Desktop.
每日一文 PHP CURL
<?php
/**
 * Fetch a single page with cURL and return its body.
 *
 * Redirects are followed (up to a fixed limit) and the transfer is bounded by
 * connect/total timeouts so an unresponsive server cannot hang the script.
 *
 * @param string $Url         Address of the page to fetch.
 * @param string $User_Agent  User-Agent string to send, e.g. "baiduspider" or "googlebot".
 * @param string $Referer_Url Referer to send with the request.
 *
 * @return string|false The response body, or false when the URL is empty,
 *                      the cURL handle cannot be created, or the transfer fails.
 */
function GetSources($Url,$User_Agent='',$Referer_Url='')
{
    // Nothing to fetch: fail the same way a failed transfer does.
    if ((string) $Url === '') {
        return false;
    }

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt_array($ch, [
        CURLOPT_URL            => $Url,
        CURLOPT_USERAGENT      => $User_Agent,
        CURLOPT_REFERER        => $Referer_Url,
        CURLOPT_FOLLOWLOCATION => true,
        // Bound the redirect chain so a redirect loop terminates.
        CURLOPT_MAXREDIRS      => 5,
        // Return the body from curl_exec() instead of printing it.
        CURLOPT_RETURNTRANSFER => true,
        // Seconds allowed for connecting / for the whole transfer.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT        => 30,
    ]);

    // curl_exec() returns false on failure; that value is passed through to the caller.
    $MySources = curl_exec($ch);
    curl_close($ch);

    return $MySources;
}
// Page to scrape: the site's random-article URL.
$Url = "http://meiriyiwen.com/random";
$User_Agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; .NET CLR 1.1.4322)";
$Referer_Url = 'http://meiriyiwen.com/';
$pagecontent = GetSources($Url,$User_Agent,$Referer_Url);
// GetSources() can return a non-string (curl_exec() yields false on failure);
// normalise it so the regexes below always receive a string.
if (!is_string($pagecontent)) {
    $pagecontent = '';
}
// Capture the inner HTML of the heading, the author line and the article body.
// Modifier "s" lets "." span newlines; "i" makes the tag names case-insensitive.
preg_match_all("/<h1>(.*?)<\/h1>/is", $pagecontent, $title);
preg_match_all("/<p\sclass=\"article_author\"><span>(.*?)<\/span><\/p>/is", $pagecontent, $author);
preg_match_all("/<div\sclass=\"article_text\">(.*?)<\/div>/is", $pagecontent, $content);
// Only the first match of each pattern is used. When the fetch failed or the
// markup did not match, fall back to empty text and a visible notice in the
// article body (the message reads "Failed to fetch the article, please try
// again later") instead of reading undefined offsets.
$articleTitle = $title[1][0] ?? '';
$articleAuthor = $author[1][0] ?? '';
$articleContent = $content[1][0] ?? '<p class="red">文章获取失败，请稍后重试。</p>';
// Output page template; #title#, #author# and #content# are placeholders filled in below.
$tplt = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><meta name="generator" content="php curl"><meta content="每天一篇精彩文章" name="description"/><meta http-equiv="cache-control" content="no-cache"><meta name="keywords" content="每日一文" /><meta name="robots" content="all"><style type="text/css">body{color:#333;font-family:Georgia,Verdana,Arial,"Times New Roman";font-size:16px;margin:0 auto 0;padding:0}div#container{width:600px;margin:auto}div#article_text{float:left;width:600px;margin:auto;margin-bottom:10px}#footer{color:#888;clear:both;border-style:double;border-width:.25em 0 0 0;border-color:#ddd;text-align:center;padding-top:1em;padding-bottom:1em}#footer a{color:#888;border-bottom:1px solid #ccc}#footer a:hover{color:#111}#footer p{line-height:2em}a,a:hover{text-decoration:none}a img{border:0}abbr{line-height:1em;text-transform:uppercase;letter-spacing:1px;border-bottom:0;cursor:help}li a{color:#2361a1}strong{color:#2461bb}em{color:#af1a0c}pre{background:#eee;border:1px solid #ddd;overflow:auto;clear:both}.artical_title{font-family:"微软雅黑","黑体";font-size:24px;line-height:50px;margin-top:10px}.author{font-size:16px}.mryw{color:#666}.article_center{width:100%;margin:10px 0 0;font-size:14px;line-height:20px}h3{color:#333;font-size:16px;font-weight:bold;margin:0 25px 15px 25px;padding:5px 0 5px 0;border-top:1px dotted #c0c0c0;border-bottom:1px dotted #c0c0c0;text-transform:uppercase}.format_text{border:0}.headline_meta{font-style:italic;font-size:.75em;line-height:1.5em}.headline_meta span,.headline_meta abbr{font-style:normal;text-transform:uppercase;letter-spacing:1px}.format_text blockquote{margin:0 0 1.5em .75em;padding-left:.75em}blockquote{border-left:1px solid #ddd;color:#666}.format_text a{text-decoration:underline}.format_text a:hover{text-decoration:none}.format_text 
ul{list-style:square}.comment_author{font-size:1.25em;line-height:1.25em;padding-right:.75em}.comment_time{padding-right:1em}#comment-body p{font-size:14px}p#description{font-size:12px}p.red{color:red}</style><title>#title#</title></head><body><div id="container"><div id="nav"><h3><a href="http://meiriyiwen.com/random">随机文章</a> | <a href="http://voice.meiriyiwen.com/">声音</a> | <a href="http://book.meiriyiwen.com">书架</a> | <a href="http://www.douban.com/group/meiriyiwen">豆瓣小组</a></h3></div><div id="content"><div class="artical"><div class="artical_title"><span class="title">#title#</span> </div><div class="author">#author#&nbsp;<span class="mryw">|&nbsp;每日一文</span></div><div class="article_center">#content#</div><div id="footer"><p>文章来源:<a href="http://meiriyiwen.com/">每日一文</a></p></div></div></div></div></body></html>';
// strtr() with an array replaces every placeholder in a single pass, so scraped
// text that happens to contain a placeholder token is not substituted a second time.
// NOTE(review): the captured fragments are inserted as raw HTML, as before — confirm
// the source page is trusted before exposing this output more widely.
$tplt = strtr($tplt, [
    '#title#'   => $articleTitle,
    '#author#'  => $articleAuthor,
    '#content#' => $articleContent,
]);
echo $tplt;
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment