Created
September 14, 2014 02:24
-
-
Save ycrao/8dbc8763272dfa49eecc to your computer and use it in GitHub Desktop.
每日一文 PHP CURL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Fetch a page with cURL and return its response body.
 *
 * Redirects are followed (up to 10 hops) and the body is returned to the
 * caller instead of being printed.
 *
 * @param string $Url         Address of the page to fetch.
 * @param string $User_Agent  User-Agent header to send, e.g. "baiduspider" or "googlebot".
 * @param string $Referer_Url Referer header to send.
 * @param int    $Timeout     Upper bound, in seconds, for the whole transfer
 *                            (0 disables the limit, as in libcurl).
 *
 * @return string|false The response body, or false when the cURL handle
 *                      cannot be created or the transfer fails.
 */
function GetSources($Url, $User_Agent = '', $Referer_Url = '', $Timeout = 30)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $Url);
    curl_setopt($ch, CURLOPT_USERAGENT, $User_Agent);
    curl_setopt($ch, CURLOPT_REFERER, $Referer_Url);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // Bound the redirect chain so a redirect loop cannot keep the request alive.
    curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Without these limits a stalled server would block the script indefinitely.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, (int) $Timeout);

    $MySources = curl_exec($ch);
    curl_close($ch);

    return $MySources;
}
// Request settings for the page whose content is scraped.
$Url = "http://meiriyiwen.com/random";
$User_Agent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; .NET CLR 1.1.4322)";
$Referer_Url = 'http://meiriyiwen.com/';

$pagecontent = GetSources($Url, $User_Agent, $Referer_Url);

// GetSources() yields false when the transfer fails; an empty body is equally
// unusable. Stop here instead of rendering an empty page full of notices.
if ($pagecontent === false || $pagecontent === '') {
    http_response_code(502);
    exit('Unable to fetch ' . $Url);
}

// Extract the article heading, author and body from the fetched HTML.
// preg_match_all() fills each result so that index [1][0] holds the first
// capture of the first match (absent when the pattern does not match).
preg_match_all("/<h1>(.*?)<\/h1>/is", $pagecontent, $title);
preg_match_all("/<p\sclass=\"article_author\"><span>(.*?)<\/span><\/p>/is", $pagecontent, $author);
preg_match_all("/<div\sclass=\"article_text\">(.*?)<\/div>/is", $pagecontent, $content);
// Page template; #title#, #author# and #content# are placeholders filled in below.
$tplt = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><meta name="generator" content="php curl"><meta content="每天一篇精彩文章" name="description"/><meta http-equiv="cache-control" content="no-cache"><meta name="keywords" content="每日一文" /><meta name="robots" content="all"><style type="text/css">body{color:#333;font-family:Georgia,Verdana,Arial,"Times New Roman";font-size:16px;margin:0 auto 0;padding:0}div#container{width:600px;margin:auto}div#article_text{float:left;width:600px;margin:auto;margin-bottom:10px}#footer{color:#888;clear:both;border-style:double;border-width:.25em 0 0 0;border-color:#ddd;text-align:center;padding-top:1em;padding-bottom:1em}#footer a{color:#888;border-bottom:1px solid #ccc}#footer a:hover{color:#111}#footer p{line-height:2em}a,a:hover{text-decoration:none}a img{border:0}abbr{line-height:1em;text-transform:uppercase;letter-spacing:1px;border-bottom:0;cursor:help}li a{color:#2361a1}strong{color:#2461bb}em{color:#af1a0c}pre{background:#eee;border:1px solid #ddd;overflow:auto;clear:both}.artical_title{font-family:"微软雅黑","黑体";font-size:24px;line-height:50px;margin-top:10px}.author{font-size:16px}.mryw{color:#666}.article_center{width:100%;margin:10px 0 0;font-size:14px;line-height:20px}h3{color:#333;font-size:16px;font-weight:bold;margin:0 25px 15px 25px;padding:5px 0 5px 0;border-top:1px dotted #c0c0c0;border-bottom:1px dotted #c0c0c0;text-transform:uppercase}.format_text{border:0}.headline_meta{font-style:italic;font-size:.75em;line-height:1.5em}.headline_meta span,.headline_meta abbr{font-style:normal;text transform:uppercase;letter-spacing:1px}.format_text blockquote{margin:0 0 1.5em .75em;padding-left:.75em}blockquote{border-left:1px solid #ddd;color:#666}.format_text a{text-decoration:underline}.format_text a:hover{text-decoration:none}.format_text 
ul{list-style:square}.comment_author{font-size:1.25em;line-height:1.25em;padding-right:.75em}.comment_time{padding-right:1em}#comment-body p{font-size:14px}p#description{font-size:12px}p.red{color:red}</style><title>#title#</title></head><body><div id="container"><div id="nav"><h3><a href="http://meiriyiwen.com/random">随机文章</a> | <a href="http://voice.meiriyiwen.com/">声音</a> | <a href="http://book.meiriyiwen.com">书架</a> | <a href="http://www.douban.com/group/meiriyiwen">豆瓣小组</a></h3></div><div id="content"><div class="artical"><div class="artical_title"><span class="title">#title#</span> </div><div class="author">#author# <span class="mryw">| 每日一文</span></div><div class="article_center">#content#</div><div id="footer"><p>文章来源:<a href="http://meiriyiwen.com/">每日一文</a></p></div></div></body></html>';

// Fill the placeholders in one pass. strtr() never re-scans text it has just
// inserted, so a scraped value that happens to contain "#author#" or
// "#content#" is left untouched. A missing match falls back to an empty
// string instead of raising an undefined-offset notice.
// NOTE(review): the scraped fragments are inserted as raw HTML from the
// remote site; escape or sanitize them if that source is not trusted.
$tplt = strtr($tplt, array(
    '#title#'   => isset($title[1][0]) ? $title[1][0] : '',
    '#author#'  => isset($author[1][0]) ? $author[1][0] : '',
    '#content#' => isset($content[1][0]) ? $content[1][0] : '',
));

echo $tplt;
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment