Created
September 18, 2016 06:12
-
-
Save xcaptain/a117c49661a97fb96922872976d6fb41 to your computer and use it in GitHub Desktop.
用php抓取roll.news.qq.com的内容
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Downloads the first page of the rolling-news feed exposed by
 * roll.news.qq.com and dumps the decoded JSON structure.
 */
class RollNewsSpider
{
    /** Endpoint of the roll-news interface; the query string is appended per request. */
    private $url = 'http://roll.news.qq.com/interface/roll.php';

    /**
     * Fetch the feed, decode it and print the result with var_dump().
     *
     * @return void
     *
     * @throws RuntimeException when the HTTP transfer cannot be performed
     */
    public function run()
    {
        $data = $this->getJsonData();
        var_dump($data);
    }

    /**
     * Build the request URL, download the payload and decode it.
     *
     * @return mixed decoded JSON (arrays for objects), or null when the
     *               payload is empty or not valid JSON
     *
     * @throws RuntimeException when the HTTP transfer cannot be performed
     */
    private function getJsonData()
    {
        $fullUrl = $this->makeRequestUrl();
        $rawData = $this->getRawData($fullUrl);

        return $this->parseData($rawData);
    }

    /**
     * Decode a raw JSON payload into PHP values (objects become arrays).
     *
     * json_decode() only accepts UTF-8, so a payload that is not valid UTF-8
     * is transcoded first; valid UTF-8 input is decoded untouched.
     *
     * @param mixed $rawData response body as returned by getRawData()
     *
     * @return mixed decoded value, or null when the input is not a non-empty
     *               string or cannot be decoded
     */
    private function parseData($rawData)
    {
        if (!is_string($rawData) || $rawData === '') {
            return null;
        }

        return json_decode($this->toUtf8($rawData), true);
    }

    /**
     * Return $text as UTF-8, transcoding from GBK when it is not already
     * valid UTF-8.
     *
     * NOTE(review): GBK is assumed because the request advertises
     * "charset=gbk"; confirm against the encoding the endpoint really sends.
     *
     * @param string $text raw bytes received from the server
     *
     * @return string UTF-8 text, or the input unchanged when no conversion
     *                extension is available or the conversion fails
     */
    private function toUtf8($text)
    {
        // With the /u modifier preg_match() returns false for a subject that
        // is not valid UTF-8, which makes it a dependency-free validity check.
        if (preg_match('//u', $text) === 1) {
            return $text;
        }

        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($text, 'UTF-8', 'GBK');
        }

        if (function_exists('iconv')) {
            $converted = iconv('GBK', 'UTF-8//IGNORE', $text);
            if ($converted !== false) {
                return $converted;
            }
        }

        return $text;
    }

    /**
     * Perform a GET request and return the (gzip-decoded) response body.
     *
     * @param string $url fully built request URL
     *
     * @return string response body without headers
     *
     * @throws RuntimeException when cURL cannot be initialised or the
     *                          transfer fails
     */
    private function getRawData($url)
    {
        $ch = curl_init();
        if ($ch === false) {
            throw new RuntimeException('Unable to initialise a cURL handle');
        }

        try {
            curl_setopt_array($ch, [
                CURLOPT_URL => $url,
                CURLOPT_REFERER => 'http://roll.news.qq.com',
                // NOTE(review): Content-Type on a body-less GET describes
                // nothing; an Accept header may have been intended. Kept as-is
                // so the request stays byte-identical to the original.
                CURLOPT_HTTPHEADER => [
                    "Content-Type:text/html; charset=gbk",
                ],
                CURLOPT_ENCODING => "gzip",
                CURLOPT_HEADER => 0,
                CURLOPT_RETURNTRANSFER => 1,
            ]);

            $body = curl_exec($ch);
            if ($body === false) {
                // Surface the transport error instead of letting it turn into
                // a silent null further down the pipeline.
                throw new RuntimeException(
                    sprintf('Request to %s failed: %s', $url, curl_error($ch)),
                    curl_errno($ch)
                );
            }

            return $body;
        } finally {
            // Release the handle on success and on failure alike.
            curl_close($ch);
        }
    }

    /**
     * Build the request URL for the first page of the "news" site in JSON
     * format, followed by a random fraction as a trailing query component.
     *
     * @return string absolute URL including the query string
     */
    private function makeRequestUrl()
    {
        $queryArr = [
            'cata' => '',
            'site' => 'news',
            'data' => '',
            'page' => 1,
            'mode' => 1,
            'of' => 'json',
        ];
        $queryStr = http_build_query($queryArr);

        // Random value in (0, 1] appended as a bare query component so each
        // request URL differs (presumably a cache buster - confirm).
        $randMax = 10000;
        $randNum = random_int(1, $randMax) / $randMax;
        $queryStr .= '&' . $randNum;

        return $this->url . '?' . $queryStr;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment