Skip to content

Instantly share code, notes, and snippets.

@xcaptain
Created September 18, 2016 06:12
Show Gist options
  • Save xcaptain/a117c49661a97fb96922872976d6fb41 to your computer and use it in GitHub Desktop.
Save xcaptain/a117c49661a97fb96922872976d6fb41 to your computer and use it in GitHub Desktop.
用php抓取roll.news.qq.com的内容
<?php
/**
 * Fetches the first page of the rolling-news JSON interface at
 * roll.news.qq.com and dumps the decoded payload.
 *
 * Usage: (new RollNewsSpider())->run();
 */
class RollNewsSpider
{
    /** @var string Base URL of the rolling-news interface. */
    private $url = 'http://roll.news.qq.com/interface/roll.php';

    /**
     * Downloads the news list and prints the decoded structure with var_dump().
     *
     * @return void
     *
     * @throws \RuntimeException When the HTTP request fails or the body is not valid JSON.
     */
    public function run()
    {
        $data = $this->getJsonData();
        var_dump($data);
    }

    /**
     * Builds the request URL, downloads the response and decodes it.
     *
     * @return mixed Decoded JSON (objects are returned as associative arrays).
     *
     * @throws \RuntimeException When the HTTP request fails or the body is not valid JSON.
     */
    private function getJsonData()
    {
        $fullUrl = $this->makeRequestUrl();
        $rawData = $this->getRawData($fullUrl);

        return $this->parseData($rawData);
    }

    /**
     * Decodes a raw response body as JSON.
     *
     * json_decode() only accepts UTF-8, so a body that is not valid UTF-8 is
     * transcoded first (see toUtf8()).
     *
     * @param string $rawData Response body as received from the server.
     *
     * @return mixed Decoded JSON (objects are returned as associative arrays).
     *
     * @throws \RuntimeException When the body cannot be decoded as JSON.
     */
    private function parseData(string $rawData)
    {
        $decoded = json_decode($this->toUtf8($rawData), true);

        // A literal JSON "null" also decodes to null, so the error code is
        // what actually distinguishes failure from success.
        if ($decoded === null && json_last_error() !== JSON_ERROR_NONE) {
            throw new \RuntimeException(
                'Unable to decode response as JSON: ' . json_last_error_msg()
            );
        }

        return $decoded;
    }

    /**
     * Returns $text as UTF-8, transcoding from GBK when it is not already
     * valid UTF-8.
     *
     * NOTE(review): GBK is assumed as the source encoding because the request
     * advertises "charset=gbk"; confirm against the live endpoint.
     *
     * @param string $text Bytes in either UTF-8 or (presumably) GBK.
     *
     * @return string UTF-8 text, or the input unchanged if no converter is available.
     */
    private function toUtf8(string $text): string
    {
        // An empty pattern with the /u modifier matches only when the subject
        // is valid UTF-8, which gives a validity check without extra extensions.
        if (preg_match('//u', $text) === 1) {
            return $text;
        }

        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($text, 'UTF-8', 'GBK');
        }

        if (function_exists('iconv')) {
            $converted = iconv('GBK', 'UTF-8//IGNORE', $text);
            if ($converted !== false) {
                return $converted;
            }
        }

        // No converter available: hand the bytes back and let the JSON
        // decoder report the problem.
        return $text;
    }

    /**
     * Performs a GET request and returns the response body.
     *
     * @param string $url Absolute URL to fetch.
     *
     * @return string Response body (gzip-decoded by cURL when compressed).
     *
     * @throws \RuntimeException On transport failure or an HTTP status >= 400.
     */
    private function getRawData(string $url): string
    {
        $ch = curl_init();
        if ($ch === false) {
            throw new \RuntimeException('Unable to initialise a cURL handle.');
        }

        curl_setopt_array($ch, [
            CURLOPT_URL            => $url,
            CURLOPT_REFERER        => 'http://roll.news.qq.com',
            CURLOPT_HTTPHEADER     => [
                "Content-Type:text/html; charset=gbk",
            ],
            CURLOPT_ENCODING       => "gzip",
            CURLOPT_HEADER         => 0,
            CURLOPT_RETURNTRANSFER => 1,
            // Without explicit limits a stalled server blocks the script forever.
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_TIMEOUT        => 30,
        ]);

        $body = curl_exec($ch);
        // Collect diagnostics before the handle is released.
        $error = curl_error($ch);
        $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        if ($body === false) {
            throw new \RuntimeException("Request to {$url} failed: {$error}");
        }

        if ($status >= 400) {
            throw new \RuntimeException("Request to {$url} returned HTTP status {$status}.");
        }

        return $body;
    }

    /**
     * Builds the full request URL: base URL, fixed query parameters and a
     * random cache-busting token.
     *
     * @return string Absolute request URL.
     */
    private function makeRequestUrl(): string
    {
        $queryArr = [
            'cata' => '',
            'site' => 'news',
            // NOTE(review): 'data' may have been intended as 'date'; left
            // unchanged — confirm against the endpoint before renaming.
            'data' => '',
            'page' => 1,
            'mode' => 1,
            'of'   => 'json',
        ];
        $queryStr = http_build_query($queryArr);

        // Append a value-less random fraction in (0, 1] so that each request
        // uses a distinct URL.
        $randMax = 10000;
        $randNum = random_int(1, $randMax) / $randMax;
        $queryStr .= '&' . $randNum;

        return $this->url . '?' . $queryStr;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment