Skip to content

Instantly share code, notes, and snippets.

@horsley
Last active Mar 6, 2017
Embed
What would you like to do?
Vagex Robot
<?php
/**
* Created by JetBrains PhpStorm.
* User: horsley
* Date: 13-1-14
* Time: 下午2:20
* To change this template use File | Settings | File Templates.
*
* 参考 @link https://apidoc.sinaapp.com/sae/SaeFetchurl.html
* @link http://josephscott.org/archives/2010/03/php-helpers-curl_http_request/
*/
/**
 * Minimal cURL-based HTTP client.
 *
 * Options, cookies and request headers are accumulated on the instance and
 * applied to a fresh cURL handle on every fetch(). The outcome of the last
 * request is exposed through the public $response array with the keys
 * 'body', 'headers', 'err_no', 'err_msg' and 'info'.
 */
class HttpReq
{
    /** @var array Result of the most recent fetch() call. */
    public $response = array();
    /** @var array Cookie name => value pairs sent with every request. */
    private $cookies = array();
    /** @var array Header name => value pairs sent with every request. */
    private $headers = array();
    /** @var array cURL option constant => value. */
    private $curl_opt = array();

    public function __construct() {
        $this->curl_opt = array(
            CURLOPT_AUTOREFERER => true,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HEADER => true, // headers are returned in-band and split off in fetch()
            // NOTE(review): TLS peer/host verification is disabled here; responses
            // fetched over https are therefore not authenticated.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => 0,
        );
        $this->setConnectionTimeout(5); // default connect timeout: 5s
        $this->setTotalTimeout(15);     // default total timeout: 15s
        $this->setUserAgent('Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1'); // default UA
        //$this->setProxy('192.168.11.36:8880'); //fiddler debug
    }

    /**
     * Set the proxy, e.g. 127.0.0.1:8888
     * @param string $proxy
     * @param bool $is_socks5 true to talk SOCKS5 to the proxy instead of HTTP
     */
    public function setProxy($proxy, $is_socks5 = false) {
        $this->curl_opt[CURLOPT_PROXY] = $proxy;
        if ($is_socks5) {
            $this->curl_opt[CURLOPT_PROXYTYPE] = CURLPROXY_SOCKS5;
        }
    }

    /**
     * Set the request method, e.g. GET, POST or HEAD.
     * @param string $method
     */
    public function setMethod($method = 'GET') {
        // Drop flags left behind by a previously selected method so that a
        // POST followed by a GET on the same instance does not stay a POST.
        unset($this->curl_opt[CURLOPT_POST], $this->curl_opt[CURLOPT_NOBODY]);
        $this->curl_opt[CURLOPT_CUSTOMREQUEST] = $method;
        if ($method == 'POST') {
            $this->curl_opt[CURLOPT_POST] = true;
        } else if ($method == 'HEAD') {
            $this->curl_opt[CURLOPT_NOBODY] = true;
        }
    }

    /**
     * Set the connect timeout.
     * @param int $second
     */
    public function setConnectionTimeout($second) {
        $this->curl_opt[CURLOPT_CONNECTTIMEOUT] = $second;
    }

    /**
     * Set the total execution timeout.
     * @param int $second
     */
    public function setTotalTimeout($second) {
        $this->curl_opt[CURLOPT_TIMEOUT] = $second;
    }

    /**
     * Set the User-Agent string.
     * @param string $ua
     */
    public function setUserAgent($ua) {
        $this->curl_opt[CURLOPT_USERAGENT] = $ua;
    }

    /**
     * Set several cookies at once.
     * @param array $cookie_arr name => value
     */
    public function setCookies($cookie_arr) {
        if ($cookie_arr) {
            foreach ($cookie_arr as $k => $v) {
                $this->setCookie($k, $v);
            }
        }
    }

    /**
     * Set a single cookie.
     * @param string $cookie_name
     * @param string $cookie_value
     */
    public function setCookie($cookie_name, $cookie_value) {
        $this->cookies[$cookie_name] = $cookie_value;
    }

    /**
     * Set a single request header.
     * @param string $header_name
     * @param string $header_value
     */
    public function setHeader($header_name, $header_value) {
        $this->headers[$header_name] = $header_value;
    }

    /**
     * Set the POST payload, replacing whatever was set before.
     * An empty value clears the payload.
     * @param array $post_arr field => value
     * @param bool $multipart true to hand the array to cURL unmodified
     *                        (multipart/form-data), false to url-encode it
     */
    public function setPostData($post_arr, $multipart = false) {
        if (empty($post_arr)) {
            unset($this->curl_opt[CURLOPT_POSTFIELDS]);
            return;
        }
        if ($multipart) {
            $this->curl_opt[CURLOPT_POSTFIELDS] = $post_arr;
            return;
        }
        $pairs = array();
        foreach ($post_arr as $k => $p) {
            $pairs[] = urlencode($k) . '=' . urlencode($p);
        }
        $this->curl_opt[CURLOPT_POSTFIELDS] = implode('&', $pairs);
    }

    /**
     * Return the currently configured POST fields as a decoded array.
     * @return array field => value; empty when no payload is set
     */
    public function getPostData() {
        if (!isset($this->curl_opt[CURLOPT_POSTFIELDS])) {
            return array();
        }
        $stored = $this->curl_opt[CURLOPT_POSTFIELDS];
        if (is_array($stored)) {
            // Multipart payloads are stored verbatim, so there is nothing to decode.
            return $stored;
        }
        if (!is_string($stored)) {
            return array();
        }
        $decoded = array();
        foreach (explode('&', $stored) as $pair) {
            if ($pair === '') {
                continue;
            }
            $kv = explode('=', $pair, 2);
            $decoded[urldecode($kv[0])] = isset($kv[1]) ? urldecode($kv[1]) : '';
        }
        return $decoded;
    }

    /**
     * Translate the collected cookies and headers into cURL options.
     */
    private function _prepare_custom_fields() {
        if (count($this->cookies) > 0) { // cookies init
            $formatted = array();
            foreach ($this->cookies as $k => $v) {
                $formatted[] = "$k=$v";
            }
            $this->curl_opt[CURLOPT_COOKIE] = implode('; ', $formatted);
        }
        if (count($this->headers) > 0) { // headers init
            $formatted = array();
            foreach ($this->headers as $k => $v) {
                $formatted[] = "$k: $v";
            }
            $this->curl_opt[CURLOPT_HTTPHEADER] = $formatted;
        }
    }

    /**
     * Perform the request and populate $this->response.
     * @param string $url
     * @return string|bool response body on success, false when cURL reports an error
     */
    public function fetch($url) {
        $this->_prepare_custom_fields();

        $options = $this->curl_opt;
        $method = isset($options[CURLOPT_CUSTOMREQUEST]) ? strtoupper($options[CURLOPT_CUSTOMREQUEST]) : '';
        if ($method === 'GET' || $method === 'HEAD') {
            // CURLOPT_CUSTOMREQUEST only renames the verb; a payload left over
            // from an earlier POST would still be transmitted.
            unset($options[CURLOPT_POSTFIELDS]);
        }

        $curl = curl_init($url);
        if ($curl === false) {
            $this->response = array(
                'body' => '',
                'headers' => '',
                'err_no' => -1,
                'err_msg' => 'curl_init failed',
                'info' => array(),
            );
            return false;
        }
        curl_setopt_array($curl, $options);
        $raw = curl_exec($curl);
        $this->response['err_no'] = curl_errno($curl);
        $this->response['err_msg'] = curl_error($curl);
        $this->response['info'] = curl_getinfo($curl);
        curl_close($curl);

        if ($raw === false) {
            $raw = ''; // transfer failed; err_no/err_msg carry the reason
        }
        $header_size = isset($this->response['info']['header_size'])
            ? (int) $this->response['info']['header_size'] : 0;

        // cut body and header
        $this->response['headers'] = trim((string) substr($raw, 0, $header_size));
        $this->response['body'] = (string) substr($raw, $header_size);

        // Manual following of 301/302 redirects is not implemented; see
        // http://php.net/manual/en/function.curl-setopt.php#102121

        if ($this->response['err_no'] == 0) {
            return $this->response['body'];
        } else {
            return false;
        }
    }

    /**
     * Return the response headers of the last fetch(), without the status line.
     * @param bool $parse true for a name => value array (Set-Cookie values are
     *                    collected into a list), false for the raw header text
     * @return array|string
     */
    public function getHeaders($parse = true) {
        $raw = isset($this->response['headers']) ? $this->response['headers'] : '';
        $parts = explode("\r\n", $raw, 2);
        $headers = array_pop($parts); // array_pop needs a real variable
        if (!$parse) {
            return $headers;
        }
        $headers_new = array();
        foreach (explode("\r\n", $headers) as $line) {
            $pair = explode(':', $line, 2);
            if (count($pair) < 2 || $pair[1] === '') {
                continue; // not a "Name: value" line
            }
            $k = $pair[0];
            $v = trim($pair[1]);
            if (strtolower($k) == 'set-cookie') {
                if (array_key_exists($k, $headers_new)) {
                    array_push($headers_new[$k], $v);
                } else {
                    $headers_new[$k] = array($v);
                }
            } else {
                $headers_new[$k] = $v;
            }
        }
        return $headers_new;
    }

    /**
     * Extract cookies from the Set-Cookie headers of the last response.
     * @param bool $all true: one array per Set-Cookie header containing every
     *                  key=value pair of that header; false: a single merged
     *                  name => value map with the path and max-age attributes removed
     * @return array
     */
    public function getCookies($all = true)
    {
        $header = isset($this->response['headers']) ? $this->response['headers'] : '';
        $matchs = array();
        $cookies = array();
        $kvs = array();
        if (preg_match_all('/Set-Cookie:\s([^\r\n]+)/i', $header, $matchs)) {
            foreach ($matchs[1] as $match) {
                $cookie = array();
                foreach (explode(";", $match) as $_) {
                    // Limit of 2 keeps values that themselves contain '=' (e.g. base64).
                    $item = explode("=", trim($_), 2);
                    if (count($item) == 2) {
                        $cookie[$item[0]] = $item[1];
                    }
                }
                array_push($cookies, $cookie);
                foreach ($cookie as $name => $value) {
                    $kvs[$name] = $value;
                }
            }
        }
        if ($all) {
            return $cookies;
        }
        foreach (array_keys($kvs) as $name) {
            // Attribute names are case-insensitive ("Path" as well as "path").
            $lower = strtolower($name);
            if ($lower === 'path' || $lower === 'max-age') {
                unset($kvs[$name]);
            }
        }
        return $kvs;
    }
}
<?php
/**
* Created by JetBrains PhpStorm.
* User: horsley
* Date: 13-2-2
* Time: 上午10:39
* To change this template use File | Settings | File Templates.
*/
// Entry point: refuse to run outside the command line, then configure one
// bot instance and hand control to its endless main loop.
if (PHP_SAPI !== 'cli') {
    die ("This is CLI only version!");
}

$v = new VagexCheater();
$v->set_userid('240907');
$v->set_youtube_email('horsley.bot@gmail.com');
$v->set_proxy('127.0.0.1:15846', true); // second argument selects SOCKS5
$v->set_youtube_proxy("http://af.ihorsley.com/video_info.php"); //china mode
$v->run();
/**
 * Class Vagex Cheater with cli log output.
 *
 * Repeatedly requests a "show array" (list of videos) from the Vagex
 * endpoints, sleeps for the required viewing time of each item and then
 * posts a report containing fabricated playback statistics.
 */
class VagexCheater {
    const VAGEX_URL_A = 'http://vagex.com/ffupdater151a.php';
    const VAGEX_URL_B = 'http://vagex.com/ffupdater151b.php';
    const VAGEX_URL_E = 'http://vagex.com/ffupdater151e.php';
    const VAGEX_SPR_SID = 'SID:::|';
    const VAGEX_SPR_EOF = ':::<br>';
    const VAGEX_SPR_EOL = '::||::<br>';
    const VAGEX_SPR_VNO = ':::';
    const VAGEX_SPR_FLD = '|:|';
    const VAGEX_RE_ERR = '/\:\|(.*?)\|\:/';
    const VAGEX_RE_EAR = '/\:\:\|\|(.*?)\|\|\:\:/';

    /** @var string|null Optional file that log() appends to. */
    private $log_file;
    /** @var HttpReq */
    private $http;
    /** @var int Seconds slept for the item currently being processed. */
    private $sleep_time;
    /** @var array Static values sent with every request. */
    private $data_default = array(
        'userid' => '0',
        'build' => '20120713134347',
        'ua' => 'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1',
        'versid' => '1.5.7',
        'ffversion' => '14.0.1',
        'safemode' => 'false',
        'os' => 'Windows NT 6.1',
        'email' => 'abc@gmail.com',
        'username' => 'username_catching_error', // the YouTube page layout changed, so the original scraping can no longer find the user name
        'chk_runtime' => 'true',
        'flash' => 'true',
        'html5' => 'true'
    );
    /** @var array Per-run state: sid, video_arr, nc, youtube_proxy and report fields. */
    public $data_dynamic = array();

    public function __construct() {
        $this->http = new HttpReq();
        $this->http->setUserAgent($this->data_default['ua']); // default UA
        $this->log("Vagex Cheater instance initialized");
    }

    public function set_log_file($filename) {
        $this->log_file = $filename;
        $this->log("Set log file: " . $filename);
    }

    public function set_userid($uid) {
        $this->data_default['userid'] = $uid;
        $this->log("Set user id: " . $uid);
    }

    public function set_proxy($proxy, $is_sock5 = false) {
        $this->http->setProxy($proxy, $is_sock5);
        $this->log("Set proxy: " . ($is_sock5?'sock5://':'') . $proxy );
    }

    public function set_youtube_email($email) {
        $this->data_default['email'] = $email;
        $this->log("Set youtube email: " . $email);
    }

    public function set_youtube_username($username) {
        $this->data_default['username'] = $username;
        $this->log("Set youtube username: " . $username);
    }

    public function set_youtube_proxy($proxy_url) {
        $this->data_dynamic['youtube_proxy'] = $proxy_url;
        $this->log("Set youtube proxy: " . $proxy_url);
    }

    /**
     * Main loop; never returns.
     */
    public function run() {
        $this->log('Start to run main routine');
        while (true) {
            $this->log("A new loop of a video array start");
            // An empty array is treated like a failed update so the loop does
            // not hammer the server without any pause in between.
            if (!$this->update_video_arr() || count($this->data_dynamic['video_arr']) == 0) {
                $this->log('fail update video array, sleep 120 seconds');
                sleep(120);
                continue;
            }
            foreach ($this->data_dynamic['video_arr'] as $vc_item) {
                $this->log('Deal with item:' . json_encode($vc_item));
                $required = isset($vc_item[1][1]) ? intval($vc_item[1][1]) : 0;
                $this->sleep_time = $required + rand(14, 16); //sleep time
                $this->log('Let\'s sleep for ' . $this->sleep_time . ' seconds');
                sleep($this->sleep_time);
                $this->log('Wake up, report processed');
                if ($result = $this->report_processed($vc_item[0])) {
                    $this->data_dynamic['nc']++;
                    $this->log('Earnt: ' . $result);
                }
            }
        }
    }

    /**
     * get video items
     *
     * Fills data_dynamic['sid'] and data_dynamic['video_arr']; each item is
     * array(video_no, array(field, ...)). Empty or malformed entries are
     * dropped while the positions of the remaining entries are preserved.
     * @return bool false when the response could not be split into list and sid
     */
    public function update_video_arr() {
        $this->log('Requesting new Show Array.');
        $this->http->setMethod('POST');
        $this->http->setPostData(array(
            'userid' => $this->data_default['userid'],
            'ua' => $this->data_default['ua'],
            'build' => $this->data_default['build'],
            'versid' => $this->data_default['versid']
        ));
        $this->http->fetch(self::VAGEX_URL_A);
        $body = isset($this->http->response['body']) ? (string) $this->http->response['body'] : '';
        $_ = explode(self::VAGEX_SPR_SID, $body);
        if (count($_) != 2) {
            $this->log("Cut Show Array Failed");
            return false;
        }
        $this->log('Show Array Request Data Received...');
        $sid_parts = explode(self::VAGEX_SPR_EOF, $_[1]);
        $this->data_dynamic['sid'] = $sid_parts[0];

        $video_arr = array();
        foreach (explode(self::VAGEX_SPR_EOL, $_[0]) as $idx => $raw) {
            if (empty($raw)) {
                continue; // e.g. the empty piece after the trailing separator
            }
            $e = explode(self::VAGEX_SPR_VNO, $raw); //$e[0] = video_no
            if (!isset($e[1])) {
                continue; // no field section, nothing usable
            }
            // Original position kept as key because other methods look items
            // up via video_arr[$video_no].
            $video_arr[$idx] = array($e[0], explode(self::VAGEX_SPR_FLD, $e[1]));
        }
        $this->data_dynamic['video_arr'] = $video_arr;
        $this->log('Show Array parse end, array count: ' . count($this->data_dynamic['video_arr']));
        return true;
    }

    /**
     * Post the report for one item.
     * @param $video_no
     * @return string|bool text captured by VAGEX_RE_EAR, or false when the
     *                     request failed or the response did not contain it
     */
    public function report_processed($video_no) {
        $this->log('report_processed start');
        $PostData = $this->make_report_data($video_no);
        $PostFields = array();
        foreach ($PostData as $k => $v) {
            $PostFields[] = "$k=$v";
        }
        $PostDataStr = base64_encode(implode('&', $PostFields));
        $this->http->setMethod('POST');
        $this->http->setPostData(array('data' => $PostDataStr));
        $response_body = $this->http->fetch(self::VAGEX_URL_B);
        if (!$response_body) {
            $this->log('report_processed Failed');
            return false;
        }
        $err_msg = '';
        if (preg_match(self::VAGEX_RE_ERR, $response_body, $match)) {
            $err_msg = $match[1];
        }
        if (strlen($err_msg) != 0 && substr($err_msg, 0, 16) == 'YTUser done over') { //youtube username limit exceed
            $this->generate_random_ytusername();
        }
        if (preg_match(self::VAGEX_RE_EAR, $response_body, $match)) {
            return $match[1];
        }
        return false;
    }

    /**
     * Wrap all data that need to post
     * @param $video_no
     * @return array field name => already url-encoded value
     */
    public function make_report_data($video_no) {
        //$this->log('make_report_data start');
        $this->get_dynamic_data($video_no);
        $this->make_fake_data($video_no);
        $d = $this->data_dynamic;
        return array(
            'userid' => $this->data_default['userid'],
            'versid' => $this->data_default['versid'],
            'ffversion' => $this->data_default['ffversion'],
            'safemode' => $this->data_default['safemode'],
            'os' => urlencode($this->data_default['os']),
            'vgxsid' => urlencode((string) $d['sid']),
            'url' => urlencode((string) $d['url']),
            'length' => $d['length'],
            'exactTime' => $d['exactTime'],
            'email' => urlencode($this->data_default['email']),
            'username' => urlencode($this->data_default['username']),
            'watcheduser' => urlencode((string) $d['watcheduser']),
            'liked' => $d['liked'] == 1 ? 'true' : 'false',
            'subed' => $d['subed'] == 1 ? 'true' : 'false',
            'siteid' => $d['siteid'],
            'nv' => $video_no,
            'nc' => $d['nc'],
            'chk_runtime' => $this->data_default['chk_runtime'],
            'flash' => $this->data_default['flash'],
            'pageData' => urlencode((string) $d['pageData']),
            'machine' => urlencode((string) $d['machine']),
            'html5' => $this->data_default['html5'],
            'duration' => $d['duration'],
            'currTime' => $d['currTime'],
            'speed' => $d['speed'],
            'ts' => urlencode((string) $d['ts']),
        );
    }

    /**
     * get dynamic data from play page
     *
     * Reads watcheduser/pageData/machine/duration either from the configured
     * youtube proxy (base64-encoded JSON) or by scraping the watch page, then
     * copies the item's own fields into data_dynamic.
     * @param $video_no
     */
    public function get_dynamic_data($video_no) {
        $youtube_proxy = !empty($this->data_dynamic['youtube_proxy'])
            ? $this->data_dynamic['youtube_proxy'] : false;
        if ($youtube_proxy) { //china mode
            // base64 may contain '+', '/' and '=', which must be escaped inside a query string.
            $full_url = $youtube_proxy . '?id=' . rawurlencode(base64_encode((string) $this->get_vid($video_no)));
            $this->simple_fetch($full_url);
            $info = json_decode((string) base64_decode((string) $this->http->response['body']), true);
            if (!is_array($info)) {
                $this->log('youtube proxy returned no usable data');
                $info = array();
            }
            $this->data_dynamic['watcheduser'] = self::pick($info, 'watcheduser');
            $this->data_dynamic['pageData'] = self::pick($info, 'pageData');
            $this->data_dynamic['machine'] = self::pick($info, 'machine');
            $this->data_dynamic['duration'] = self::pick($info, 'duration');
        } else {
            $full_url = $this->get_video_url($video_no);
            $this->simple_fetch($full_url);
            $play_page_response = $this->http->response;
            $this->data_dynamic['watcheduser'] = self::get_watched_userid($play_page_response['body']);
            $this->data_dynamic['pageData'] = self::get_page_title($play_page_response['body']);
            $this->data_dynamic['machine'] = self::get_visitor_id($play_page_response['headers']);
            $this->data_dynamic['duration'] = self::get_video_duration($play_page_response['body']);
        }
        $fields = isset($this->data_dynamic['video_arr'][$video_no][1])
            ? $this->data_dynamic['video_arr'][$video_no][1] : array();
        $this->data_dynamic['url'] = self::pick($fields, 0);
        $this->data_dynamic['liked'] = self::pick($fields, 3);
        $this->data_dynamic['subed'] = self::pick($fields, 4);
        $this->data_dynamic['siteid'] = self::pick($fields, 2);
        $this->data_dynamic['nv'] = $video_no;
        if (!isset($this->data_dynamic['nc'])) $this->data_dynamic['nc'] = 0;
        //$this->log('get_dynamic_data end, nc: ' . $this->data_dynamic['nc']);
    }

    /**
     * Make fake data base on random numbers or previous data
     * @param $video_no
     */
    public function make_fake_data ($video_no) {
        $known_duration = isset($this->data_dynamic['duration']) ? (float) $this->data_dynamic['duration'] : 0.0;
        $duration = round($known_duration + lcg_value(), rand(2, 3));
        $require_time = isset($this->data_dynamic['video_arr'][$video_no][1][1])
            ? (float) $this->data_dynamic['video_arr'][$video_no][1][1] : 0.0;
        $this->data_dynamic['duration'] = "$duration";
        if ($duration < $require_time + rand(1,10) ) { // video too short, it must play to the end
            $this->data_dynamic['currTime'] = "$duration";
        } else {
            $duration += rand(2, 8);
            $duration = round($duration + lcg_value(), rand(2, 3));
            $this->data_dynamic['currTime'] = "$duration";
        }
        $this->data_dynamic['speed'] = rand(140000, 200000) + lcg_value();
        $this->data_dynamic['length'] = $this->sleep_time + rand(1, 2);
        $this->data_dynamic['exactTime'] = $this->sleep_time - rand(7, 9);

        // Running minimum / maximum over all calls. The first call has to
        // seed the value; otherwise both stay unset and are reported as zero.
        $new_speed = rand(60, 69) + lcg_value();
        if (!isset($this->data_dynamic['min_speed']) || $new_speed < $this->data_dynamic['min_speed']) {
            $this->data_dynamic['min_speed'] = $new_speed;
        }
        $new_speed = rand(250000, 470000) + lcg_value();
        if (!isset($this->data_dynamic['max_speed']) || $new_speed > $this->data_dynamic['max_speed']) {
            $this->data_dynamic['max_speed'] = $new_speed;
        }

        $rand_time = array(
            rand((int) ceil($this->data_dynamic['min_speed']), (int) floor($this->data_dynamic['max_speed'])) + lcg_value(),
            rand(9000, 18200) + lcg_value(),
            rand(16000, 21000) + lcg_value(),
            rand(55000, 90000) + lcg_value(),
        );
        $this->data_dynamic['ts'] = sprintf('%f:%f:%f:%f:%f:%f',
            $this->data_dynamic['min_speed'],
            $this->data_dynamic['max_speed'],
            $rand_time[0],
            $rand_time[1],
            $rand_time[2],
            $rand_time[3]
        );
        if ($this->data_default['username'] == 'username_catching_error') {
            $this->generate_random_ytusername();
        }
    }

    public function generate_random_ytusername() {
        $this->data_default['username'] = self::generateRandomString(); //using default username catching error leads to like and sub not counting!!
    }

    /**
     * make full youtube url by vid
     * @param $video_no
     * @return string
     */
    public function get_video_url($video_no) {
        return 'http://www.youtube.com/watch?v=' . $this->get_vid($video_no);
    }

    /**
     * youtube video data api
     * @param $video_no
     * @return string
     */
    public function get_video_info_url($video_no) {
        return 'http://gdata.youtube.com/feeds/api/videos/' . $this->get_vid($video_no) . '?v=2&alt=json';
    }

    /**
     * Get Show item's youtube vid
     * NOTE(review): looks the item up by using $video_no as the index into
     * video_arr — confirm the server's video numbers match list positions.
     * @param $video_no
     * @return mixed null when the item or its first field is missing
     */
    public function get_vid($video_no) {
        return isset($this->data_dynamic['video_arr'][$video_no][1][0])
            ? $this->data_dynamic['video_arr'][$video_no][1][0] : null;
    }

    /**
     * HTTP Get and return response body
     * @param $url
     * @return mixed
     */
    public function simple_fetch($url) {
        $this->http->setMethod('GET');
        return $this->http->fetch($url);
    }

    /**
     * Preg find page title from html
     * @param $html
     * @return mixed captured title, or null when there is no match
     */
    public static function get_page_title($html) {
        return preg_match('/<title>(.*)<\/title>/', (string) $html, $match) ? $match[1] : null;
    }

    /**
     * Preg Youtube visitor id from response cookie
     * @param $head
     * @return mixed captured id, or null when there is no match
     */
    public static function get_visitor_id($head) {
        return preg_match('/VISITOR_INFO1_LIVE=(.*?);/', (string) $head, $match) ? $match[1] : null;
    }

    /**
     * Preg Youtube video owner id from html
     * @param $html
     * @return mixed captured id, or null when there is no match
     */
    public static function get_watched_userid($html) {
        return preg_match('/yt-uix-sessionlink yt-user-videos.*\/user\/(.*)\//', (string) $html, $match) ? $match[1] : null;
    }

    /**
     * Preg Youtube video duration
     * @param $html
     * @return mixed captured digits, or null when there is no match
     */
    public static function get_video_duration($html) {
        return preg_match('/length_seconds=(\d+)\\\\u0026amp;/', (string) $html, $match) ? $match[1] : null;
    }

    /**
     * Build a random alphanumeric string.
     * @param int $length
     * @return string
     */
    public static function generateRandomString($length = 10) {
        $characters = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
        $last = strlen($characters) - 1;
        $randomString = '';
        for ($i = 0; $i < $length; $i++) {
            $randomString .= $characters[rand(0, $last)];
        }
        return $randomString;
    }

    /**
     * Print a timestamped line and, if a log file is set, append it there too.
     * @param string $log_line
     */
    public function log($log_line) {
        $time_array = explode(" ", microtime()); // "0.usec sec"
        $time_array[0] = sprintf('%.6f', $time_array[0]);
        $time = date('Y/m/d H:i:s.', $time_array[1]) . substr($time_array[0], 2) ;
        $line = "[$time] $log_line" . PHP_EOL;
        if (!empty($this->log_file)) {
            file_put_contents($this->log_file, $line, FILE_APPEND | LOCK_EX);
        }
        echo $line;
    }

    /**
     * Read $arr[$key] without raising a notice.
     * @return mixed null when $arr is not an array or the key is missing
     */
    private static function pick($arr, $key) {
        return (is_array($arr) && isset($arr[$key])) ? $arr[$key] : null;
    }
}
////////////// End Class VagexCheater ///////////////////
/**
 * Http Request Class
 *
 * Minimal cURL-based HTTP client. Options, cookies and request headers are
 * accumulated on the instance and applied to a fresh cURL handle on every
 * fetch(). The outcome of the last request is exposed through the public
 * $response array with the keys 'body', 'headers', 'err_no', 'err_msg' and
 * 'info'.
 *
 * Based on @link https://apidoc.sinaapp.com/sae/SaeFetchurl.html
 * @link http://josephscott.org/archives/2010/03/php-helpers-curl_http_request/
 */
class HttpReq
{
    /** @var array Result of the most recent fetch() call. */
    public $response = array();
    /** @var array Cookie name => value pairs sent with every request. */
    private $cookies = array();
    /** @var array Header name => value pairs sent with every request. */
    private $headers = array();
    /** @var array cURL option constant => value. */
    private $curl_opt = array();

    public function __construct() {
        $this->curl_opt = array(
            CURLOPT_AUTOREFERER => true,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HEADER => true, // headers are returned in-band and split off in fetch()
            // NOTE(review): TLS peer/host verification is disabled here; responses
            // fetched over https are therefore not authenticated.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => 0,
        );
        $this->setConnectionTimeout(5); // default connect timeout: 5s
        $this->setTotalTimeout(15);     // default total timeout: 15s
        $this->setUserAgent('Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1'); // default UA
        //$this->setProxy('192.168.11.36:8880'); //fiddler debug
    }

    /**
     * Set the proxy, e.g. 127.0.0.1:8888
     * @param string $proxy
     * @param bool $is_socks5 true to talk SOCKS5 to the proxy instead of HTTP
     */
    public function setProxy($proxy, $is_socks5 = false) {
        $this->curl_opt[CURLOPT_PROXY] = $proxy;
        if ($is_socks5) {
            $this->curl_opt[CURLOPT_PROXYTYPE] = CURLPROXY_SOCKS5;
        }
    }

    /**
     * Set the request method, e.g. GET, POST or HEAD.
     * @param string $method
     */
    public function setMethod($method = 'GET') {
        // Drop flags left behind by a previously selected method so that a
        // POST followed by a GET on the same instance does not stay a POST.
        unset($this->curl_opt[CURLOPT_POST], $this->curl_opt[CURLOPT_NOBODY]);
        $this->curl_opt[CURLOPT_CUSTOMREQUEST] = $method;
        if ($method == 'POST') {
            $this->curl_opt[CURLOPT_POST] = true;
        } else if ($method == 'HEAD') {
            $this->curl_opt[CURLOPT_NOBODY] = true;
        }
    }

    /**
     * Set the connect timeout.
     * @param int $second
     */
    public function setConnectionTimeout($second) {
        $this->curl_opt[CURLOPT_CONNECTTIMEOUT] = $second;
    }

    /**
     * Set the total execution timeout.
     * @param int $second
     */
    public function setTotalTimeout($second) {
        $this->curl_opt[CURLOPT_TIMEOUT] = $second;
    }

    /**
     * Set the User-Agent string.
     * @param string $ua
     */
    public function setUserAgent($ua) {
        $this->curl_opt[CURLOPT_USERAGENT] = $ua;
    }

    /**
     * Set several cookies at once.
     * @param array $cookie_arr name => value
     */
    public function setCookies($cookie_arr) {
        if ($cookie_arr) {
            foreach ($cookie_arr as $k => $v) {
                $this->setCookie($k, $v);
            }
        }
    }

    /**
     * Set a single cookie.
     * @param string $cookie_name
     * @param string $cookie_value
     */
    public function setCookie($cookie_name, $cookie_value) {
        $this->cookies[$cookie_name] = $cookie_value;
    }

    /**
     * Set a single request header.
     * @param string $header_name
     * @param string $header_value
     */
    public function setHeader($header_name, $header_value) {
        $this->headers[$header_name] = $header_value;
    }

    /**
     * Set the POST payload, replacing whatever was set before.
     * An empty value clears the payload.
     * @param array $post_arr field => value
     * @param bool $multipart true to hand the array to cURL unmodified
     *                        (multipart/form-data), false to url-encode it
     */
    public function setPostData($post_arr, $multipart = false) {
        if (empty($post_arr)) {
            unset($this->curl_opt[CURLOPT_POSTFIELDS]);
            return;
        }
        if ($multipart) {
            $this->curl_opt[CURLOPT_POSTFIELDS] = $post_arr;
            return;
        }
        $pairs = array();
        foreach ($post_arr as $k => $p) {
            $pairs[] = urlencode($k) . '=' . urlencode($p);
        }
        $this->curl_opt[CURLOPT_POSTFIELDS] = implode('&', $pairs);
    }

    /**
     * Return the currently configured POST fields as a decoded array.
     * @return array field => value; empty when no payload is set
     */
    public function getPostData() {
        if (!isset($this->curl_opt[CURLOPT_POSTFIELDS])) {
            return array();
        }
        $stored = $this->curl_opt[CURLOPT_POSTFIELDS];
        if (is_array($stored)) {
            // Multipart payloads are stored verbatim, so there is nothing to decode.
            return $stored;
        }
        if (!is_string($stored)) {
            return array();
        }
        $decoded = array();
        foreach (explode('&', $stored) as $pair) {
            if ($pair === '') {
                continue;
            }
            $kv = explode('=', $pair, 2);
            $decoded[urldecode($kv[0])] = isset($kv[1]) ? urldecode($kv[1]) : '';
        }
        return $decoded;
    }

    /**
     * Translate the collected cookies and headers into cURL options.
     */
    private function _prepare_custom_fields() {
        if (count($this->cookies) > 0) { // cookies init
            $formatted = array();
            foreach ($this->cookies as $k => $v) {
                $formatted[] = "$k=$v";
            }
            $this->curl_opt[CURLOPT_COOKIE] = implode('; ', $formatted);
        }
        if (count($this->headers) > 0) { // headers init
            $formatted = array();
            foreach ($this->headers as $k => $v) {
                $formatted[] = "$k: $v";
            }
            $this->curl_opt[CURLOPT_HTTPHEADER] = $formatted;
        }
    }

    /**
     * Perform the request and populate $this->response.
     * @param string $url
     * @return string|bool response body on success, false when cURL reports an error
     */
    public function fetch($url) {
        $this->_prepare_custom_fields();

        $options = $this->curl_opt;
        $method = isset($options[CURLOPT_CUSTOMREQUEST]) ? strtoupper($options[CURLOPT_CUSTOMREQUEST]) : '';
        if ($method === 'GET' || $method === 'HEAD') {
            // CURLOPT_CUSTOMREQUEST only renames the verb; a payload left over
            // from an earlier POST would still be transmitted.
            unset($options[CURLOPT_POSTFIELDS]);
        }

        $curl = curl_init($url);
        if ($curl === false) {
            $this->response = array(
                'body' => '',
                'headers' => '',
                'err_no' => -1,
                'err_msg' => 'curl_init failed',
                'info' => array(),
            );
            return false;
        }
        curl_setopt_array($curl, $options);
        $raw = curl_exec($curl);
        $this->response['err_no'] = curl_errno($curl);
        $this->response['err_msg'] = curl_error($curl);
        $this->response['info'] = curl_getinfo($curl);
        curl_close($curl);

        if ($raw === false) {
            $raw = ''; // transfer failed; err_no/err_msg carry the reason
        }
        $header_size = isset($this->response['info']['header_size'])
            ? (int) $this->response['info']['header_size'] : 0;

        // cut body and header
        $this->response['headers'] = trim((string) substr($raw, 0, $header_size));
        $this->response['body'] = (string) substr($raw, $header_size);

        if ($this->response['err_no'] == 0) {
            return $this->response['body'];
        } else {
            return false;
        }
    }

    /**
     * Return the response headers of the last fetch(), without the status line.
     * @param bool $parse true for a name => value array (Set-Cookie values are
     *                    collected into a list), false for the raw header text
     * @return array|string
     */
    public function getHeaders($parse = true) {
        $raw = isset($this->response['headers']) ? $this->response['headers'] : '';
        $parts = explode("\r\n", $raw, 2);
        $headers = array_pop($parts); // array_pop needs a real variable
        if (!$parse) {
            return $headers;
        }
        $headers_new = array();
        foreach (explode("\r\n", $headers) as $line) {
            $pair = explode(':', $line, 2);
            if (count($pair) < 2 || $pair[1] === '') {
                continue; // not a "Name: value" line
            }
            $k = $pair[0];
            $v = trim($pair[1]);
            if (strtolower($k) == 'set-cookie') {
                if (array_key_exists($k, $headers_new)) {
                    array_push($headers_new[$k], $v);
                } else {
                    $headers_new[$k] = array($v);
                }
            } else {
                $headers_new[$k] = $v;
            }
        }
        return $headers_new;
    }

    /**
     * Extract cookies from the Set-Cookie headers of the last response.
     * @param bool $all true: one array per Set-Cookie header containing every
     *                  key=value pair of that header; false: a single merged
     *                  name => value map with the path and max-age attributes removed
     * @return array
     */
    public function getCookies($all = true)
    {
        $header = isset($this->response['headers']) ? $this->response['headers'] : '';
        $matchs = array();
        $cookies = array();
        $kvs = array();
        if (preg_match_all('/Set-Cookie:\s([^\r\n]+)/i', $header, $matchs)) {
            foreach ($matchs[1] as $match) {
                $cookie = array();
                foreach (explode(";", $match) as $_) {
                    // Limit of 2 keeps values that themselves contain '=' (e.g. base64).
                    $item = explode("=", trim($_), 2);
                    if (count($item) == 2) {
                        $cookie[$item[0]] = $item[1];
                    }
                }
                array_push($cookies, $cookie);
                foreach ($cookie as $name => $value) {
                    $kvs[$name] = $value;
                }
            }
        }
        if ($all) {
            return $cookies;
        }
        foreach (array_keys($kvs) as $name) {
            // Attribute names are case-insensitive ("Path" as well as "path").
            $lower = strtolower($name);
            if ($lower === 'path' || $lower === 'max-age') {
                unset($kvs[$name]);
            }
        }
        return $kvs;
    }
}
////////////// End Class HttpReq ///////////////////
<?php
/**
* Created by JetBrains PhpStorm.
* User: horsley
* Date: 13-2-6
* Time: 下午7:21
* To change this template use File | Settings | File Templates.
*/
// Proxy endpoint: takes ?id=<base64 of a YouTube video id>, fetches the watch
// page and prints base64(JSON) with watcheduser, pageData, machine, duration.
error_reporting(E_ALL);
include_once(dirname(__FILE__) . '/HttpReq.class.php');
if (isset($_GET['id']) && !empty($_GET['id'])) {
    // A '+' inside unescaped base64 reaches PHP as a space; restore it before
    // decoding, and decode strictly so that garbage input is rejected.
    $video_id = base64_decode(str_replace(' ', '+', (string) $_GET['id']), true);
    if ($video_id === false || $video_id === '') {
        echo 'invalid id';
    } else {
        // The id is caller-controlled: escape it so it cannot add parameters
        // or control characters to the request URL.
        $url = 'http://www.youtube.com/watch?v=' . rawurlencode($video_id);
        $http = new HttpReq();
        if ($http->fetch($url)) {
            $result = new stdClass();
            $result->watcheduser = get_watched_userid($http->response['body']);
            $result->pageData = get_page_title($http->response['body']);
            $result->machine = get_visitor_id($http->response['headers']);
            $result->duration = get_video_duration($http->response['body']);
            echo base64_encode(json_encode($result));
        } else {
            echo 'fetch error';
        }
    }
}
/**
 * Preg find page title from html
 * @param $html
 * @return mixed text between <title> and </title>, or null when not found
 */
function get_page_title($html) {
    // Guard the capture group: reading $match[1] after a failed match raises
    // a warning that would be printed into this script's output.
    if (preg_match('/<title>(.*)<\/title>/', (string) $html, $match)) {
        return $match[1];
    }
    return null;
}
/**
 * Preg Youtube visitor id from response cookie
 * @param $head raw response header text
 * @return mixed value of VISITOR_INFO1_LIVE, or null when not found
 */
function get_visitor_id($head) {
    // Guard the capture group: reading $match[1] after a failed match raises
    // a warning that would be printed into this script's output.
    if (preg_match('/VISITOR_INFO1_LIVE=(.*?);/', (string) $head, $match)) {
        return $match[1];
    }
    return null;
}
/**
 * Preg Youtube video owner id from html
 * @param $html
 * @return mixed text between "/user/" and "?feature=watch", or null when not found
 */
function get_watched_userid($html) {
    // Guard the capture group: reading $match[1] after a failed match raises
    // a warning that would be printed into this script's output.
    if (preg_match('/\/user\/(.*?)\?feature=watch/', (string) $html, $match)) {
        return $match[1];
    }
    return null;
}
/**
 * Preg Youtube video duration
 * @param $html
 * @return mixed digits following "length_seconds=", or null when not found
 */
function get_video_duration($html) {
    // Guard the capture group: reading $match[1] after a failed match raises
    // a warning that would be printed into this script's output.
    if (preg_match('/length_seconds=(\d+)\\\\u0026amp;/', (string) $html, $match)) {
        return $match[1];
    }
    return null;
}
@horsley
Copy link
Author

horsley commented Jan 10, 2014

不需要装x,不需要vnc,不需要firefox,不需要插件,纯php刷vagex点

使用上其实……
其实只要一个变量就是你的vagex id,换掉源代码里面的

提一下下面3个东西,都在文件的开头

1. china mode的问题:在china访问youtube获取信息受限,所以你看见代码开头有set_youtube_proxy的设置。这个youtube_proxy是部署在墙外的一个小脚本,用来获取视频信息,待会我会补充在这个gist里面

2. 运行方式:用php在命令行下面运行就可以
3. 另外注意到脚本设置了一个127.0.0.1的代理,这是因为vagex是按ip计算的,我就拿plink用我的几个vps建立了ssh tunnel,这样就可以在我国内的机器上一台机器跑多个ip的实例;如果你直接放在国外的机器上,代理要去掉

@wogong
Copy link

wogong commented Jan 10, 2014

很奇怪,我将 video_info.php 放在自己的服务器上,views 数据一直是0,改成你的链接:http://af.ihorsley.com/video_info.php 才可以。试了很多次都是这样。

@foxni
Copy link

foxni commented Jan 10, 2014

缺少了HttpReq.class.php

@horsley
Copy link
Author

horsley commented Jan 10, 2014

HttpReq.class.php就是all in one里面底部的,分离出来用就好

@orzjerry
Copy link

orzjerry commented Jan 22, 2014

非常感谢大牛,谢谢!

@wzxjohn
Copy link

wzxjohn commented Jan 25, 2014

现在脚本一直卡在Cut Show Array Failed,将代理脚本部署到自己的机器上也不行。。。

@wzxjohn
Copy link

wzxjohn commented Jan 25, 2014

实测需要更改两个参数,
build改为20131205075310
versid改为1.6.2
即可正常使用

@a7898585
Copy link

a7898585 commented Jan 28, 2014

@wzxjohn
和你一样,也是cut show array failed
修改两个参数以后仍然是这个错误

@wogong
Copy link

wogong commented Jun 20, 2014

6月初就一直提示这个错误:sever return error msg:Unable to authenticate your connection
会修复么,多谢!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment