Created
May 23, 2017 05:28
-
-
Save diman3210/a1ddb945633fa9b9152773f673dd9585 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Runtime configuration for a single run of the parser script.

// Cap the run at 58 seconds; both calls request the same limit.
ini_set('max_execution_time', '58');
set_time_limit(58);
// NOTE(review): 4048M is an unusual value, possibly a typo for 4096M; confirm before changing.
ini_set('memory_limit', '4048M');
// Report every notice, warning and error.
error_reporting(E_ALL);
// Keep the script running even if the client that started it disconnects.
ignore_user_abort(true);
/**
 * Processes one unparsed row of the `items` table.
 *
 * Picks a single row with `parse` = 0, downloads its `url` with cURL, extracts
 * the product description and the callback title from the page with phpQuery
 * and writes both back to the row, marking it with `parse` = 1. Progress is
 * appended to parser_items.txt. Terminates the script via die() when the
 * database cannot be reached, queried or updated.
 *
 * @return bool true when a row was picked up (whether or not the download
 *              succeeded), false when no row with `parse` = 0 is left.
 */
function parse_1str () {
    // The function is called repeatedly; defining an existing constant again
    // would raise a warning on every call after the first.
    if (!defined('__MYDIR__')) {
        define('__MYDIR__', str_replace('\\', '/', __DIR__));
    }
    require_once 'phpQuery/phpQuery-onefile.php';
    require_once 'recognize/recognize.php';
    require_once 'functions/proxy/getproxy.php';

    $link = mysqli_connect('localhost', 'root', '', 'metalloprokat');
    if (!$link) {
        die("Не могу подключиться к базе ".mysqli_connect_error());
    }
    $fd = fopen('parser_items.txt', 'ab');

    $query = "SELECT * FROM `items` WHERE `parse` = 0 LIMIT 1";
    $result = mysqli_query($link, $query) or die ("Не могу выбрать страницу из базы ".mysqli_error($link));
    $row = mysqli_fetch_assoc($result);

    // Nothing left to parse: release the resources instead of requesting an
    // empty URL and updating a non-existent row.
    if (!is_array($row)) {
        fclose($fd);
        mysqli_close($link);
        return false;
    }

    $id = $row['id'];
    $url = $row['url'];

    // Take a proxy record from the DB.
    // NOTE(review): only the record's user-agent is used below; host, port,
    // credentials and type are never applied to the cURL handle, so requests
    // are not actually sent through the proxy. Confirm whether that is intended.
    $proxy = getproxy();
    $user_agent = $proxy['user-agent'];

    fwrite ($fd, "$url пытаемся скачать ".date("Y-m-d H:i:s")."\r\n");

    // One cookie file per item id.
    $cookie = __MYDIR__ ."/cookie/cookie$id.txt";
    $headers = [
        'Referer: http://www.google.com/',
        "User-Agent: $user_agent"
    ];

    $curl = curl_init();
    curl_setopt_array($curl, [
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_HTTPHEADER     => $headers,
        CURLOPT_TIMEOUT        => 20,
        // Certificate and host verification are switched off, as in the original setup.
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_COOKIEFILE     => $cookie,
        CURLOPT_COOKIEJAR      => $cookie,
    ]);
    $str = curl_exec($curl);

    if (is_string($str) && curl_getinfo($curl, CURLINFO_HTTP_CODE) === 200) {
        fwrite ($fd, "$url скачали ".date("Y-m-d H:i:s")."\r\n");
        $pq = phpQuery::newDocument($str);

        // Description text without the <strong> nodes directly inside its divs.
        $descr = $pq->find(".product-description");
        $descr->find("div > strong")->remove();
        $descr = trim($descr->text());

        // Callback title without its <span> children.
        $callback = $pq->find(".title-callback > div");
        $callback->find("span")->remove();
        $callback = $callback->text();
        echo $callback;
        // Strip a trailing "по" word; fall back to the raw text if the regex fails.
        $callback = preg_replace("#\s+по\s*$#Uus", '', $callback) ?? $callback;

        // Bound parameters replace manual escaping of the scraped text.
        $stmt = mysqli_prepare($link, "UPDATE `items` SET `description` = ?, `callback` = ?, `parse` = 1 WHERE `id` = ?");
        if (!$stmt) {
            die("Не могу обновить items ".mysqli_error($link));
        }
        mysqli_stmt_bind_param($stmt, 'sss', $descr, $callback, $id);
        mysqli_stmt_execute($stmt) or die("Не могу обновить items ".mysqli_stmt_error($stmt));
        mysqli_stmt_close($stmt);
    }
    else {
        // The row keeps `parse` = 0, so it will be picked up again on the next call.
        fwrite ($fd, "$url не удалось скачать страницу ".date("Y-m-d H:i:s")."\r\n");
    }

    fclose($fd);
    curl_close($curl);
    mysqli_close($link);
    return true;
}

// Keep processing rows one at a time and stop once no unparsed row is left.
while (parse_1str()) {
    // All work happens inside parse_1str().
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment