Skip to content

Instantly share code, notes, and snippets.

@diman3210
Created May 23, 2017 05:28
Show Gist options
  • Save diman3210/a1ddb945633fa9b9152773f673dd9585 to your computer and use it in GitHub Desktop.
Save diman3210/a1ddb945633fa9b9152773f673dd9585 to your computer and use it in GitHub Desktop.
<?php
// --- Runtime configuration for this worker script ---------------------------

// Report every error level.
error_reporting(E_ALL);

// Keep running even if the client that started the request disconnects.
ignore_user_abort(true);

// Raise the memory ceiling for this run.
ini_set('memory_limit', '4048M');

// Limit execution time to 58 seconds: once through the ini setting and once
// through set_time_limit().
ini_set('max_execution_time', '58');
set_time_limit(58);
/**
 * Process a single unparsed row of the `items` table.
 *
 * Selects one row with `parse` = 0, downloads its `url` with cURL, extracts
 * the ".product-description" text and the ".title-callback > div" text from
 * the page, and stores them in `description` / `callback` while setting
 * `parse` = 1. Progress is appended to parser_items.txt.
 *
 * NOTE(review): a row whose download fails keeps `parse` = 0, so the next
 * call may select the same row again (the SELECT has no ordering or retry
 * marker) — confirm whether a failure state is wanted.
 *
 * NOTE(review): the record returned by getproxy() is only used for its
 * 'user-agent' value; the proxy itself is never applied to the cURL handle —
 * confirm whether that is intentional.
 *
 * @return bool true when a row was selected and processed (whether or not the
 *              download succeeded), false when no row with `parse` = 0 exists.
 */
function parse_1str () {
    // This function is called repeatedly; define() warns when a constant is
    // defined a second time, so only define it once.
    if (!defined('__MYDIR__')) {
        define('__MYDIR__', str_replace('\\', '/', __DIR__));
    }
    require_once 'phpQuery/phpQuery-onefile.php';
    require_once 'recognize/recognize.php';
    require_once 'functions/proxy/getproxy.php';

    $link = mysqli_connect('localhost', 'root', '', 'metalloprokat');
    if (!$link) {
        die("Не могу подключиться к базе " . mysqli_connect_error());
    }

    // Logging is best-effort: if the log file cannot be opened, the row is
    // still processed instead of failing on an invalid stream.
    $fd = fopen('parser_items.txt', 'ab');
    $log = function ($message) use ($fd) {
        if (is_resource($fd)) {
            fwrite($fd, $message . ' ' . date("Y-m-d H:i:s") . "\r\n");
        }
    };

    $query = "SELECT * FROM `items` WHERE `parse` = 0 LIMIT 1";
    $result = mysqli_query($link, $query);
    if (!$result) {
        die("Не могу выбрать страницу из базы " . mysqli_error($link));
    }
    $row = mysqli_fetch_assoc($result);
    mysqli_free_result($result);

    // Nothing left to parse: release resources and tell the caller.
    if (!$row) {
        if (is_resource($fd)) {
            fclose($fd);
        }
        mysqli_close($link);
        return false;
    }

    $id = $row['id'];
    $url = $row['url'];

    // Fetch a proxy record (the original comment says it comes from the DB).
    // Only its user agent string is used below.
    $proxy = getproxy();
    $user_agent = $proxy['user-agent'];

    $log("$url пытаемся скачать");

    $curl = curl_init();
    // One cookie jar file per item id.
    $cookie = __MYDIR__ . "/cookie/cookie$id.txt";
    $headers = [
        'Referer: http://www.google.com/',
        "User-Agent: $user_agent"
    ];
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($curl, CURLOPT_HTTPHEADER, $headers);
    curl_setopt($curl, CURLOPT_TIMEOUT, 20);
    // TLS peer and host verification are switched off for these requests.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, 0);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($curl, CURLOPT_COOKIEFILE, $cookie);
    curl_setopt($curl, CURLOPT_COOKIEJAR, $cookie);
    $str = curl_exec($curl);

    // curl_exec() returns false when the transfer fails, even if a 200 status
    // line was already received; never parse or store such a response.
    $downloaded = $str !== false
        && (int) curl_getinfo($curl, CURLINFO_HTTP_CODE) === 200;

    if ($downloaded) {
        $log("$url скачали");

        $pq = phpQuery::newDocument($str);

        // Description text without the "div > strong" elements.
        $descr = $pq->find(".product-description");
        $descr->find("div > strong")->remove();
        $descr = trim($descr->text());

        // Callback title text without its <span> children.
        $callback = $pq->find(".title-callback > div");
        $callback->find("span")->remove();
        $callback = $callback->text();
        echo $callback;
        // Strip a trailing "по" (with surrounding whitespace) from the title.
        $callback = preg_replace("#\s+по\s*$#Uus", '', $callback);

        // The function runs in a loop, so release the parsed document now
        // rather than letting parsed pages accumulate in memory.
        $pq->unloadDocument();

        // Bound parameters replace manual escaping and string interpolation.
        $stmt = mysqli_prepare(
            $link,
            "UPDATE `items` SET `description` = ?, `callback` = ?, `parse` = 1 WHERE `id` = ?"
        );
        if (!$stmt) {
            die("Не могу обновить items " . mysqli_error($link));
        }
        mysqli_stmt_bind_param($stmt, 'sss', $descr, $callback, $id);
        if (!mysqli_stmt_execute($stmt)) {
            die("Не могу обновить items " . mysqli_stmt_error($stmt));
        }
        mysqli_stmt_close($stmt);
    } else {
        $log("$url не удалось скачать страницу");
    }

    if (is_resource($fd)) {
        fclose($fd);
    }
    curl_close($curl);
    mysqli_close($link);

    return true;
}
// Process rows one at a time. The loop ends only when parse_1str() explicitly
// returns false; if it returns nothing (null), the loop keeps running until
// the script's execution time limit stops it.
while (parse_1str() !== false) {
    // Intentionally empty: all work happens inside parse_1str().
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment