Created
February 12, 2019 12:44
-
-
Save andronex/27341df9897fe56b4cf447a16af7c622 to your computer and use it in GitHub Desktop.
Импорт товаров с сайта ipaar-poliv.ru / парсер для добавления / обновления каталога товаров для интернет-магазина на MODX Revolution 2.6.5-pl+ и miniShop2 2.4.10-pl+
2. В директории parser создать директорию thumbs для сохранения скачиваемых картинок/фото товаров
3. Загрузить в директорию /parser/query/phpquery-master/phpQuery/ файлы библиотеки phpQuery
4. Поменять значения переменных на свои в файле конфига
5. Создать две новые колонки в системной таблице класса modResource с названиями url_donor и article_donor (см. код плагина customFields.php)
6. Повесить плагин customFields.php на событие OnMODXInit, чтобы расширить системную таблицу двумя новыми полями
7. Запускать парсер по URL /parser/parser_ipaar.php?tree=1&page=0
VK / i.modx@ya.ru
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require_once(dirname(__FILE__).'/config_main.php'); | |
// Donor site settings: each entry maps a donor tag to the URL of that donor's XML sitemap.
$parseripaarURL = array();
$parseripaarURL['ipaar'] = 'http://www.ipaar-poliv.ru/sitemap.xml';
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// product processing settings
$tplProduct = 18;// template ID used for product cards
$numOffset = 10;// number of products handled in one parser pass (10 by default)
$tplParent = 17;// template ID used for categories
$tplChildParent = 17;// template ID used for subcategories
$idParent = 204;// resource ID of the product catalog
$articleTVnumber = 'article_donor';// name of the field that stores the unique category identifier, later compared against the supplier's category tree
$uriTVnumber = 'url_donor';// name of the field that stores the product/category URL on the donor site
$sizesTV = 20;// ID of the TV that stores the different prices depending on product options or properties
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/*
 * Plugin that extends the modResource map (system table *_site_content) with two
 * fields: url_donor and article_donor.
 * The columns themselves must be created in the table beforehand.
 */
$tvs = array(1 => 'url_donor', 2 => 'article_donor'); // not referenced below

if ($modx->event->name == 'OnMODXInit') {
    // Metadata shared by both string columns.
    $stringMeta = array(
        'phptype' => 'string',
        'null' => false,
        'default' => '',
    );

    // Default values of the new fields.
    $modx->map['modResource']['fields']['url_donor'] = '';
    $modx->map['modResource']['fields']['article_donor'] = '';

    // Column descriptions: url_donor is TEXT, article_donor is VARCHAR(255).
    $modx->map['modResource']['fieldMeta']['url_donor'] = array('dbtype' => 'text') + $stringMeta;
    $modx->map['modResource']['fieldMeta']['article_donor'] = array('dbtype' => 'varchar', 'precision' => 255) + $stringMeta;
}

return true;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require_once(dirname(__FILE__).'/config.php'); | |
require(dirname(__FILE__).'/query/phpquery-master/phpQuery/phpQuery.php'); | |
$idParent = 3153;// overrides the catalog ID from the config: products go to Catalog -> "Автополив"
/**
 * Collapses duplicated leaf values inside a nested catalog array.
 *
 * Walks the array recursively. Whenever a value stored under the key 'parent',
 * 'url' or 'title' is itself an array, it is replaced by its element at index 0;
 * every other nested array is processed recursively.
 *
 * @param array $input nested array to normalise
 * @return array the normalised copy of $input
 */
function array_filter_recursive($input){
    foreach ($input as $key => $value) {
        if (!is_array($value)) {
            continue;
        }
        // Strict comparison: with "==" an integer key 0 equals 'parent' on PHP < 8,
        // which collapsed whole numerically indexed branches by mistake.
        if ($key === 'parent' || $key === 'url' || $key === 'title') {
            // Same result as the former $value[0], without a notice when index 0 is missing.
            $input[$key] = array_key_exists(0, $value) ? $value[0] : null;
        } else {
            $input[$key] = array_filter_recursive($value);
        }
    }
    return $input;
}
/**
 * Downloads a page by URL using cURL.
 *
 * @param string $url address to request
 * @return array the curl_getinfo() data extended with the keys 'errno', 'errmsg'
 *               and 'content' (the response body as returned by curl_exec())
 */
function get_web_page( $url ){
    $handle = curl_init($url);

    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);  // return the body instead of printing it
    curl_setopt($handle, CURLOPT_HEADER, false);         // do not include response headers in the body
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, false); // redirects are NOT followed
    curl_setopt($handle, CURLOPT_ENCODING, "");          // accept every supported content encoding
    curl_setopt($handle, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.52 Safari/537.17");
    curl_setopt($handle, CURLOPT_AUTOREFERER, true);     // referer on redirect (only matters if redirects are followed)
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 120);   // connect timeout, seconds
    curl_setopt($handle, CURLOPT_TIMEOUT, 120);          // overall timeout, seconds
    curl_setopt($handle, CURLOPT_MAXREDIRS, 10);         // redirect limit (only matters if redirects are followed)

    $body = curl_exec($handle);
    $result = curl_getinfo($handle);
    $result['errno'] = curl_errno($handle);
    $result['errmsg'] = curl_error($handle);
    $result['content'] = $body;
    curl_close($handle);

    return $result;
}
/**
 * Tells whether a key is present anywhere in a nested array.
 *
 * @param string|int $key key to look for
 * @param array      $arr array to search, including all nested arrays
 * @return bool true when the key exists at any depth
 */
function array_key_exists_recursive($key, $arr) {
    $found = array_key_exists($key, $arr);
    if (!$found) {
        foreach ($arr as $child) {
            if (is_array($child) && array_key_exists_recursive($key, $child)) {
                $found = true;
                break;
            }
        }
    }
    return $found;
}
/**
 * Builds an absolute URL for a link found on a donor page.
 *
 * - A URI that already starts with "http" is returned untouched.
 * - A protocol-relative URI ("//host/path") gets the scheme of $pURL.
 * - Any other URI is treated as a path on the host (and port) of $pURL.
 * - For categories the parameter SHOWALL_1=1 is appended to the query string,
 *   keeping an existing query and the port.
 *
 * The result no longer depends on whether the "http" PECL extension is loaded:
 * that branch used to force SHOWALL_1=1 even onto non-category URLs.
 *
 * @param string $uri         link as found in the page (absolute or relative)
 * @param string $pURL        URL of the donor page/site the link belongs to
 * @param bool   $is_category whether the link points to a category listing
 * @return string|false absolute URL, or false when it cannot be built
 */
function buildURI($uri, $pURL, $is_category = false){
    $uri = (string)$uri;
    if (0 === strpos($uri, 'http')) {
        return ($uri)?:false;
    }

    $base = parse_url($pURL);
    if (!is_array($base) || empty($base['scheme']) || empty($base['host'])) {
        return false;
    }

    if (0 === strpos($uri, '//')) {
        // Protocol-relative link: only the scheme is missing.
        $absolute = $base['scheme'] . ':' . $uri;
    } else {
        $absolute = $base['scheme'] . '://' . $base['host'];
        if (isset($base['port'])) {
            $absolute .= ':' . $base['port'];
        }
        $absolute .= '/' . ltrim($uri, '/');
    }

    if (!$is_category) {
        return $absolute;
    }

    $target = parse_url($absolute);
    if (!is_array($target) || empty($target['host'])) {
        return false;
    }
    $query = http_build_query(array('SHOWALL_1' => '1'));
    if (!empty($target['query'])) {
        $query = $target['query'] . '&' . $query;
    }
    $result = $target['scheme'] . '://' . $target['host'];
    if (isset($target['port'])) {
        $result .= ':' . $target['port'];
    }
    $result .= (isset($target['path']) ? $target['path'] : '/') . '?' . $query;

    return $result;
}
/**
 * Builds the catalog-tree fragment for one breadcrumb item (called from parseCategory()).
 *
 * @param object $current_pq     phpQuery object wrapping the breadcrumb element
 * @param string $title_parent   title of the previous (parent) breadcrumb
 * @param string $title_category title of the catalog root; also replaces the word "Каталог"
 * @param string $uri_prod       URL of the page being parsed
 * @param bool   $is_goods       true for the last breadcrumb, i.e. the one that holds the product
 * @param string $article        product identifier used as the key in 'products'
 * @return array tree fragment keyed by category title
 */
function fetchTree($current_pq, $title_parent, $title_category, $uri_prod, $is_goods = false, $article = ''){
    $crumbTitle = str_replace('Каталог', $title_category, $current_pq->find('span[itemprop="title"]')->text());
    $crumbHref = $current_pq->find('a')->attr('href');
    $isRootCrumb = ($crumbTitle == $title_category);

    $out = array();

    if (!$isRootCrumb) {
        // Register this breadcrumb as a child category of its parent.
        $out[$title_parent]['categories'][$crumbHref]['url'] = buildURI($crumbHref, $uri_prod, true);
        $out[$title_parent]['categories'][$crumbHref]['title'] = $crumbTitle;
    }

    $out[$crumbTitle]['parent'] = $title_parent;

    if (!$isRootCrumb && $is_goods) {
        // The last breadcrumb is the category that directly contains the product.
        $out[$crumbTitle]['products'][ $article ]['url'] = $uri_prod;
    }

    if ($title_parent == $title_category) {
        // Direct children of the catalog root are also listed under the root title.
        $out[$title_category]['categories'][$crumbHref]['url'] = buildURI($crumbHref, $uri_prod, true);
        $out[$title_category]['categories'][$crumbHref]['title'] = $crumbTitle;
    }

    return $out;
}
/**
 * Parses the breadcrumbs of a donor page and returns the catalog structure they describe.
 *
 * @param object $document       phpQuery document of the page
 * @param string $pURL           donor URL (not used inside this function)
 * @param string $targetURL      URL of the parsed page; also the fallback product identifier
 * @param string $title_category title of the catalog root
 * @param array  $out            ignored: the result is always built from scratch
 * @param string $title_parent   not used inside this function
 * @return array|string merged tree fragments, or '' when the page has no breadcrumbs
 */
function parseCategory($document, $pURL, $targetURL, $title_category = 'Автополив', $out = array(), $title_parent = ''){
    $crumbs = $document->find('.bx-breadcrumb-item[itemprop="child"]');
    // Product article from the page; falls back to the page URL when the article is empty.
    $article = ($document->find('.product-one__acticle--a')->text())?:$targetURL;

    $tree = array();
    $total = count($crumbs);
    $position = 0;
    $previousTitle = '';

    foreach ($crumbs as $crumb) {
        $position++;
        $node = pq($crumb);
        if ($position == $total) {
            // The last breadcrumb carries the product itself.
            $branch = fetchTree($node, $previousTitle, $title_category, $targetURL, true, $article);
        } else {
            $branch = fetchTree($node, $previousTitle, $title_category, $targetURL);
        }
        $tree = array_merge_recursive($tree, $branch);
        $previousTitle = str_replace('Каталог', $title_category, $node->find('span[itemprop="title"]')->text());
    }

    return ($tree)?:'';
}
/**
 * Downloads an image into the local thumbs/ directory unless it is already there.
 *
 * Compared with the first version:
 *  - an empty file name no longer resolves to the thumbs/ directory itself;
 *  - the file name is reduced to its base name, so it cannot escape thumbs/;
 *  - a failed transfer or an HTTP error response is not written to disk, and a
 *    zero-byte file left by an earlier failure is downloaded again;
 *  - the cURL handle is closed on every path.
 *
 * @param string $image     file name to store the image under
 * @param string $urlImages absolute URL of the image
 * @return string full path of the stored file, or '' on failure
 */
function downloadImg($image, $urlImages){
    global $modx;

    $image = basename((string)$image);
    if ($image === '' || !$urlImages) {
        return '';
    }

    $target = dirname(__FILE__).'/thumbs/'.$image;
    if (is_file($target) && filesize($target) > 0) {
        return $target;
    }

    $body = false;
    $httpCode = 0;
    $ch = curl_init($urlImages);
    if ($ch !== false) {
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        $body = curl_exec($ch);
        $httpCode = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);
    }

    $failed = ($body === false || $body === '' || $httpCode >= 400);
    if ($failed || !file_put_contents($target, $body)) {
        $modx->log(modX::LOG_LEVEL_ERROR, "Неудача при скачивании картинки товара {$urlImages}");
        return '';
    }

    return $target;
}
/**
 * Looks up the category $pagetitle among the child categories of $grand_parent.
 *
 * @param array  $massiv       catalog tree keyed by category title
 * @param string $grand_parent title of the category whose children are searched
 * @param string $pagetitle    title of the category to find
 * @return array empty when nothing matches, otherwise the keys identifier
 *               (global $tag + '_' + category key), grand_parent, parent,
 *               pagetitle and url_donor
 */
function getTree($massiv, $grand_parent, $pagetitle){
    if (!isset($massiv[$grand_parent]['categories'])) {
        return array();
    }

    global $tag;

    foreach ($massiv[$grand_parent]['categories'] as $art => $category) {
        if ($category['title'] == $pagetitle) {
            return array(
                'identifier' => "{$tag}_{$art}",
                'grand_parent' => $massiv[$grand_parent]['parent'],
                'parent' => $grand_parent,
                'pagetitle' => $pagetitle,
                'url_donor' => $category['url'],
            );
        }
    }

    return array();
}
// Load MODX in API mode through the site's index.php, one directory above the parser.
define('MODX_API_MODE', true);
require_once dirname(__DIR__) . '/index.php';

// Enable error handling and start with an empty error message.
$modx->getService('error', 'error.modError');
$modx->error->message = null;

// Log errors only; echo them in CLI mode, print them as HTML otherwise.
$modx->setLogLevel(modX::LOG_LEVEL_ERROR);
$modx->setLogTarget(XPDO_CLI_MODE ? 'ECHO' : 'HTML');
/**
 * Makes sure the category $pagetitle exists and returns its resource ID.
 *
 * Starting from the requested category, the function walks up the donor tree
 * (via getTree()) until it meets a category that already exists or reaches the
 * top of the tree, then creates the missing categories from the top down.
 *
 * Changes against the first version: an empty lookup or a broken ancestor chain
 * returns false instead of reading undefined variables and trying to create a
 * category with empty fields; the parent ID is tracked explicitly instead of
 * being read from possibly undefined $response / $cat.
 *
 * @param array       $massiv       catalog tree keyed by category title
 * @param string      $pagetitle    title of the category that must exist
 * @param int|string  $parent       resource ID of the catalog root; falls back to $idParent when empty
 * @param string      $grand_parent title of the parent category in the donor tree
 * @param int|null    $template     template ID for new categories; falls back to $tplParent
 * @return int|false ID of the (existing or created) category, false on failure
 */
function createCategory($massiv, $pagetitle, $parent, $grand_parent, $template = null) {
    global $modx, $tplParent, $idParent, $uriTVnumber, $articleTVnumber;

    if (!$parent) $parent = $idParent;
    if (!$template) $template = $tplParent;

    $tree = getTree($massiv, $grand_parent, $pagetitle);
    if (!$tree) {
        return false;
    }

    // Collect the categories that are missing, from the requested one upwards.
    $pending = array();
    $existing = null;
    while (true) {
        $existing = $modx->getObject('msCategory', array($articleTVnumber => $tree['identifier']));
        if ($existing) {
            break;
        }
        $pending[] = array(
            $articleTVnumber => $tree['identifier'],
            $uriTVnumber => $tree['url_donor'],
            'parent' => $tree['parent'],
            'class_key' => 'msCategory',
            'pagetitle' => $tree['pagetitle'],
            'template' => $template,
            'published' => 1,
            'context_key' => 'web'
        );
        if (!$tree['grand_parent']) {
            break;
        }
        $tree = getTree($massiv, $tree['grand_parent'], $tree['parent']);
        if (!$tree) {
            // The ancestor is not present in the tree: nothing sensible can be created.
            $modx->log(modX::LOG_LEVEL_ERROR, "Не найдена родительская категория для \"{$pagetitle}\"");
            return false;
        }
    }

    // ID of the closest ancestor known so far: the existing category, later the last created one.
    $lastId = $existing ? $existing->get('id') : null;

    foreach (array_reverse($pending) as $fields) {
        if ($fields['parent'] == 'Автополив' || $lastId === null) {
            // Children of the catalog root (or a chain without any known ancestor) go under $parent.
            $fields['parent'] = $parent;
        } else {
            $fields['parent'] = $lastId;
        }
        $response = $modx->runProcessor('resource/create', $fields);
        if (!$response || $response->isError()) {
            print_r($modx->error->failure($response ? $response->getMessage() : ''));
            return false;
        }
        if (isset($response->response['object']['id'])) {
            $lastId = $response->response['object']['id'];
        }
    }

    return ($lastId !== null) ? $lastId : false;
}
/**
 * Attaches a downloaded image to a product through the miniShop2 gallery processor.
 *
 * After a successful upload the `name` of the product's gallery files is set to
 * the product title and the local source file is deleted.
 * The title is now bound as a query parameter: titles containing a double quote
 * (inch sizes, for example) used to break the UPDATE statement.
 *
 * @param string $image     full path of the local image file; empty means "nothing to attach"
 * @param object $res       product resource; only get('id') is used
 * @param string $pagetitle product title used as the gallery file name
 * @return bool true when the image was attached, false otherwise
 */
function addImages($image, $res, $pagetitle){
    global $modx;

    if (!$image) {
        return false;
    }

    $productId = (int)$res->get('id');
    $response_img = $modx->runProcessor('gallery/upload',
        array('id' => $productId, 'name' => $pagetitle, 'file' => $image),
        array('processors_path' => MODX_CORE_PATH.'components/minishop2/processors/mgr/')
    );
    if (!$response_img || $response_img->isError()) {
        $errors = $response_img ? print_r($response_img->getAllErrors(), 1) : '';
        $modx->log(modX::LOG_LEVEL_ERROR, "Ошибка привязки картинки \"{$image}\" к товару id = {$productId}: \n". $errors);
        return false;
    }

    $sql = "UPDATE {$modx->getTableName('msProductFile')} SET `name` = ? WHERE `product_id` = ?;";
    $stmt = $modx->prepare($sql);
    if ($stmt && $stmt->execute(array($pagetitle, $productId))) {
        $stmt->closeCursor();
    } else {
        $modx->log(modX::LOG_LEVEL_ERROR, "Не удалось переименовать файлы галереи товара id = {$productId}");
    }

    if (is_file($image)) {
        unlink($image);
    }
    $modx->log(modX::LOG_LEVEL_INFO, "Удачно загружена картинка \"$image\": \n". print_r($response_img->getObject(), 1));

    return true;
}
/**
 * Re-saves the option rows of an existing product without duplicates.
 *
 * Values are bound as query parameters: the earlier string-built INSERT failed
 * on values containing a double quote after the rows had already been deleted.
 *
 * @param object $res existing msProduct
 * @return void
 */
function parserResaveProductOptions($res) {
    global $modx;

    $data = $res->getOne('Data');
    if (!$data) {
        return;
    }
    $productId = (int)$res->get('id');
    $values = $res->toArray();

    // One row per unique (key, value) pair.
    $rows = array();
    foreach ((array)$data->getOptionKeys() as $key) {
        if (!isset($values[$key])) {
            continue;
        }
        if (is_array($values[$key])) {
            $items = $values[$key];
        } else {
            $items = empty($values[$key]) ? array() : array($values[$key]);
        }
        foreach ($items as $item) {
            $rows[$key . "\0" . $item] = array($productId, $key, (string)$item);
        }
    }
    if (!$rows) {
        return;
    }

    $table = $modx->getTableName('msProductOption');
    $delete = $modx->prepare("DELETE FROM {$table} WHERE product_id = ?;");
    $insert = $modx->prepare(
        "INSERT INTO {$table} (`product_id`,`key`,`value`) VALUES "
        . implode(',', array_fill(0, count($rows), '(?,?,?)')) . ';'
    );
    if (!$delete || !$insert) {
        $modx->log(modX::LOG_LEVEL_ERROR, "Не удалось подготовить запросы для опций товара id = {$productId}");
        return;
    }

    $delete->execute(array($productId));
    $delete->closeCursor();

    $params = array();
    foreach ($rows as $row) {
        $params = array_merge($params, $row);
    }
    if (!$insert->execute($params)) {
        $modx->log(modX::LOG_LEVEL_ERROR, "Не удалось пересохранить опции товара id = {$productId}");
    }
    $insert->closeCursor();
}

/**
 * Deletes the image files downloaded for a product that does not need them.
 *
 * @param array $prop product data; the keys 'image' and 'images' are read
 * @return void
 */
function parserRemoveDownloadedImages($prop) {
    $files = array();
    if (isset($prop['image'])) {
        $files[] = $prop['image'];
    }
    if (isset($prop['images']) && is_array($prop['images'])) {
        $files = array_merge($files, $prop['images']);
    }
    foreach ($files as $file) {
        if ($file && is_file($file)) {
            unlink($file);
        }
    }
}

/**
 * Creates a new product or updates the existing one with the same article_donor.
 *
 * Existing product: options are re-saved and the fields new, editedon, published,
 * searchable and price are updated; downloaded images are discarded.
 * New product: the resource is created with "<article>-<title>" as pagetitle
 * (so the alias is built from it), then the pagetitle is reset to the plain
 * title, options are stored and linked to the category, and images are attached.
 *
 * @param array  $prop         product data prepared by the import loop
 * @param array  $filtersArray not used
 * @param array  $gallery      not used
 * @param string $artForAlias  not used
 * @return string|array 'upd' or 'new' on success, an array of errors on failure
 */
function createProduct($prop, $filtersArray = array(), $gallery = array(), $artForAlias = '') {
    global $modx, $sizesTV;

    $q = $modx->newQuery('msProduct');
    $q->select($modx->getSelectColumns('msProduct','msProduct').','.$modx->getSelectColumns('msProductData','Data').','.$modx->getSelectColumns('msProductOption','Option'));
    $q->innerJoin('msProductData', 'Data', 'msProduct.id = Data.id');
    // NOTE(review): RIGHT JOIN combined with a WHERE on msProduct looks like it only
    // matches products that have at least one option row - confirm this is intended.
    $q->rightJoin('msProductOption', 'Option', 'msProduct.id = Option.product_id');
    $q->where(array('msProduct.article_donor' => $prop['article_donor']));
    $q->prepare();

    if ($res = $modx->getObject('msProduct', $q)) {
        $prop['editedon'] = date("Y-m-d H:i:s");

        parserResaveProductOptions($res);

        $res->set('new', $prop['new']);
        $res->set('editedon', $prop['editedon']);
        $res->set('published', $prop['published']);
        $res->set('searchable', $prop['searchable']);
        $res->set('price', $prop['price']);
        $res->save();

        // Images are attached only when a product is created.
        parserRemoveDownloadedImages($prop);

        return 'upd';
    }

    $modx->error->message = null;
    $prop['original_pagetitle'] = $prop['pagetitle'];
    // The article is prepended only for the creation step, where the alias is generated.
    $prop['pagetitle'] = $prop['article'].'-'.$prop['pagetitle'];
    if (isset($prop['tv'.$sizesTV])) {
        $prop['tvs'] = true;
    }

    $response = $modx->runProcessor('resource/create', $prop,
        array('processors_path' => MODX_CORE_PATH.'model/modx/processors/')
    );
    if ($response->isError()) {
        return $response->getAllErrors();
    }
    $resource = $response->getObject();

    // Restore the plain title now that the resource exists.
    $res = $modx->getObject('msProduct', array('id' => $resource['id']));
    if (!$res) {
        return array("Созданный товар id = {$resource['id']} не найден");
    }
    $res->set('pagetitle', $prop['original_pagetitle']);
    $res->save();

    // Store the product options and enable each known option for the product's category.
    if (isset($prop['options']) && is_array($prop['options'])) {
        $categoryId = $res->get('parent');
        foreach ($prop['options'] as $keyF => $valF) {
            if (!$valF) {
                continue;
            }
            $optionProduct = $modx->newObject('msProductOption');
            $optionProduct->set('product_id', $resource['id']);
            $optionProduct->set('key', $keyF);
            $optionProduct->set('value', $valF);
            $optionProduct->save();

            $option = $modx->getObject('msOption', array('key' => $keyF));
            if (!$option) {
                continue;
            }
            $optionId = $option->get('id');
            if (!$modx->getObject('msCategoryOption', array('option_id' => $optionId, 'category_id' => $categoryId))) {
                $optionCat = $modx->newObject('msCategoryOption');
                $optionCat->set('option_id', $optionId);
                $optionCat->set('category_id', $categoryId);
                $optionCat->set('active', 1);
                $optionCat->save();
            }
        }
    }

    // Remove option rows with empty values for this product.
    $sql = "DELETE FROM {$modx->getTableName('msProductOption')} WHERE `product_id` IN ({$resource['id']}) AND (`value`='' OR `value` IS NULL);";
    $stmt = $modx->prepare($sql);
    if ($stmt) {
        $stmt->execute();
        $stmt->closeCursor();
    }

    // Attach the main image first, then the additional ones.
    if (!empty($prop['image'])) {
        addImages($prop['image'], $res, $prop['original_pagetitle']);
    }
    if (isset($prop['images']) && is_array($prop['images'])) {
        foreach ($prop['images'] as $imgs) {
            addImages($imgs, $res, $prop['original_pagetitle']);
        }
    }

    return 'new';
}
// Stage 1: build the catalog tree of every donor (only for ?tree=1&page=N) and count its contents.
// The tree is stored serialized in "<tag>.txt" next to this script. The file is written
// by this script only, so unserialize() is not applied to external data here.
$document = '';
$count = 0;
$complete = true;
$count_items = 0;
$count_products = 0;
foreach ($parseripaarURL as $tag => $pURL) {
    $treeFile = dirname(__FILE__)."/{$tag}.txt";
    $complete = true;

    if (isset($_GET['tree']) && $_GET['tree'] == 1 && isset($_GET['page'])) {
        $startFrom = (int)$_GET['page'];
        $result_array = array();
        $parent = 'Автополив';
        $pageURL = $pURL;

        // Continue an already started tree when this is not the first page.
        if ($startFrom > 0 && file_exists($treeFile)) {
            $stored = unserialize(file_get_contents($treeFile));
            if (is_array($stored)) {
                $result_array = $stored;
            }
        }

        // The sitemap is needed only while the tree is being built.
        $sitemap = get_web_page($pURL);
        $xml = false;
        if (!empty($sitemap['content'])) {
            // simplexml_load_string() returns false on malformed XML instead of throwing.
            $previousSetting = libxml_use_internal_errors(true);
            $xml = simplexml_load_string($sitemap['content']);
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }

        if ($xml === false) {
            $modx->log(modX::LOG_LEVEL_ERROR, "Не удалось загрузить карту сайта {$pURL}");
        } else {
            $iter = $off_tree = 0;
            foreach ($xml->url as $url_list) {
                $off_tree++;
                if ($off_tree <= $startFrom) {
                    continue;
                }
                $url_goods = (string)$url_list->loc;
                if (stripos($url_goods, '/catalog/') === false) {
                    continue;
                }
                $complete = false;
                $iter++;

                $page = get_web_page($url_goods);
                if (!empty($page['content'])) {
                    $document = phpQuery::newDocument($page['content']);
                    // Only product pages contribute to the tree.
                    if (count($document->find('.product-one')) > 0) {
                        $result = parseCategory($document, $pURL, $page['url'], $parent, $result_array);
                        if ($result) {
                            $result_array = array_filter_recursive(array_merge_recursive($result_array, $result));
                            if (file_put_contents($treeFile, serialize($result_array)) === false) {
                                $modx->log(modX::LOG_LEVEL_ERROR, "Не удалось записать файл дерева каталога {$treeFile}");
                            }
                        }
                        unset($result);
                    }
                }

                // At most 50 catalog pages per request.
                if ($iter >= 50) {
                    break;
                }
            }
        }
    }

    // Statistics of the stored tree; a missing or unreadable file counts as empty.
    $catalog_file = file_exists($treeFile) ? unserialize(file_get_contents($treeFile)) : false;
    if (!is_array($catalog_file)) {
        $catalog_file = array();
    }
    $count_items = count($catalog_file);
    $count_products = 0;
    foreach ($catalog_file as $catalog_file_item) {
        if (isset($catalog_file_item['products'])) {
            $count_products = $count_products + count($catalog_file_item['products']);
        }
    }
    $count++;
}
// Stage 1 output: print the tree statistics and a link to the next step, then stop.
// The link is followed automatically by the inline script.
if (isset($_GET['tree']) && $_GET['tree'] == 1) {
    $requestUri = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
    $shownCount = isset($count) ? $count : 0;
    $shownItems = isset($count_items) ? $count_items : 0;
    $shownProducts = isset($count_products) ? $count_products : 0;

    echo "<pre>";
    echo "построено каталогов: {$shownCount}\n";
    echo "элементов в каталоге: {$shownItems}\n";
    echo "товаров в каталоге: {$shownProducts}\n";

    if (!isset($complete) || $complete) {
        // The tree is finished: switch to the product import.
        $URL_GET = str_replace('?tree=1', '?off_set=0', $requestUri);
        $caption = "Ссылка для продолжения обновления каталога";
    } else {
        // More sitemap entries remain: continue with the next 50.
        $currentPage = isset($_GET['page']) ? $_GET['page'] : 0;
        $pager = (int)$currentPage + 50;
        $URL_GET = str_replace('page='.$currentPage, 'page='.$pager, $requestUri);
        $caption = "Ссылка для продолжения формирования дерева каталога";
    }

    // REQUEST_URI comes from the client, so it is escaped before being printed.
    $safeUrl = htmlspecialchars($URL_GET, ENT_QUOTES, 'UTF-8');
    echo "{$caption}: <a id='link' href='{$safeUrl}'><span id='offset'>{$safeUrl}</span></a>\n";
    echo "</pre>";

    echo '<script type="text/javascript">
if(document.readyState||document.body.readyState==\'complete\'){
var url = document.getElementById(\'link\').getAttribute(\'href\');
window.location = url;
};
</script>';
    exit;
}
// Stage 2: import products from the stored catalog tree, $numOffset products per request.
$catalogPath = dirname(__FILE__)."/{$tag}.txt";
$catalog = file_exists($catalogPath) ? unserialize(file_get_contents($catalogPath)) : false;
if (!is_array($catalog)) {
    // No tree yet (or an unreadable file): nothing to import.
    $catalog = array();
}
$info = array('upd' => 0, 'new' => 0, 'error' => 0);
$i = $a = 0;
// Number of products already handled by previous requests.
$off_set = empty($_GET['off_set']) ? 0 : (int)$_GET['off_set'];

// Downloads one product image and returns its local path ('' when there is nothing usable).
// The file name is the last path segment of the image URL and must end in a 3-4 letter extension.
$fetchImage = function ($src, $pageUrl) {
    if (!$src) {
        return '';
    }
    $imageUrl = buildURI($src, $pageUrl);
    if (!$imageUrl) {
        return '';
    }
    $fileName = basename((string)parse_url($imageUrl, PHP_URL_PATH));
    if ($fileName === '' || !preg_match('/\.[a-zA-Z]{3,4}$/', $fileName)) {
        return '';
    }
    return downloadImg($fileName, $imageUrl);
};

foreach ($catalog as $category => $tree) {
    if (empty($tree['products']) || !is_array($tree['products'])) {
        continue;
    }
    foreach ($tree['products'] as $article => $product) {
        $i++;
        if ($i <= $off_set) {
            continue;
        }
        $a++;

        // Last non-empty path segment of the article; the trailing slash of URL-like
        // articles used to produce an empty segment.
        $segments = explode('/', rtrim((string)$article, '/'));
        $lastSegment = end($segments);

        $data = array('parent' => $idParent, 'context_key' => 'web', 'template' => $tplProduct, 'class_key' => 'msProduct', 'published' => 1);
        $data['parent_name'] = $category;
        $data['grand_parent_name'] = !empty($tree['parent']) ? $tree['parent'] : '';
        $data['article_donor'] = "{$tag}_".str_replace(array('/','.html'), array('-',''), $article);
        $data['article'] = "{$tag}_".str_replace(array('/','.html'), array('-',''), $lastSegment);
        $data['url_donor'] = $product['url'];

        $page = get_web_page($data['url_donor']);
        $document = phpQuery::newDocument($page['content']);

        // Products without a price are skipped and counted as errors.
        $prices = trim((string)$document->find('.product-price')->attr('data-price'));
        if (!$prices) {
            $info['error']++;
            if ($a >= $numOffset) break 2;
            continue;
        }

        $data['price'] = $prices;
        $data['pagetitle'] = $document->find('h1.content__title')->text();
        $data['content'] = trim($document->find('.visible__pc1 .product-parametr')->html());
        $data['content'] .= "<div class=\"dop_desc\">" . trim($document->find('.product-one__desc')->html()) . "</div>";
        $data['new'] = false;
        $data['searchable'] = (count($document->find('.product-one__status1')) > 0);

        // Main image.
        $data['image'] = '';
        $mainImage = $document->find('.product-one__big img:first');
        if (count($mainImage) > 0) {
            $data['image'] = $fetchImage($mainImage->attr('src'), $data['url_donor']);
        }

        // Additional images.
        $data['images'] = array();
        foreach ($document->find('.product-one__big img:not(:first)') as $img) {
            $localPath = $fetchImage(pq($img)->attr('src'), $data['url_donor']);
            if ($localPath) {
                $data['images'][] = $localPath;
            }
        }

        $data['options'] = array('icon-return' => 1, 'matherial' => '', 'icon-delivery' => 1, 'displacement' => '');

        // Build the category chain down to the product and get the ID of its direct parent.
        $parent = createCategory($catalog, $data['parent_name'], '', $data['grand_parent_name']);
        if (!$parent) {
            // Count the failure and still honour the per-request limit.
            $info['error']++;
            if ($a >= $numOffset) break 2;
            continue;
        }
        $data['parent'] = $parent;

        // Create or update the product and record what happened.
        $goods = createProduct($data);
        if ($goods === 'upd') {
            $info['upd']++;
        } elseif ($goods === 'new') {
            $info['new']++;
        } else {
            $info['error']++;
        }

        // Stop once the per-request limit is reached.
        if ($a >= $numOffset) break 2;
    }
}
// Stage 2 output: print the statistics of this request and the link to the next batch.
// While products were processed, the inline script follows the link automatically.
$off_set = $off_set + $numOffset;

$requestUri = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
if (preg_match('/([?&])off_set=[^&#]*/', $requestUri)) {
    // Replace whatever value off_set currently has; a textual match on the old
    // number failed for "off_set=" or "off_set=00" and reloaded the same batch forever.
    $URL_GET = preg_replace('/([?&])off_set=[^&#]*/', '${1}off_set=' . $off_set, $requestUri, 1);
} else {
    $URL_GET = $requestUri . (strpos($requestUri, '?') === false ? '?' : '&') . 'off_set=' . $off_set;
}
// REQUEST_URI comes from the client, so it is escaped before being printed.
$safeUrl = htmlspecialchars($URL_GET, ENT_QUOTES, 'UTF-8');

if (!XPDO_CLI_MODE) {echo '<pre>';}
echo "\nИмпорт прошёл за ".number_format(microtime(true) - $modx->startTime, 7) . " сек.\n";
echo "Итого обработано\n товаров за проход: {$a}\n обновлено из них: {$info['upd']}\n создано новых из них: {$info['new']}\n ошибок, в том числе пропущенных без цен: {$info['error']}\n";
echo "Всего обработано товаров: <a id='link' href='{$safeUrl}'><span id='offset'>$off_set</span></a>\n";

if ($a == 0) {
    // Nothing left to process: finish and clear the context settings cache.
    $autofset = '';
    echo "<span style='color:green'>Загрузка завершена</span>";
    $paths = array('context_settings/');
    $options = array('objects' => null, 'extensions' => array('.php'));
    $modx->cacheManager->clearCache($paths, $options);
} else {
    $autofset = 'window.location = url;';
}

if (!XPDO_CLI_MODE) {
    echo '</pre>
<script type="text/javascript">
if(document.readyState||document.body.readyState==\'complete\'){
var url = document.getElementById(\'link\').getAttribute(\'href\');
'.$autofset.'
};
</script>
';
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment