Skip to content

Instantly share code, notes, and snippets.

@dev101
Created August 26, 2016 11:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dev101/60d7e5a716605c07024407c28800370d to your computer and use it in GitHub Desktop.
Save dev101/60d7e5a716605c07024407c28800370d to your computer and use it in GitHub Desktop.
Cian parse
// Walk every offer row (tr.offer_container) of the parsed listing page and
// append one normalised ad record per row to $result.
// NOTE(review): $html is presumably a simple_html_dom document created by the
// caller, and $result is expected to be consumed after the loop - confirm.
foreach ($html->find('tr.offer_container') as $tr) {
    // Room count; stays '' when the column is missing.
    $el = $tr->find('td.objects_item_info_col_2', 0);
    $rooms = is_object($el) ? intval($el->plaintext) : '';

    // Metro block, expected as "м. <station> <minutes> мин. <how>".
    // The station capture is lazy rather than "one word" so multi-word station
    // names still match; /u makes the Cyrillic text be treated as UTF-8.
    $station = [];
    $el = $tr->find('div.objects_item_metro', 0);
    if (is_object($el)) {
        preg_match('@м\.\s+(.+?)\s+(\d+)\s+мин\.\s+(.+?)\s*$@u', $el->plaintext, $station);
    }

    // Address fragments in document order.
    $addrBlocks = [];
    foreach ($tr->find('div.objects_item_addr') as $addrBlock) {
        $addrBlocks[] = trim($addrBlock->plaintext);
    }

    // "<floor>/<maxFloor> [building type]". The type is optional, so a bare
    // "5/9" still yields the floors. The cell itself may be absent.
    $bldInfo = [];
    $el = $tr->find('td.objects_item_info_col_5', 0);
    if (is_object($el)) {
        preg_match('@^\s*(\d+)/(\d+)(?:\s+(\S.+))?@u', $el->plaintext, $bldInfo);
    }

    // Price: the first run of digit groups separated by single whitespace
    // characters (with /u this includes non-breaking spaces).
    $priceInfo = [];
    $el = $tr->find('div.objects_item_price', 0);
    if (is_object($el)) {
        preg_match('@(\d+(?:\s\d+)*)@u', $el->plaintext, $priceInfo);
    }

    // Areas. Cells labelled Общая / Жилая / Кухня carry one number each; any
    // other non-empty cell is kept as the per-room breakdown string.
    $totalArea = false;
    $livingArea = false;
    $kitchenArea = false;
    $roomsArea = false;
    foreach ($tr->find('table.objects_item_props > td') as $prop) {
        $text = $prop->plaintext;
        $isTotal = mb_strpos($text, 'Общая') !== false;
        $isLiving = !$isTotal && mb_strpos($text, 'Жилая') !== false;
        $isKitchen = !$isTotal && !$isLiving && mb_strpos($text, 'Кухня') !== false;

        if ($isTotal || $isLiving || $isKitchen) {
            // A labelled cell without a number is skipped instead of being
            // mistaken for the room breakdown.
            // The pattern captures the integer part plus at most one digit
            // after a decimal comma.
            if (preg_match('@(\d+,?\d?)@', $text, $areaInfo)) {
                $areaValue = floatval(str_replace(',', '.', $areaInfo[1]));
                if ($isTotal) {
                    $totalArea = $areaValue;
                } elseif ($isLiving) {
                    $livingArea = $areaValue;
                } else {
                    $kitchenArea = $areaValue;
                }
            }
            continue;
        }

        // Normalise "a/b-c" style separators to "a+b+c" and drop spaces.
        $breakdown = str_replace(' ', '', str_replace(['/', '-'], '+', trim($text)));
        if ($breakdown !== '') {
            // An empty cell must not erase a breakdown found earlier.
            $roomsArea = $breakdown;
        }
    }

    // Free-form object properties as label/value pairs.
    $objProps = [];
    foreach ($tr->find('table.objects_item_details > td') as $prop) {
        $text = trim($prop->plaintext);
        if (mb_strpos($text, 'Альтернатива') !== false || mb_strpos($text, 'Свободная') !== false) {
            $objProps[] = ['label' => 'Тип продажи', 'value' => $text];
        } elseif (mb_strpos($text, 'Новостройка') !== false || mb_strpos($text, 'Вторичка') !== false) {
            $objProps[] = ['label' => 'Тип дома', 'value' => $text];
        } elseif (mb_strpos($text, 'Сдача ГК:') !== false) {
            // Limit 2 keeps a value that itself contains ':' intact.
            $objProp = explode(':', $text, 2);
            $objProps[] = ['label' => 'Сдача ГК', 'value' => trim($objProp[1])];
        } elseif (mb_strpos($text, 'Дом сдан') !== false) {
            $objProps[] = ['label' => 'Сдача ГК', 'value' => $text];
        } else {
            // Generic "Label: value" cell; anything else is ignored.
            $objProp = explode(': ', $text);
            if (2 === count($objProp)) {
                $objProps[] = [
                    'label' => trim($objProp[0]),
                    'value' => str_replace(' ', '', trim($objProp[1])),
                ];
            }
        }
    }

    // Coordinates come from a hidden input as a comma-separated string.
    $posInfo = false;
    $el = $tr->find('input[name=offer_coords]', 0);
    if (is_object($el)) {
        $posInfo = explode(',', $el->value);
    }

    // First action link: its href is the ad URL; the photo flag is set when
    // the link text contains "Фото".
    $url = false;
    $photo = false;
    $el = $tr->find('div.object_actions > a', 0);
    if (is_object($el)) {
        $url = $el->href;
        $photo = mb_strpos($el->plaintext, 'Фото') !== false;
    }

    // Comment text without the trailing "go to the ad page" link caption.
    $el = $tr->find('div.objects_item_info_col_comment_text', 0);
    $comment = is_object($el)
        ? trim(str_replace('Перейти к странице объявления', '', $el->plaintext))
        : '';

    $el = $tr->find('td.objects_item_info_col_7', 0);
    $contacts = is_object($el) ? trim($el->plaintext) : '';

    // Publication date: translate the Russian relative words and month
    // abbreviations so strtotime() can parse them, then format as ISO-8601 UTC.
    // An unparseable date yields '' rather than the 1970 epoch that
    // gmdate(false) would produce.
    $dt = '';
    $el = $tr->find('span.objects_item_dt_added', 0);
    if (is_object($el)) {
        $dateText = str_replace(
            ['вчера', 'сегодня', 'Янв', 'Фев', 'Мар', 'Апр', 'Мая', 'Июн', 'Июл', 'Авг', 'Сен', 'Окт', 'Ноя', 'Дек'],
            ['yesterday', 'today', 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'],
            trim($el->plaintext)
        );
        $timestamp = strtotime($dateText);
        if ($timestamp !== false) {
            $dt = gmdate('Y-m-d\TH:i:s\Z', $timestamp);
        }
    }

    // Assemble the output record; missing values are exported as ''.
    $ad = [];
    $ad['id'] = $tr->oid;
    $ad['rooms'] = $rooms;
    $ad['station'] = isset($station[1]) ? $station[1] : '';
    // Minutes plus a one-letter suffix: 'п' when the third capture is
    // 'пешком', 'т' otherwise.
    $ad['stationDist'] = isset($station[2]) ? $station[2] . ($station[3] === 'пешком' ? 'п' : 'т') : '';
    // The address needs at least three fragments. Fragment 0 is always
    // dropped, and fragment 1 is dropped as well when it names a "район".
    if (isset($addrBlocks[1]) && isset($addrBlocks[2])) {
        $addrOffset = mb_strpos($addrBlocks[1], 'район') !== false ? 2 : 1;
        $ad['address'] = implode(', ', array_slice($addrBlocks, $addrOffset));
    } else {
        $ad['address'] = '';
    }
    $ad['floor'] = isset($bldInfo[1]) ? intval($bldInfo[1]) : '';
    $ad['maxFloor'] = isset($bldInfo[2]) ? intval($bldInfo[2]) : '';
    $ad['bldType'] = isset($bldInfo[3]) ? str_replace(' дом', '', trim($bldInfo[3])) : '';
    // "total/living/kitchen[/rooms]" with '?' for unknown parts and a decimal
    // comma instead of a dot.
    $ad['area'] = str_replace(
        '.',
        ',',
        ($totalArea ?: '?') . '/' . ($livingArea ?: '?') . '/' . ($kitchenArea ?: '?') . ($roomsArea ? '/' . $roomsArea : '')
    );
    $ad['totalArea'] = $totalArea ?: '';
    // Strip every non-digit separator (spaces, NBSP, ...) before converting.
    $ad['priceRUR'] = isset($priceInfo[1]) ? intval(preg_replace('@\D+@u', '', $priceInfo[1])) : '';
    $ad['photo'] = $photo;
    $ad['pos'] = is_array($posInfo) && count($posInfo) === 2
        ? [floatval($posInfo[0]), floatval($posInfo[1])]
        : '';
    $ad['comment'] = mb_substr($comment, 0, 300);
    $ad['dt'] = $dt;
    // $contacts, $url and $objProps are collected above but currently not
    // exported; uncomment to include them in the record:
    //$ad['contacts'] = $contacts;
    //$ad['src'] = $url;
    //$ad['prop'] = $objProps;

    $result[] = $ad;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment