How to scrape a hotel listing website and show the results on your own website. This is example code.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Mexlike documentation (Spanish): https://mexlike.io/como-hacer-web-scraping-con-php-simple-html-dom
// Load the Simple HTML DOM dependency. Download page: https://sourceforge.net/projects/simplehtmldom/files/
require 'simple_html_dom.php';

// Build a DOM from the remote hotel listing page.
$html = file_get_html('https://www.tripadvisor.com/Hotels-g150810-Isla_Mujeres_Yucatan_Peninsula-Hotels.html');

// Listing elements, one per hotel. file_get_html() yields a falsy value when
// the page cannot be fetched or parsed; keep an empty list in that case so the
// rest of the page still renders instead of dying on a non-object call.
$wrap_hotels = array();
if ($html) {
    $wrap_hotels = $html->find('div.prw_meta_hsx_responsive_listing');
}
/**
 * Extract an image URL from an element's attribute array.
 *
 * The inline `style` attribute is inspected first for a CSS
 * `url(IMAGE_URL)` value (as in `background-image:url(IMAGE_URL);`);
 * if none is found, the `data-lazyurl` attribute is used instead.
 *
 * @param array $attr_image Attribute name => value map of the image element.
 *
 * @return string The image URL, or the placeholder 'nothing' when neither
 *                attribute provides one.
 */
function get_standar_image_url($attr_image) {
    // Placeholder returned when no URL can be determined.
    $url_image = 'nothing';

    // Tolerate a non-array argument (e.g. the attributes of a missing element).
    if (!is_array($attr_image)) {
        return $url_image;
    }

    // Pull the URL out of url(...), accepting optional quotes and whitespace,
    // rather than relying on fixed character offsets.
    if (isset($attr_image['style'])
        && preg_match('/url\(\s*([\'"]?)(.*?)\1\s*\)/i', $attr_image['style'], $matches)
        && $matches[2] !== ''
    ) {
        return $matches[2];
    }

    // Fall back to the lazy-loading attribute when it is present.
    if (isset($attr_image['data-lazyurl']) && $attr_image['data-lazyurl'] !== '') {
        return $attr_image['data-lazyurl'];
    }

    return $url_image;
}
// Hotel records collected from the listing, one stdClass per entry.
$list_hotels = array();

// Convert every listing element into a plain hotel record.
foreach ($wrap_hotels as $element) {
    // find() with an index returns null when nothing matches, so every node
    // is checked before its properties are read.
    $title_node = $element->find('.property_title', 0);
    if (!$title_node) {
        // Without a title there is nothing meaningful to display; skip it.
        continue;
    }

    $price_node = $element->find('.price', 0);
    $image_node = $element->find('.inner', 0);
    $link_node = $element->find('.photo-wrapper a', 0);

    $hotel = new stdClass();
    $hotel->name = $title_node->plaintext;
    $hotel->price = $price_node ? $price_node->plaintext : '';
    // 'nothing' is the same placeholder get_standar_image_url() starts from.
    $hotel->image_url = $image_node ? get_standar_image_url($image_node->attr) : 'nothing';
    $hotel->href = $link_node ? $link_node->href : '';

    $list_hotels[] = $hotel;
}
?>
<!DOCTYPE html>
<html lang="en" dir="ltr">
  <head>
    <meta charset="utf-8">
    <title>Registro de Hoteles de Isla Mujeres</title>
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css">
    <style media="screen">
      .container {
        min-height: 15rem;
        position: relative;
      }
      .img-fluid {
        width: 100%;
      }
      .thumbnail {
        position: relative;
        width: 100%;
        height: 100%;
        overflow: hidden;
      }
      .thumbnail img {
        position: absolute;
        left: 50%;
        top: 50%;
        height: 100%;
        width: auto;
        -webkit-transform: translate(-50%,-50%);
        -ms-transform: translate(-50%,-50%);
        transform: translate(-50%,-50%);
      }
      .thumbnail img.portrait {
        width: 100%;
        height: auto;
      }
    </style>
  </head>
  <body>
    <nav class="navbar navbar-expand-md navbar-dark fixed-top bg-dark">
      <a class="navbar-brand" href="#">Logo</a>
      <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarsExampleDefault" aria-controls="navbarsExampleDefault" aria-expanded="false" aria-label="Toggle navigation">
        <span class="navbar-toggler-icon"></span>
      </button>
      <div class="collapse navbar-collapse" id="navbarsExampleDefault">
        <ul class="navbar-nav mr-auto">
          <li class="nav-item active">
            <a class="nav-link" href="#">Home <span class="sr-only">(current)</span></a>
          </li>
          <li class="nav-item">
            <a class="nav-link" href="#">Hoteles</a>
          </li>
        </ul>
        <form class="form-inline my-2 my-lg-0">
          <input class="form-control mr-sm-2" type="text" placeholder="Search" aria-label="Search">
          <button class="btn btn-outline-success my-2 my-sm-0" type="submit">Search</button>
        </form>
      </div>
    </nav>
    <main>
      <div class="jumbotron thumbnail">
        <img class="portrait" src="https://mexlike.io/wp-content/uploads/2018/06/Playa-norte-de-Isla-Mujeres.jpg" alt="">
        <div class="container">
          <h1 class="display-3 text-light">Hoteles de Isla Mujeres!</h1>
          <h3 class="bg-dark text-light">Te presentamos los hoteles más populares de Isla Mujeres. Encontrarás las mejores recomendaciones de nuestra comunidad.</h3>
        </div>
      </div>
      <div class="container-fluid">
        <div class="d-flex flex-wrap">
          <?php foreach ($list_hotels as $hotel): ?>
            <?php
            // All hotel fields come from a scraped third-party page, so they are
            // escaped before being written into HTML text or attributes.
            // double_encode is off because the scraped text may already
            // contain HTML entities (assumption — verify against live data).
            // Leading slashes are trimmed from the path to avoid "com//...".
            $hotel_link = htmlspecialchars('https://www.tripadvisor.com/' . ltrim((string) $hotel->href, '/'), ENT_QUOTES, 'UTF-8', false);
            $hotel_name = htmlspecialchars((string) $hotel->name, ENT_QUOTES, 'UTF-8', false);
            $hotel_price = htmlspecialchars((string) $hotel->price, ENT_QUOTES, 'UTF-8', false);
            $hotel_image = htmlspecialchars((string) $hotel->image_url, ENT_QUOTES, 'UTF-8', false);
            ?>
            <div class="col-lg-4 col-md-4 col-sm-6 col-xs-6 ">
              <div class="wrap-box">
                <div class="box-img">
                  <a href="<?php echo $hotel_link ?>">
                    <img src="<?php echo $hotel_image ?>" class="img-fluid" alt="<?php echo $hotel_name ?>">
                  </a>
                </div>
                <div class="rooms-content">
                  <h4><a href="<?php echo $hotel_link ?>"><?php echo $hotel_name ?></a></h4>
                  <p class="price"><?php echo $hotel_price ?> / Por Noche</p>
                </div>
              </div>
            </div>
          <?php endforeach; ?>
        </div>
      </div>
    </main>
    <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.12.9/umd/popper.min.js"></script>
    <script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/js/bootstrap.min.js"></script>
  </body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment