Skip to content

Instantly share code, notes, and snippets.

@JaviPedrera
Created November 16, 2015 23:05
Show Gist options
  • Save JaviPedrera/ebc29de9947c10ba37c1 to your computer and use it in GitHub Desktop.
Save JaviPedrera/ebc29de9947c10ba37c1 to your computer and use it in GitHub Desktop.
<?php
/**
 * Regex-based scraper for a search results page.
 *
 * scrapUrl() downloads a page, isolates the
 * <ol class=" reg searchCenterMiddle"> list and collects the <h3>, <p>,
 * <span> and <a> elements found inside it; printScrap() echoes them as
 * plain text, one group per <h3> found.
 *
 * NOTE(review): elements are paired purely by their position inside the
 * whole list (n-th <h3> with n-th <span>, n-th <a>, n-th <p>), not per
 * result item. This presumably relies on the page emitting exactly one of
 * each per result - confirm against the live markup.
 */
class yahooScraper {
    /**
     * Properties
     */

    /** @var string Raw content of the last fetched page ('' if the fetch failed). */
    private $html = '';

    /** @var string Markup of the results list, including its <ol> tags ('' if not found). */
    private $resultsContainer = '';

    /** @var int Number of <h3> elements found; drives the print loop. */
    private $resultsCount = 0;

    /** @var string[] Complete <h3>...</h3> elements, in document order. */
    private $titles = [];

    /** @var string[] Complete <span>...</span> elements, in document order. */
    private $links = [];

    /** @var string[] Complete <a>...</a> elements, in document order. */
    private $cache = [];

    /** @var string[] Complete <p>...</p> elements, in document order. */
    private $bodies = [];

    /**
     * [scrapUrl]
     * Fetches $url and stores the elements of its results list in the
     * object's properties, replacing anything left by a previous call.
     *
     * If the page cannot be read, or does not contain the results list, the
     * scraper is simply left empty and printScrap() prints nothing.
     * file_get_contents() raises its own E_WARNING when the read fails, so
     * that failure is still reported by PHP.
     *
     * @param string $url Location handed to file_get_contents() (URL or path).
     * @return void
     */
    public function scrapUrl($url)
    {
        // Start from a clean slate so a failed scrape never shows stale results.
        $this->reset();

        $page = file_get_contents($url);
        if ($page === false) {
            return;
        }
        $this->html = $page;

        // Extract the results ordered list. The "s" modifier lets "." match
        // newlines, so a list spread over several lines is still found.
        // The lazy match stops at the first </ol>, so a nested <ol> inside
        // the list would cut the container short.
        $found = preg_match(
            '/<ol class=" reg searchCenterMiddle">(.*?)<\/ol>/is',
            $this->html,
            $result
        );
        if ($found !== 1) {
            return;
        }
        $this->resultsContainer = $result[0];

        // Extract one list per kind of element to display
        $this->titles = $this->extractElements('h3');
        $this->bodies = $this->extractElements('p');
        $this->links  = $this->extractElements('span');
        $this->cache  = $this->extractElements('a');

        // Get the total results number for the subsequent iteration
        $this->resultsCount = count($this->titles);
    }

    /**
     * [printScrap]
     * Echoes the scraped content with all tags stripped. For each title:
     * the title, "<br/>", the span text and anchor text joined by " - ",
     * "<br/>", the paragraph text, then two "<br/>".
     *
     * A span, anchor or paragraph that has no counterpart at the title's
     * position is printed as an empty string. Prints nothing when no scrape
     * has succeeded yet.
     *
     * @return void
     */
    public function printScrap()
    {
        for ($i = 0; $i < $this->resultsCount; $i++) {
            echo $this->textAt($this->titles, $i);
            echo "<br/>";
            echo $this->textAt($this->links, $i) . " - " . $this->textAt($this->cache, $i);
            echo "<br/>";
            echo $this->textAt($this->bodies, $i);
            echo "<br/>";
            echo "<br/>";
        }
    }

    /**
     * Clears every property back to its empty state.
     *
     * @return void
     */
    private function reset()
    {
        $this->html = '';
        $this->resultsContainer = '';
        $this->resultsCount = 0;
        $this->titles = [];
        $this->links = [];
        $this->cache = [];
        $this->bodies = [];
    }

    /**
     * Returns every complete <tag ...>...</tag> element found in the results
     * container, in document order.
     *
     * @param string $tag Tag name without angle brackets, e.g. "h3".
     * @return string[] Matched elements including their tags; empty when
     *                  nothing matches or the regex engine reports an error.
     */
    private function extractElements($tag)
    {
        $name = preg_quote($tag, '/');

        // \b keeps "a" from matching "<abbr" and "p" from matching "<pre";
        // "s" lets an element span several lines; "i" ignores tag case.
        $pattern = '/<' . $name . '\b(.*?)<\/' . $name . '>/is';

        if (preg_match_all($pattern, $this->resultsContainer, $matches) === false) {
            return [];
        }

        return $matches[0];
    }

    /**
     * Returns the tag-stripped text of $elements[$index], or '' when that
     * position does not exist.
     *
     * @param string[] $elements
     * @param int      $index
     * @return string
     */
    private function textAt(array $elements, $index)
    {
        return isset($elements[$index]) ? strip_tags($elements[$index]) : '';
    }
}
// Demo run: scrape the es.search.yahoo.com results page for the query
// "madrid" and echo whatever was extracted.
$searchUrl = 'https://es.search.yahoo.com/search?p=madrid&fr=yfp-t-777';

$scraper = new yahooScraper();
$scraper->scrapUrl($searchUrl);
$scraper->printScrap();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment