Skip to content

Instantly share code, notes, and snippets.

@mrclay
Created February 22, 2013 21:49
Show Gist options
  • Save mrclay/5016806 to your computer and use it in GitHub Desktop.
Save mrclay/5016806 to your computer and use it in GitHub Desktop.
Crudely sniff the title of a web page reading as few bytes as possible
<?php
namespace UFCOE;
/**
 * Sniff the title of a web page by reading as few bytes as possible.
 *
 * Warning, this is blissfully unaware of character encodings so that it can be fast. It only returns a string
 * if the markup appears ASCII compatible, but you could get back UTF-8, Windows-125*, ISO-8859-*, etc.
 */
class HttpTitleSniffer {
    /**
     * Options passed as the "http" section of the stream context used to open the URL.
     *
     * @var array
     */
    public $httpContextOptions = array(
        'max_redirects' => 1,
        'timeout' => 5,
    );

    /**
     * Number of bytes requested from the stream per read. Values below 1 are treated as 1.
     *
     * @var int
     */
    public $bytesPerRead = 1024;

    /**
     * Give up looking for the closing title tag once at least this many bytes have been read.
     * Set to 0 (or a negative number) to read until EOF.
     *
     * @var int
     */
    public $maxBytes = 1048576;

    /**
     * Fetch the contents of the first <title> element of a page.
     *
     * Reading stops as soon as a closing title tag has been seen, the stream ends, a read
     * fails, or $maxBytes has been reached. The stream is always closed before returning,
     * including a stream supplied by the caller.
     *
     * @param string        $url    http(s) URL to open; ignored when $stream is given
     * @param resource|null $stream Optional already-open readable stream to sniff instead of $url
     *
     * @return string|false The entity-decoded title text, or false if the URL was rejected,
     *                      could not be opened, or no non-empty title was found
     */
    public function sniffTitle($url, $stream = null) {
        if (!$stream) {
            $stream = $this->getStream($url);
            if (!$stream) {
                return false;
            }
        }

        $content = $this->readUntilTitleClose($stream);
        fclose($stream);

        // tag names are case-insensitive in HTML, hence the "i" modifier
        if (preg_match('~<title\b[^>]*>([^<]+)<~i', $content, $m)) {
            // ENT_QUOTES so that &quot; and &#039; are decoded as well
            return html_entity_decode($m[1], ENT_QUOTES | ENT_HTML401, 'UTF-8');
        }
        return false;
    }

    /**
     * Read from the stream until it looks like the closing title tag has arrived.
     *
     * @param resource $stream Readable stream positioned at the start of the markup
     *
     * @return string Everything read so far (may not contain a title at all)
     */
    protected function readUntilTitleClose($stream) {
        $needle = '</title>';
        // the tag may straddle two reads, so re-scan this many bytes of the previous data
        $overlap = strlen($needle) - 1;
        $chunkSize = max(1, (int) $this->bytesPerRead);
        $limit = (int) $this->maxBytes;

        $content = '';
        while (!feof($stream)) {
            $chunk = fread($stream, $chunkSize);
            if ($chunk === false) {
                // read error: work with whatever has been collected
                break;
            }
            $searchFrom = max(0, strlen($content) - $overlap);
            $content .= $chunk;
            if (false !== stripos($content, $needle, $searchFrom)) {
                break;
            }
            if ($limit > 0 && strlen($content) >= $limit) {
                break;
            }
        }
        return $content;
    }

    /**
     * Validate the URL and open it for reading over HTTP(S).
     *
     * @param string $url URL to open
     *
     * @return resource|false Open stream, or false if the URL is invalid, is not http/https,
     *                        or could not be opened
     */
    protected function getStream($url) {
        // see https://bugs.php.net/bug.php?id=51192
        // (affected versions reject URLs containing dashes, so validate a dash-free copy)
        $php_5_2_13_and_below = version_compare(PHP_VERSION, '5.2.14', '<');
        $php_5_3_0_to_5_3_2 = version_compare(PHP_VERSION, '5.3.0', '>=') &&
            version_compare(PHP_VERSION, '5.3.3', '<');
        if ($php_5_2_13_and_below || $php_5_3_0_to_5_3_2) {
            $tmpUrl = str_replace("-", "", $url);
            $validated = filter_var($tmpUrl, FILTER_VALIDATE_URL);
        } else {
            $validated = filter_var($url, FILTER_VALIDATE_URL);
        }
        if (!$validated) {
            return false;
        }
        // validator accepts all kinds of protocols
        if (!preg_match('~^https?\://~', $url)) {
            return false;
        }
        $ctx = stream_context_create(array('http' => $this->httpContextOptions));
        return fopen($url, 'r', false, $ctx);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment