Skip to content

Instantly share code, notes, and snippets.

@muratpurc
Created September 22, 2011 10:16
Show Gist options
  • Save muratpurc/1234477 to your computer and use it in GitHub Desktop.
Save muratpurc/1234477 to your computer and use it in GitHub Desktop.
PHP: Extended resource/file/URL link check with redirect handling (Linkchecker)
/**
 * Checks HTTP links.
 * Based on function phpLinkCheck from Johannes Froemter <j-f@gmx.net>, 2001-04-14.
 *
 * Sends a HEAD request over a plain socket and returns the parsed status line and
 * response headers. Redirects (3xx responses carrying a Location header) are followed
 * only when $options['max_redirects'] is greater than 0.
 *
 * Usage:
 * <code>
 * $url = 'http://www.google.de/?q=foobar';
 * $res = mp_linkCheck($url, array('max_redirects' => 5));
 * if ($res === false) {
 *     die("Link could not be checked\n");
 * }
 *
 * // key 'Status-Code' will contain the HTTP status code (e.g. 200 or 404).
 * echo $res['Status-Code'] . "\n";
 *
 * // key 'Location-Status-Code' will contain the status code of the new location, in case
 * // of a 3xx code (redirection) that was followed. It is false if the new location could
 * // not be checked and not set at all if no redirect was followed.
 * if (isset($res['Location-Status-Code'])) {
 *     echo $res['Location-Status-Code'] . "\n";
 * }
 *
 * // dump variable to see the complete result
 * print_r($res);
 * </code>
 *
 * @param string $url Url to check. Only the http scheme is supported; a url without
 *                    a scheme gets "http://" prepended.
 * @param array $options Options array with following feasible values:
 * - $options['max_redirects'] (int) Number of maximal redirects to follow, default value is 0
 *   (redirects are not followed).
 * - $options['port'] (int) The port for the connection, default value is 80.
 *   Url containing a port definition overwrites the port in options!
 * - $options['socket_timeout'] (int) Socket timeout in seconds, default value is 30.
 * - $options['user_agent'] (string) User agent string.
 * - $options['this.counter'] (int) Internal request counter used by the recursion,
 *   do not set it from outside.
 * @return array|false False if the url is malformed, is not a http url, the connection failed
 *                     or the response has no valid status line. Otherwise an array with the keys
 *                     'Status-Line', 'HTTP-Version', 'Status-Code', 'Reason-Phrase',
 *                     'Response-Class' (null for status codes outside 1xx-5xx) and one entry
 *                     per response header. If a redirect was followed, 'Location-Status-Code'
 *                     (status code of the new location or false) and 'Location-Response'
 *                     (complete result of the check of the new location, array or false)
 *                     are added.
 */
function mp_linkCheck($url, array $options = array()) {
    $url = trim($url);
    if (strpos($url, '://') === false) {
        $url = "http://$url";
    }

    // parse url, parse_url() returns false on seriously malformed urls
    $comp = parse_url($url);
    if (!is_array($comp) || !isset($comp['scheme'], $comp['host'])
        || strtolower($comp['scheme']) != 'http') {
        return false;
    }

    // options check
    $options['max_redirects'] = isset($options['max_redirects'])
        ? max(0, (int) $options['max_redirects']) : 0;
    $options['port'] = (isset($options['port']) && (int) $options['port'] >= 1)
        ? (int) $options['port'] : 80;
    // 1 for the initial request, incremented with each followed redirect
    $options['this.counter'] = isset($options['this.counter'])
        ? (int) $options['this.counter'] + 1 : 1;
    $options['socket_timeout'] = (isset($options['socket_timeout']) && (int) $options['socket_timeout'] >= 1)
        ? (int) $options['socket_timeout'] : 30;
    // line breaks are removed, they would allow to inject additional request headers
    $options['user_agent'] = isset($options['user_agent'])
        ? str_replace(array("\r", "\n"), '', trim($options['user_agent'])) : null;

    $host = $comp['host'];
    $port = (isset($comp['port'])) ? $comp['port'] : $options['port'];
    // an url without path (e.g. http://www.example.com) has to request the document root
    $path = (isset($comp['path']) && $comp['path'] !== '') ? $comp['path'] : '/';
    if (isset($comp['query'])) {
        $path .= '?' . $comp['query'];
    }
    // the fragment is intentionally not appended, it is never sent to the server;
    // a blank would break the request line
    $path = str_replace(' ', '%20', $path);

    // open connection
    $fp = fsockopen($host, $port, $errno, $errstr, $options['socket_timeout']);
    if (!$fp) {
        return false;
    }
    // the timeout of fsockopen() applies to connecting only, reading needs its own timeout
    stream_set_timeout($fp, $options['socket_timeout']);

    // compose request, a non default port has to be part of the Host header
    $httpRequest = "HEAD " . $path . " HTTP/1.1\r\n"
                 . "Host: " . (($port == 80) ? $host : $host . ':' . $port) . "\r\n";
    if ($options['user_agent']) {
        $httpRequest .= "User-Agent: " . $options['user_agent'] . "\r\n";
    }
    $httpRequest .= "Connection: close\r\n\r\n";

    // send request and read response
    $httpResponse = '';
    if (fwrite($fp, $httpRequest) === false) {
        fclose($fp);
        return false;
    }
    while (!feof($fp)) {
        $chunk = fgets($fp, 1024);
        if ($chunk === false) {
            // read error or timeout, feof() would never become true in this case
            break;
        }
        $httpResponse .= $chunk;
    }
    fclose($fp);

    // extract status line
    if (!preg_match('=^(HTTP/\d+\.\d+) (\d{3}) ([^\r\n]*)=', $httpResponse, $matches)) {
        return false;
    }
    $http = array();
    $http['Status-Line'] = $matches[0];
    $http['HTTP-Version'] = $matches[1];
    $http['Status-Code'] = $matches[2];
    $http['Reason-Phrase'] = $matches[3];

    $rclasses = array('Informational', 'Success', 'Redirection', 'Client Error', 'Server Error');
    $classIndex = (int) $http['Status-Code'][0] - 1;
    $http['Response-Class'] = isset($rclasses[$classIndex]) ? $rclasses[$classIndex] : null;

    // extract headers: everything between the status line and the first empty line
    $head = substr($httpResponse, strlen($matches[0]));
    $headEnd = strpos($head, "\r\n\r\n");
    if ($headEnd !== false) {
        $head = substr($head, 0, $headEnd);
    }
    $location = null;
    if (preg_match_all('=^([^:\r\n]+):[ \t]*([^\r\n]*)=m', $head, $headers, PREG_SET_ORDER)) {
        foreach ($headers as $line) {
            $http[$line[1]] = $line[2];
            // header names are case-insensitive
            if (strcasecmp($line[1], 'Location') === 0) {
                $location = trim($line[2]);
            }
        }
    }

    // follow redirect as long as the limit of redirects is not reached
    if ($http['Status-Code'][0] === '3' && $location !== null && $location !== ''
        && $options['this.counter'] <= $options['max_redirects']) {
        $redirectOptions = $options;
        // the resolved location carries its own port if it is not the default one
        $redirectOptions['port'] = 80;
        $target = mp_linkCheck(mp_linkCheckResolveLocation($location, $host, $port, $path), $redirectOptions);
        $http['Location-Response'] = $target;
        $http['Location-Status-Code'] = is_array($target) ? $target['Status-Code'] : false;
    }

    return $http;
}

/**
 * Helper of mp_linkCheck(): turns the value of a Location header into an absolute url.
 * Dot segments ("./", "../") of relative locations are not normalized.
 *
 * @param string $location Non empty value of the Location header
 * @param string $host Host of the request which delivered the redirect
 * @param int $port Port of the request which delivered the redirect
 * @param string $path Path (including query) of the request which delivered the redirect
 * @return string
 */
function mp_linkCheckResolveLocation($location, $host, $port, $path) {
    // already absolute
    if (preg_match('=^[a-z][a-z0-9+.-]*://=i', $location)) {
        return $location;
    }
    // protocol relative
    if (substr($location, 0, 2) === '//') {
        return 'http:' . $location;
    }

    $base = 'http://' . $host . (($port == 80) ? '' : ':' . $port);
    // absolute path on the same host
    if ($location[0] === '/') {
        return $base . $location;
    }

    $pathOnly = preg_replace('=[?#].*$=s', '', $path);
    // query only, the path of the current request stays the same
    if ($location[0] === '?') {
        return $base . $pathOnly . $location;
    }
    // relative path, resolve against the directory of the current request
    $slashPos = strrpos($pathOnly, '/');
    $dir = ($slashPos === false) ? '/' : substr($pathOnly, 0, $slashPos + 1);
    return $base . $dir . $location;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment