Check a list of URLs for content using PHP & CURL
<?php
// usage: cat urls.txt | php checklinks.php --use-curl
//
// Reads URLs from stdin (one per line), checks each of them and prints a
// " valid <url>" / " invalid <url>" line per URL followed by the totals.

// initialize results arrays
$valid_links = array();
$invalid_links = array();

// check command line option whether to use curl (preferred method)
$use_curl = isset($argv[1]) && $argv[1] === "--use-curl";

// read a list of URLs from stdin
$stdin = fopen("php://stdin", "r");

// check each one; compare against false explicitly so that a falsy line
// (e.g. a final "0" without a newline) does not end the loop early
while (($line = fgets($stdin)) !== false)
{
    // fgets() keeps the line terminator; strip it (and stray whitespace)
    // so it does not become part of the URL that is requested
    $url = trim($line);
    if ($url === "")
    {
        continue; // blank lines are not links
    }

    if ($use_curl)
    {
        $is_valid_url = check_link_curl($url);
    }
    else
    {
        $is_valid_url = check_link_fopen($url);
    }

    if ($is_valid_url)
    {
        print " valid $url\n";
        $valid_links[] = $url;
    }
    else
    {
        print " invalid $url\n";
        $invalid_links[] = $url;
    }
}

fclose($stdin);

// report totals
print "valid links: " . count($valid_links) . "\n";
print "invalid links: " . count($invalid_links) . "\n";
// NOTE: this will not work for remote URLs unless php.ini has allow_url_fopen enabled.
// You may also have trouble if you are behind a proxy.
/**
 * Check a link by trying to open it for reading with fopen().
 *
 * Leading/trailing whitespace (such as the newline left by fgets()) is
 * stripped before the URL is opened. fopen() reports its own warning when
 * the target cannot be opened; that warning is left visible as a diagnostic.
 *
 * @param string $url URL (or path) to check
 * @return bool true if the target could be opened, false otherwise
 */
function check_link_fopen($url)
{
    $url = trim($url);
    if ($url === '')
    {
        // an empty path can never be opened (and makes fopen() throw on PHP 8)
        return false;
    }

    $file_handle = fopen($url, 'r');
    if ($file_handle)
    {
        // close the handle that was actually opened
        fclose($file_handle);
        return true;
    }
    return false;
}
// NOTE: this requires the cURL extension (libcurl) to be compiled into PHP
/**
 * Check a link by issuing a HEAD request with cURL.
 *
 * Leading/trailing whitespace (such as the newline left by fgets()) is
 * stripped first. Redirects are followed so that the result agrees with
 * check_link_fopen(), whose HTTP stream wrapper follows redirects by default.
 *
 * @param string $url URL to check
 * @return bool true if the final response status is 2xx, false otherwise
 *              (including when no HTTP response was received at all)
 */
function check_link_curl($url)
{
    $url = trim($url);
    if ($url === '')
    {
        return false;
    }

    $curl = curl_init();
    if ($curl === false)
    {
        return false;
    }

    $curl_options = array(
        CURLOPT_RETURNTRANSFER => true,  // do not output the response
        CURLOPT_URL            => $url,  // set URL
        CURLOPT_NOBODY         => true,  // do a HEAD request only
        CURLOPT_FOLLOWLOCATION => true,  // a redirect to a live page is a live link
        CURLOPT_MAXREDIRS      => 10,    // but do not loop forever
        CURLOPT_TIMEOUT        => 60,    // 1 minute
    );
    curl_setopt_array($curl, $curl_options);
    curl_exec($curl);

    // the status stays 0 when the transfer failed before any HTTP response
    $status = (int) curl_getinfo($curl, CURLINFO_HTTP_CODE);

    // curl_close() has no effect since PHP 8.0, where the handle is an
    // object that is released when it goes out of scope
    if (PHP_VERSION_ID < 80000)
    {
        curl_close($curl);
    }

    return $status >= 200 && $status < 300;
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment