Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Given an input URL, find the canonical URL after following redirects and looking at rel=canonical
// Given an input URL, find the canonical URL after following redirects and
// looking for rel=canonical in the fetched HTML. The result is printed as
// plain text, one URL followed by a newline.

// No URL supplied yet: render a minimal input form and stop, so the fetch
// logic below never runs without input.
if (!isset($_GET['url'])) {
    echo '<form action="" method="get">', "\n",
        '<input type="url" name="url">', "\n",
        '<input type="submit" value="Go">', "\n",
        '</form>', "\n";
    exit;
}

header('Content-type: text/plain');

$url = $_GET['url'];

// The URL is untrusted input. Reject non-string values (e.g. ?url[]=x) and
// anything that is not plain http(s), so cURL cannot be pointed at file://,
// gopher:// and similar schemes.
if (!is_string($url) || !preg_match('#^https?://#i', $url)) {
    http_response_code(400);
    echo "Invalid URL\n";
    exit;
}

$ch = curl_init($url);
if ($ch === false) {
    http_response_code(500);
    echo "Unable to initialise cURL\n";
    exit;
}

curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
// Apply the same http(s)-only restriction to the request and to every
// redirect hop it follows.
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
// Some sites don't like crawlers, so pretend to be a browser
curl_setopt($ch, CURLOPT_HTTPHEADER, [
    'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
]);

$body = curl_exec($ch);

// Start from the URL cURL ended up at after following redirects; a
// rel=canonical link in the document (if any) overrides it below.
$final_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
$url = $final_url;

// Check for rel=canonical
if ($body) {
    $dom = load_html($body);
    if ($dom) {
        foreach ($dom->getElementsByTagName('link') as $link) {
            if (!$link->hasAttribute('rel')) {
                continue;
            }

            // rel is a whitespace-separated token list; compare tokens
            // case-insensitively so rel="Canonical" is also recognised.
            $relAtt = strtolower(trim($link->getAttribute('rel')));
            if ($relAtt === '') {
                continue;
            }

            // preg_split() returns false on a PCRE error; treat that as "no tokens".
            $rels = preg_split('/\s+/', $relAtt) ?: [];
            if (!in_array('canonical', $rels, true)) {
                continue;
            }

            // Ignore canonical links with no usable href instead of replacing
            // the resolved URL with an empty string.
            // NOTE(review): a relative href is printed as-is and is not resolved
            // against $final_url — confirm whether callers need an absolute URL.
            $href = trim($link->getAttribute('href'));
            if ($href !== '') {
                $url = $href;
                break; // the first usable canonical link wins
            }
        }
    }
}

echo $url . "\n";
/**
 * Parse an HTML string into a DOMDocument, forcing the markup to be read as UTF-8.
 *
 * libxml parse errors are collected internally (not emitted as PHP warnings)
 * for the duration of the call, and the previous libxml error-handling mode is
 * restored before returning so the setting does not leak to other code.
 *
 * @param string $html Raw HTML markup to parse.
 * @return DOMDocument|null The parsed document, or null if libxml could not load the markup.
 */
function load_html($html) {
    $dom = new DOMDocument;

    // Suppress parse errors and warnings, remembering the caller's setting.
    $previous = libxml_use_internal_errors(true);

    // Force interpreting this as UTF-8: the XML declaration prefix makes
    // libxml treat the markup that follows as UTF-8.
    $loaded = $dom->loadHTML('<?xml encoding="UTF-8">' . $html, LIBXML_NOWARNING | LIBXML_NOERROR);

    // Put the process-wide libxml error mode back the way we found it.
    libxml_use_internal_errors($previous);

    return $loaded ? $dom : null;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment