Skip to content

Instantly share code, notes, and snippets.

@SquidDev
Created August 26, 2014 12:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SquidDev/4781c30d28304837900e to your computer and use it in GitHub Desktop.
Save SquidDev/4781c30d28304837900e to your computer and use it in GitHub Desktop.
Sub-domain parsing

Sub-domain parsing

Sometimes you have domains like a.really.long.url.thing.com and you need to get the base domain name. Yes, you could run:

// Split the host name into its dot-separated labels.
$domains = explode('.', $host_name);
// Take a one-element slice holding the second-to-last label.
$target = array_slice($domains, -2, 1);
// e.g. 'thing' for a.really.long.url.thing.com
$root = $target[0];

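But what if the host ends in a multi-part suffix such as .co.uk? The snippet above would then return co rather than the actual name. That is when you need this (admittedly rough) script.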

It is worth noting that fr.co.uk is a valid domain even though fr, co and uk are all TLDs. As a result, a.thing.fr.co.uk is reduced to thing.fr.co.uk rather than fr.co.uk. Obviously, there are still bugs.

<?php
// Load the TLD list from IANA; this hits the network on every run, so
// a real deployment would probably want to cache the result.
SubDomains::initTLds();

// Walk the candidates from the full host name down to the root domain and
// stop at the first one that actually serves content.
$candidates = SubDomains::getSubDomains('some.thing.which.is.a.long.subdomain.bbc.co.uk');
foreach ($candidates as $host) {
    printf('<a href="http://%s">%s</a>', $host, $host);
    echo "\n";

    // Warnings are suppressed on purpose: a failed fetch simply means
    // "try the next candidate".
    $body = @file_get_contents('http://' . $host);
    if ($body === false) {
        echo '<hr />';
        continue;
    }

    // Show an escaped preview of the first 255 bytes, then stop.
    echo htmlspecialchars(substr($body, 0, 255));
    break;
}
<?php
/**
 * Splits host names into a root domain and the chain of sub-domains above
 * it, based on a list of top-level domains (TLDs).
 *
 * Call initTLds() once before using the other methods. Until then the TLD
 * list is empty, no label is recognised as a TLD, and the "root" of a host
 * is simply its last two labels.
 */
class SubDomains {
    /** @var array Lower-cased TLD labels, filled in by initTLds() */
    protected static $tlds = array();

    /** Location of the plain-text TLD list (one TLD per line). */
    const TLD_URL = 'http://data.iana.org/TLD/tlds-alpha-by-domain.txt';

    /** Lines of the TLD list starting with this character are ignored. */
    const COMMENT_CHARACTER = '#';

    /**
     * Loads the TLD list from TLD_URL into the class-wide TLD table.
     *
     * Comment lines and blank lines are skipped and every TLD is stored
     * lower-cased. Calling this again replaces the previous list.
     *
     * @throws Exception If the TLD source cannot be opened
     */
    public static function initTLds() {
        $file = fopen(static::TLD_URL, 'r');
        if (!$file) {
            throw new Exception('Unable to open TLD source');
        }

        // Collect into a local list first so a repeated call replaces the
        // table instead of appending duplicates to it.
        $loaded = array();
        while (($line = fgets($file)) !== false) {
            $line = trim($line);
            if ($line === '' || substr($line, 0, 1) === static::COMMENT_CHARACTER) {
                continue;
            }
            $loaded[] = strtolower($line);
        }
        fclose($file);

        static::$tlds = $loaded;
    }

    /**
     * Lists a host name and each of its parent domains down to the root.
     *
     * With co and uk in the TLD list, 'www.news.example.co.uk' yields
     * 'www.news.example.co.uk', 'news.example.co.uk', 'example.co.uk'.
     *
     * @param string $domain Domain of the URL
     * @return array Each domain in turn, longest first, root domain last
     */
    public static function getSubDomains($domain) {
        $parts = explode('.', $domain);

        // createDomain() consumes the labels that form the root domain and
        // leaves only the sub-domain labels in $parts.
        $current_domain = static::createDomain($parts);
        $domains = array($current_domain);

        while (count($parts) > 0) {
            $current_domain = array_pop($parts) . '.' . $current_domain;
            $domains[] = $current_domain;
        }

        return array_reverse($domains);
    }

    /**
     * Reduces a host name to its root domain.
     *
     * @param string $domain Domain of the URL
     * @return string The root domain (www.example.co.uk maps to
     *                example.co.uk when co and uk are in the TLD list)
     */
    public static function getRootDomain($domain) {
        $parts = explode('.', $domain);
        // createDomain() works on the list of labels, not on the raw string.
        return static::createDomain($parts);
    }

    /**
     * Checks a single label against the loaded TLD list, ignoring case.
     *
     * @param string $subdomain The string to check
     * @return bool True if the string is a TLD
     */
    public static function isTld($subdomain) {
        // The table holds lower-cased strings, so normalise the needle and
        // compare strictly to avoid loose numeric-string matches.
        return in_array(strtolower($subdomain), static::$tlds, true);
    }

    /**
     * Builds the root domain from the end of a list of labels.
     *
     * The last label is always taken. Further labels are then popped off
     * the end and prepended until one is not a TLD (that label is still
     * included) or the list is exhausted. $parts is modified in place and
     * afterwards holds only the labels that were not consumed.
     *
     * @param array $parts Parts of the domain (passed by reference)
     * @return string All elements of the domain up to and including the
     *                first non-TLD label, counted from the right
     */
    protected static function createDomain(&$parts) {
        $current_domain = array_pop($parts);
        $length = count($parts);
        for ($i = 0; $i < $length; $i++) {
            $subdomain = array_pop($parts);
            $current_domain = $subdomain . '.' . $current_domain;
            if (!static::isTld($subdomain)) {
                break;
            }
        }
        return $current_domain;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment