Created
April 11, 2013 20:27
-
-
Save pocesar/5366899 to your computer and use it in GitHub Desktop.
PHP code to get the domain name without subdomains (includes the TLD, and the special types from IANA). Does not support Unicode (internationalized) domain names.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Reduce a host name to its registrable domain: the public suffix plus the
 * single label in front of it (e.g. "www.shop.example.com" -> "example.com").
 *
 * No Public Suffix List is consulted, so the suffix length is a heuristic:
 *  - a two-letter (country-code) TLD preceded by a well-known second-level
 *    category label ("com.br", "co.kr", "gov.br", ...) is a two-label suffix;
 *  - every other TLD is treated as a one-label suffix.
 * A country-code suffix whose second-level label is missing from the built-in
 * list is therefore handled as a one-label suffix. Unicode (IDN) host names
 * are not supported.
 *
 * "www" labels and empty labels (such as the one produced by a trailing dot)
 * are ignored. Hosts under the reserved names "localhost", "test" and
 * "invalid" collapse to that name alone. IP addresses are returned unchanged.
 *
 * @param string $domain Host name to reduce; pass $_SERVER['SERVER_NAME'] here
 * @param bool   $debug  When true, print an HTML trace of the parsing steps
 *
 * @return string The registrable domain, or the lower-cased input when it is an IP address
 */
function get_domain($domain, $debug = false)
{
    $original = $domain = strtolower($domain);

    // IPv4/IPv6 literals have no label structure to reduce.
    if (filter_var($domain, FILTER_VALIDATE_IP)) {
        return $domain;
    }

    // The trace is HTML and the host name may originate from a request header,
    // so it is escaped before being printed.
    $shown = htmlspecialchars($original, ENT_QUOTES, 'UTF-8');
    if ($debug) {
        print('<strong style="color:green">»</strong> Parsing: ' . $shown);
    }

    // Split into every label (no limit) so depth never changes the outcome.
    $labels = array_values(array_filter(explode('.', $domain), function ($label) {
        return $label !== '' && $label !== 'www';
    }));
    $count = count($labels);
    if ($debug) {
        print(" (parts count: {$count})");
    }

    if ($count >= 2) {
        $tld = $labels[$count - 1];

        if (in_array($tld, array('localhost', 'test', 'invalid'), true)) {
            // Reserved names are not delegated; the name itself is the answer.
            $labels = array($tld);
            if ($debug) {
                print("<br>\n" . '[*] Reserved name: <strong>' . htmlspecialchars($tld, ENT_QUOTES, 'UTF-8') . '</strong> ');
            }
        } else {
            // Second-level labels commonly used as registration categories
            // below country-code TLDs (heuristic, not exhaustive).
            $secondLevel = array(
                'ac', 'biz', 'co', 'com', 'edu', 'go', 'gob', 'gouv', 'gov', 'govt',
                'info', 'ltd', 'mil', 'ne', 'net', 'nom', 'or', 'org', 'plc', 'sch',
            );

            $suffixLength = 1;
            if ($count >= 3
                && strlen($tld) === 2
                && in_array($labels[$count - 2], $secondLevel, true)
            ) {
                $suffixLength = 2;
            }

            if ($debug) {
                $suffix = htmlspecialchars(join('.', array_slice($labels, -$suffixLength)), ENT_QUOTES, 'UTF-8');
                $kind = $suffixLength === 2 ? 'Two level TLD' : 'One level TLD';
                print("<br>\n" . '[*] ' . $kind . ': <strong>' . $suffix . '</strong> ');
            }

            // Keep the suffix plus exactly one label; $count is always large
            // enough here (>= 2 for a one-label suffix, >= 3 for a two-label one).
            $labels = array_slice($labels, -($suffixLength + 1));
        }
    }

    $result = join('.', $labels);
    if ($debug) {
        print("<br>\n" . '<strong style="color:gray">«</strong> Done parsing: <span style="color:red">' . $shown . '</span> as <span style="color:blue">' . htmlspecialchars($result, ENT_QUOTES, 'UTF-8') . "</span><br>\n");
    }

    return $result;
}
// Self-check table: host name passed to get_domain() => expected result.
$urls = array(
    'www.example.com' => 'example.com',
    'example.com' => 'example.com',
    'example.com.br' => 'example.com.br',
    'www.example.com.br' => 'example.com.br',
    'www.example.gov.br' => 'example.gov.br',
    'localhost' => 'localhost',
    'www.localhost' => 'localhost',
    'subdomain.localhost' => 'localhost',
    'www.subdomain.example.com' => 'example.com',
    'subdomain.example.com' => 'example.com',
    'subdomain.example.com.br' => 'example.com.br',
    'www.subdomain.example.com.br' => 'example.com.br',
    'www.subdomain.example.biz.br' => 'example.biz.br',
    'subdomain.example.biz.br' => 'example.biz.br',
    'subdomain.example.net' => 'example.net',
    'www.subdomain.example.net' => 'example.net',
    'www.subdomain.example.co.kr' => 'example.co.kr',
    'subdomain.example.co.kr' => 'example.co.kr',
    'example.co.kr' => 'example.co.kr',
    'example.jobs' => 'example.jobs',
    'www.example.jobs' => 'example.jobs',
    'subdomain.example.jobs' => 'example.jobs',
    'insane.subdomain.example.jobs' => 'example.jobs',
    'insane.subdomain.example.com.br' => 'example.com.br',
    'www.doubleinsane.subdomain.example.com.br' => 'example.com.br',
    'www.subdomain.example.jobs' => 'example.jobs',
    'test' => 'test',
    'www.test' => 'test',
    'subdomain.test' => 'test',
    'www.detran.sp.gov.br' => 'sp.gov.br',
    'www.mp.sp.gov.br' => 'sp.gov.br',
    'ny.library.museum' => 'library.museum',
    'www.ny.library.museum' => 'library.museum',
    'ny.ny.library.museum' => 'library.museum',
    'www.library.museum' => 'library.museum',
    'info.abril.com.br' => 'abril.com.br',
    '127.0.0.1' => '127.0.0.1',
    '::1' => '::1',
);

$failed = 0;
$total = count($urls);
foreach ($urls as $from => $expected)
{
    // Store the result separately: $from must survive so that a failure
    // message can name the host that was parsed.
    $actual = get_domain($from, true);
    if ($actual !== $expected)
    {
        $failed++;
        print("<div style='color:fuchsia;'>expected get_domain('{$from}') to be {$expected}, got {$actual}</div>");
    }
}

// Summary line: number of mismatches, or a single success marker.
if ($failed)
{
    print("{$failed} tests failed out of {$total}");
}
else
{
    print("Success");
}
The best approach is to build a lookup array of all available TLDs from IANA; many have been added since 2013, including punycode (IDN) entries: https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains. It takes a bit of manual setup at first, but it performs well and stays strictly compliant with the list.
Very cool! Saves writing some custom code. Thanks!
Well written code for finding main domain, thanks...
If you want the full TLD list, an up-to-date copy is available at https://data.iana.org/TLD/tlds-alpha-by-domain.txt
If you want the list as a PHP array, you can copy it from here:
// Flat list of top-level domain labels, lower-case, one string per entry,
// in alphabetical order. The version line below identifies the snapshot
// this list was copied from.
// NOTE(review): the list contains no "xn--" (punycode/IDN) entries —
// presumably they were left out when the list was copied; confirm against
// the upstream file before relying on it for internationalized names.
$tlds = array(
# Version 2022060900, Last Updated Thu Jun 9 07:07:02 2022 UTC
'aaa',
'aarp',
'abarth',
'abb',
'abbott',
'abbvie',
'abc',
'able',
'abogado',
'abudhabi',
'ac',
'academy',
'accenture',
'accountant',
'accountants',
'aco',
'actor',
'ad',
'adac',
'ads',
'adult',
'ae',
'aeg',
'aero',
'aetna',
'af',
'afl',
'africa',
'ag',
'agakhan',
'agency',
'ai',
'aig',
'airbus',
'airforce',
'airtel',
'akdn',
'al',
'alfaromeo',
'alibaba',
'alipay',
'allfinanz',
'allstate',
'ally',
'alsace',
'alstom',
'am',
'amazon',
'americanexpress',
'americanfamily',
'amex',
'amfam',
'amica',
'amsterdam',
'analytics',
'android',
'anquan',
'anz',
'ao',
'aol',
'apartments',
'app',
'apple',
'aq',
'aquarelle',
'ar',
'arab',
'aramco',
'archi',
'army',
'arpa',
'art',
'arte',
'as',
'asda',
'asia',
'associates',
'at',
'athleta',
'attorney',
'au',
'auction',
'audi',
'audible',
'audio',
'auspost',
'author',
'auto',
'autos',
'avianca',
'aw',
'aws',
'ax',
'axa',
'az',
'azure',
'ba',
'baby',
'baidu',
'banamex',
'bananarepublic',
'band',
'bank',
'bar',
'barcelona',
'barclaycard',
'barclays',
'barefoot',
'bargains',
'baseball',
'basketball',
'bauhaus',
'bayern',
'bb',
'bbc',
'bbt',
'bbva',
'bcg',
'bcn',
'bd',
'be',
'beats',
'beauty',
'beer',
'bentley',
'berlin',
'best',
'bestbuy',
'bet',
'bf',
'bg',
'bh',
'bharti',
'bi',
'bible',
'bid',
'bike',
'bing',
'bingo',
'bio',
'biz',
'bj',
'black',
'blackfriday',
'blockbuster',
'blog',
'bloomberg',
'blue',
'bm',
'bms',
'bmw',
'bn',
'bnpparibas',
'bo',
'boats',
'boehringer',
'bofa',
'bom',
'bond',
'boo',
'book',
'booking',
'bosch',
'bostik',
'boston',
'bot',
'boutique',
'box',
'br',
'bradesco',
'bridgestone',
'broadway',
'broker',
'brother',
'brussels',
'bs',
'bt',
'bugatti',
'build',
'builders',
'business',
'buy',
'buzz',
'bv',
'bw',
'by',
'bz',
'bzh',
'ca',
'cab',
'cafe',
'cal',
'call',
'calvinklein',
'cam',
'camera',
'camp',
'cancerresearch',
'canon',
'capetown',
'capital',
'capitalone',
'car',
'caravan',
'cards',
'care',
'career',
'careers',
'cars',
'casa',
'case',
'cash',
'casino',
'cat',
'catering',
'catholic',
'cba',
'cbn',
'cbre',
'cbs',
'cc',
'cd',
'center',
'ceo',
'cern',
'cf',
'cfa',
'cfd',
'cg',
'ch',
'chanel',
'channel',
'charity',
'chase',
'chat',
'cheap',
'chintai',
'christmas',
'chrome',
'church',
'ci',
'cipriani',
'circle',
'cisco',
'citadel',
'citi',
'citic',
'city',
'cityeats',
'ck',
'cl',
'claims',
'cleaning',
'click',
'clinic',
'clinique',
'clothing',
'cloud',
'club',
'clubmed',
'cm',
'cn',
'co',
'coach',
'codes',
'coffee',
'college',
'cologne',
'com',
'comcast',
'commbank',
'community',
'company',
'compare',
'computer',
'comsec',
'condos',
'construction',
'consulting',
'contact',
'contractors',
'cooking',
'cookingchannel',
'cool',
'coop',
'corsica',
'country',
'coupon',
'coupons',
'courses',
'cpa',
'cr',
'credit',
'creditcard',
'creditunion',
'cricket',
'crown',
'crs',
'cruise',
'cruises',
'cu',
'cuisinella',
'cv',
'cw',
'cx',
'cy',
'cymru',
'cyou',
'cz',
'dabur',
'dad',
'dance',
'data',
'date',
'dating',
'datsun',
'day',
'dclk',
'dds',
'de',
'deal',
'dealer',
'deals',
'degree',
'delivery',
'dell',
'deloitte',
'delta',
'democrat',
'dental',
'dentist',
'desi',
'design',
'dev',
'dhl',
'diamonds',
'diet',
'digital',
'direct',
'directory',
'discount',
'discover',
'dish',
'diy',
'dj',
'dk',
'dm',
'dnp',
'do',
'docs',
'doctor',
'dog',
'domains',
'dot',
'download',
'drive',
'dtv',
'dubai',
'dunlop',
'dupont',
'durban',
'dvag',
'dvr',
'dz',
'earth',
'eat',
'ec',
'eco',
'edeka',
'edu',
'education',
'ee',
'eg',
'email',
'emerck',
'energy',
'engineer',
'engineering',
'enterprises',
'epson',
'equipment',
'er',
'ericsson',
'erni',
'es',
'esq',
'estate',
'et',
'etisalat',
'eu',
'eurovision',
'eus',
'events',
'exchange',
'expert',
'exposed',
'express',
'extraspace',
'fage',
'fail',
'fairwinds',
'faith',
'family',
'fan',
'fans',
'farm',
'farmers',
'fashion',
'fast',
'fedex',
'feedback',
'ferrari',
'ferrero',
'fi',
'fiat',
'fidelity',
'fido',
'film',
'final',
'finance',
'financial',
'fire',
'firestone',
'firmdale',
'fish',
'fishing',
'fit',
'fitness',
'fj',
'fk',
'flickr',
'flights',
'flir',
'florist',
'flowers',
'fly',
'fm',
'fo',
'foo',
'food',
'foodnetwork',
'football',
'ford',
'forex',
'forsale',
'forum',
'foundation',
'fox',
'fr',
'free',
'fresenius',
'frl',
'frogans',
'frontdoor',
'frontier',
'ftr',
'fujitsu',
'fun',
'fund',
'furniture',
'futbol',
'fyi',
'ga',
'gal',
'gallery',
'gallo',
'gallup',
'game',
'games',
'gap',
'garden',
'gay',
'gb',
'gbiz',
'gd',
'gdn',
'ge',
'gea',
'gent',
'genting',
'george',
'gf',
'gg',
'ggee',
'gh',
'gi',
'gift',
'gifts',
'gives',
'giving',
'gl',
'glass',
'gle',
'global',
'globo',
'gm',
'gmail',
'gmbh',
'gmo',
'gmx',
'gn',
'godaddy',
'gold',
'goldpoint',
'golf',
'goo',
'goodyear',
'goog',
'google',
'gop',
'got',
'gov',
'gp',
'gq',
'gr',
'grainger',
'graphics',
'gratis',
'green',
'gripe',
'grocery',
'group',
'gs',
'gt',
'gu',
'guardian',
'gucci',
'guge',
'guide',
'guitars',
'guru',
'gw',
'gy',
'hair',
'hamburg',
'hangout',
'haus',
'hbo',
'hdfc',
'hdfcbank',
'health',
'healthcare',
'help',
'helsinki',
'here',
'hermes',
'hgtv',
'hiphop',
'hisamitsu',
'hitachi',
'hiv',
'hk',
'hkt',
'hm',
'hn',
'hockey',
'holdings',
'holiday',
'homedepot',
'homegoods',
'homes',
'homesense',
'honda',
'horse',
'hospital',
'host',
'hosting',
'hot',
'hoteles',
'hotels',
'hotmail',
'house',
'how',
'hr',
'hsbc',
'ht',
'hu',
'hughes',
'hyatt',
'hyundai',
'ibm',
'icbc',
'ice',
'icu',
'id',
'ie',
'ieee',
'ifm',
'ikano',
'il',
'im',
'imamat',
'imdb',
'immo',
'immobilien',
'in',
'inc',
'industries',
'infiniti',
'info',
'ing',
'ink',
'institute',
'insurance',
'insure',
'int',
'international',
'intuit',
'investments',
'io',
'ipiranga',
'iq',
'ir',
'irish',
'is',
'ismaili',
'ist',
'istanbul',
'it',
'itau',
'itv',
'jaguar',
'java',
'jcb',
'je',
'jeep',
'jetzt',
'jewelry',
'jio',
'jll',
'jm',
'jmp',
'jnj',
'jo',
'jobs',
'joburg',
'jot',
'joy',
'jp',
'jpmorgan',
'jprs',
'juegos',
'juniper',
'kaufen',
'kddi',
'ke',
'kerryhotels',
'kerrylogistics',
'kerryproperties',
'kfh',
'kg',
'kh',
'ki',
'kia',
'kids',
'kim',
'kinder',
'kindle',
'kitchen',
'kiwi',
'km',
'kn',
'koeln',
'komatsu',
'kosher',
'kp',
'kpmg',
'kpn',
'kr',
'krd',
'kred',
'kuokgroup',
'kw',
'ky',
'kyoto',
'kz',
'la',
'lacaixa',
'lamborghini',
'lamer',
'lancaster',
'lancia',
'land',
'landrover',
'lanxess',
'lasalle',
'lat',
'latino',
'latrobe',
'law',
'lawyer',
'lb',
'lc',
'lds',
'lease',
'leclerc',
'lefrak',
'legal',
'lego',
'lexus',
'lgbt',
'li',
'lidl',
'life',
'lifeinsurance',
'lifestyle',
'lighting',
'like',
'lilly',
'limited',
'limo',
'lincoln',
'linde',
'link',
'lipsy',
'live',
'living',
'lk',
'llc',
'llp',
'loan',
'loans',
'locker',
'locus',
'loft',
'lol',
'london',
'lotte',
'lotto',
'love',
'lpl',
'lplfinancial',
'lr',
'ls',
'lt',
'ltd',
'ltda',
'lu',
'lundbeck',
'luxe',
'luxury',
'lv',
'ly',
'ma',
'macys',
'madrid',
'maif',
'maison',
'makeup',
'man',
'management',
'mango',
'map',
'market',
'marketing',
'markets',
'marriott',
'marshalls',
'maserati',
'mattel',
'mba',
'mc',
'mckinsey',
'md',
'me',
'med',
'media',
'meet',
'melbourne',
'meme',
'memorial',
'men',
'menu',
'merckmsd',
'mg',
'mh',
'miami',
'microsoft',
'mil',
'mini',
'mint',
'mit',
'mitsubishi',
'mk',
'ml',
'mlb',
'mls',
'mm',
'mma',
'mn',
'mo',
'mobi',
'mobile',
'moda',
'moe',
'moi',
'mom',
'monash',
'money',
'monster',
'mormon',
'mortgage',
'moscow',
'moto',
'motorcycles',
'mov',
'movie',
'mp',
'mq',
'mr',
'ms',
'msd',
'mt',
'mtn',
'mtr',
'mu',
'museum',
'music',
'mutual',
'mv',
'mw',
'mx',
'my',
'mz',
'na',
'nab',
'nagoya',
'name',
'natura',
'navy',
'nba',
'nc',
'ne',
'nec',
'net',
'netbank',
'netflix',
'network',
'neustar',
'new',
'news',
'next',
'nextdirect',
'nexus',
'nf',
'nfl',
'ng',
'ngo',
'nhk',
'ni',
'nico',
'nike',
'nikon',
'ninja',
'nissan',
'nissay',
'nl',
'no',
'nokia',
'northwesternmutual',
'norton',
'now',
'nowruz',
'nowtv',
'np',
'nr',
'nra',
'nrw',
'ntt',
'nu',
'nyc',
'nz',
'obi',
'observer',
'office',
'okinawa',
'olayan',
'olayangroup',
'oldnavy',
'ollo',
'om',
'omega',
'one',
'ong',
'onl',
'online',
'ooo',
'open',
'oracle',
'orange',
'org',
'organic',
'origins',
'osaka',
'otsuka',
'ott',
'ovh',
'pa',
'page',
'panasonic',
'paris',
'pars',
'partners',
'parts',
'party',
'passagens',
'pay',
'pccw',
'pe',
'pet',
'pf',
'pfizer',
'pg',
'ph',
'pharmacy',
'phd',
'philips',
'phone',
'photo',
'photography',
'photos',
'physio',
'pics',
'pictet',
'pictures',
'pid',
'pin',
'ping',
'pink',
'pioneer',
'pizza',
'pk',
'pl',
'place',
'play',
'playstation',
'plumbing',
'plus',
'pm',
'pn',
'pnc',
'pohl',
'poker',
'politie',
'porn',
'post',
'pr',
'pramerica',
'praxi',
'press',
'prime',
'pro',
'prod',
'productions',
'prof',
'progressive',
'promo',
'properties',
'property',
'protection',
'pru',
'prudential',
'ps',
'pt',
'pub',
'pw',
'pwc',
'py',
'qa',
'qpon',
'quebec',
'quest',
'racing',
'radio',
're',
'read',
'realestate',
'realtor',
'realty',
'recipes',
'red',
'redstone',
'redumbrella',
'rehab',
'reise',
'reisen',
'reit',
'reliance',
'ren',
'rent',
'rentals',
'repair',
'report',
'republican',
'rest',
'restaurant',
'review',
'reviews',
'rexroth',
'rich',
'richardli',
'ricoh',
'ril',
'rio',
'rip',
'ro',
'rocher',
'rocks',
'rodeo',
'rogers',
'room',
'rs',
'rsvp',
'ru',
'rugby',
'ruhr',
'run',
'rw',
'rwe',
'ryukyu',
'sa',
'saarland',
'safe',
'safety',
'sakura',
'sale',
'salon',
'samsclub',
'samsung',
'sandvik',
'sandvikcoromant',
'sanofi',
'sap',
'sarl',
'sas',
'save',
'saxo',
'sb',
'sbi',
'sbs',
'sc',
'sca',
'scb',
'schaeffler',
'schmidt',
'scholarships',
'school',
'schule',
'schwarz',
'science',
'scot',
'sd',
'se',
'search',
'seat',
'secure',
'security',
'seek',
'select',
'sener',
'services',
'ses',
'seven',
'sew',
'sex',
'sexy',
'sfr',
'sg',
'sh',
'shangrila',
'sharp',
'shaw',
'shell',
'shia',
'shiksha',
'shoes',
'shop',
'shopping',
'shouji',
'show',
'showtime',
'si',
'silk',
'sina',
'singles',
'site',
'sj',
'sk',
'ski',
'skin',
'sky',
'skype',
'sl',
'sling',
'sm',
'smart',
'smile',
'sn',
'sncf',
'so',
'soccer',
'social',
'softbank',
'software',
'sohu',
'solar',
'solutions',
'song',
'sony',
'soy',
'spa',
'space',
'sport',
'spot',
'sr',
'srl',
'ss',
'st',
'stada',
'staples',
'star',
'statebank',
'statefarm',
'stc',
'stcgroup',
'stockholm',
'storage',
'store',
'stream',
'studio',
'study',
'style',
'su',
'sucks',
'supplies',
'supply',
'support',
'surf',
'surgery',
'suzuki',
'sv',
'swatch',
'swiss',
'sx',
'sy',
'sydney',
'systems',
'sz',
'tab',
'taipei',
'talk',
'taobao',
'target',
'tatamotors',
'tatar',
'tattoo',
'tax',
'taxi',
'tc',
'tci',
'td',
'tdk',
'team',
'tech',
'technology',
'tel',
'temasek',
'tennis',
'teva',
'tf',
'tg',
'th',
'thd',
'theater',
'theatre',
'tiaa',
'tickets',
'tienda',
'tiffany',
'tips',
'tires',
'tirol',
'tj',
'tjmaxx',
'tjx',
'tk',
'tkmaxx',
'tl',
'tm',
'tmall',
'tn',
'to',
'today',
'tokyo',
'tools',
'top',
'toray',
'toshiba',
'total',
'tours',
'town',
'toyota',
'toys',
'tr',
'trade',
'trading',
'training',
'travel',
'travelchannel',
'travelers',
'travelersinsurance',
'trust',
'trv',
'tt',
'tube',
'tui',
'tunes',
'tushu',
'tv',
'tvs',
'tw',
'tz',
'ua',
'ubank',
'ubs',
'ug',
'uk',
'unicom',
'university',
'uno',
'uol',
'ups',
'us',
'uy',
'uz',
'va',
'vacations',
'vana',
'vanguard',
'vc',
've',
'vegas',
'ventures',
'verisign',
'versicherung',
'vet',
'vg',
'vi',
'viajes',
'video',
'vig',
'viking',
'villas',
'vin',
'vip',
'virgin',
'visa',
'vision',
'viva',
'vivo',
'vlaanderen',
'vn',
'vodka',
'volkswagen',
'volvo',
'vote',
'voting',
'voto',
'voyage',
'vu',
'vuelos',
'wales',
'walmart',
'walter',
'wang',
'wanggou',
'watch',
'watches',
'weather',
'weatherchannel',
'webcam',
'weber',
'website',
'wed',
'wedding',
'weibo',
'weir',
'wf',
'whoswho',
'wien',
'wiki',
'williamhill',
'win',
'windows',
'wine',
'winners',
'wme',
'wolterskluwer',
'woodside',
'work',
'works',
'world',
'wow',
'ws',
'wtc',
'wtf',
'xbox',
'xerox',
'xfinity',
'xihuan',
'xin',
'xxx',
'xyz',
'yachts',
'yahoo',
'yamaxun',
'yandex',
'ye',
'yodobashi',
'yoga',
'yokohama',
'you',
'youtube',
'yt',
'yun',
'za',
'zappos',
'zara',
'zero',
'zip',
'zm',
'zone',
'zuerich',
'zw',
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Does not work for
sub.example.co
and returns sub.example.co