Skip to content

Instantly share code, notes, and snippets.

@MOOOWOOO
Created April 7, 2023 11:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MOOOWOOO/a8e714bbf66e44d5755ceb5bce474510 to your computer and use it in GitHub Desktop.
Save MOOOWOOO/a8e714bbf66e44d5755ceb5bce474510 to your computer and use it in GitHub Desktop.
Extract the domain name from a URL-like string
import re
# Leading "scheme://" (any scheme) or a scheme-relative "//" prefix.
_SCHEME_PREFIX = re.compile(r"^(?:[a-z][a-z0-9+.\-]*:)?//")
# First host label after an optional "www.", then any further dot-separated
# labels and an optional trailing dot, up to the end of the host.
_HOST_LABEL = re.compile(r"(?:www\.)?([^./]+)(?:\.[^./]+)*\.?$")


def extract_domain_name(url):
    """
    Return the first host label of a URL-like string.

    The input is stripped and lower-cased, a leading scheme (or "//") is
    removed, and everything from the first "/", "?" or "#" onwards is
    discarded so that paths of any depth, query strings and fragments never
    leak into the result.  An optional leading "www." and any leading dots
    are skipped.

    Note that this is the first label, not a public-suffix-aware registrable
    domain: "example.com.cn" yields "example", and "blog.example.com"
    yields "blog".

    Args:
        url: The URL-like string to inspect.

    Returns:
        The extracted label, or an empty string if ``url`` is not a string
        or contains no host label.
    """
    if not isinstance(url, str):
        return ""

    candidate = _SCHEME_PREFIX.sub("", url.strip().lower(), count=1)
    # Keep only the authority part; the path may contain any number of
    # segments, which the original single-segment pattern could not handle.
    host = re.split(r"[/?#]", candidate, maxsplit=1)[0]

    # search (not match) so that leading dots such as ".example.org" are
    # skipped rather than rejected.
    match = _HOST_LABEL.search(host)
    return match.group(1) if match else ""
from extract_domain_name import extract_domain_name
# Sample inputs covering schemes, "www." prefixes, paths, multi-part
# suffixes, and stray leading/trailing dots.
test_cases = [
    'http://www.example.com',
    'http://www.example.cc/ad',
    'www.example.net',
    'www.example.net/ad',
    'example.xyz/ad',
    'example.com.cn',
    '.example.org',
    '.example.com/ad',
    'example.',
    '.example',
    '.example.',
    'example',
]

# Print each input next to the label extracted from it.
for test_case in test_cases:
    print(f'{test_case}: {extract_domain_name(test_case)}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment