Skip to content

Instantly share code, notes, and snippets.

@pocc
Created October 2, 2021 18:55
Show Gist options
  • Save pocc/aa084a7d1d3a85d479ce66d97474d67d to your computer and use it in GitHub Desktop.
Save pocc/aa084a7d1d3a85d479ce66d97474d67d to your computer and use it in GitHub Desktop.
A way to find ccSLDs (country-code second-level domains) that are also valid websites
# This script checks whether each suffix in the Public Suffix List resolves to an IP address
import re
import requests
import socket
PUBLIC_SUFFIX_URL = 'https://publicsuffix.org/list/public_suffix_list.dat'
OUTPUT_PATH = "updated_public_suffix_list.dat"
# Written in place of the contact when a suffix's comment header has no e-mail.
NO_CONTACT = "unknown"
# First e-mail-looking token on a line (same character classes the script
# has always used); compiled once because it runs on every comment line.
EMAIL_RE = re.compile(r"[\w.-]+@[\w.-]+\.[\w.-]+")


def parse_suffix_list(list_text):
    """Map each plain suffix rule in *list_text* to its maintainer contact.

    The text is scanned once, line by line:

    * blank lines are ignored;
    * lines starting with ``//`` are comments; the most recent e-mail address
      seen in the comment header preceding a group of rules becomes the
      contact for every rule in that group;
    * wildcard rules (containing ``*``) and exception rules (starting with
      ``!``) are skipped because they are not resolvable host names.

    Args:
        list_text: Contents of a Public Suffix List file.

    Returns:
        A dict, in file order, of ``suffix -> contact e-mail`` where the
        contact is ``None`` if the preceding comment header held no e-mail.
    """
    contacts = {}
    current_contact = None
    seen_rule_since_comment = False
    for raw_line in list_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('//'):
            # A comment that follows rules starts a new header, so the
            # previous group's contact must not leak into the next group.
            if seen_rule_since_comment:
                current_contact = None
                seen_rule_since_comment = False
            found = EMAIL_RE.search(line)
            if found:
                current_contact = found[0]
            continue
        seen_rule_since_comment = True
        # Only the text up to the first whitespace is the rule itself.
        rule = line.split()[0]
        if '*' in rule or rule.startswith('!'):
            continue
        contacts.setdefault(rule, current_contact)
    return contacts


def resolve_ipv4(hostname):
    """Return the IPv4 address *hostname* resolves to, or ``None`` on failure.

    Only lookup failures are treated as "does not resolve": ``OSError``
    (which includes ``socket.gaierror``) and ``UnicodeError`` (raised when a
    name cannot be IDNA-encoded). Anything else propagates.
    """
    try:
        return socket.gethostbyname(hostname)
    except (OSError, UnicodeError):
        return None


def main():
    """Download the list, resolve every suffix and write the ones that resolve.

    Each resolving suffix is printed and stored as ``"<suffix> <ip> <contact>"``;
    a ``.`` is printed for every suffix that does not resolve. The collected
    lines are written, newline-separated, to ``OUTPUT_PATH``.

    Raises:
        requests.RequestException: if the download fails, times out or
            returns an HTTP error status.
    """
    resp = requests.get(PUBLIC_SUFFIX_URL, timeout=30)
    resp.raise_for_status()
    # The list is UTF-8; don't let a missing charset header fall back to Latin-1.
    resp.encoding = 'utf-8'

    invalid_tlds = []
    for tld, contact in parse_suffix_list(resp.text).items():
        addr = resolve_ipv4(tld)
        if addr is None:
            print('.', sep='', end='', flush=True)
            continue
        # A missing contact must not hide the fact that the suffix resolves.
        contact = contact or NO_CONTACT
        invalid_tlds.append(f"{tld} {addr} {contact}")
        print('\n', tld, "can be found at", addr, "with contact", contact)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
        print(invalid_tlds)
        f.write('\n'.join(invalid_tlds))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment