Last active
February 22, 2022 11:31
-
-
Save Scarjit/da7663bfd5a6754a1b41bb7cffe4032f to your computer and use it in GitHub Desktop.
This Python file checks the private-domain part of the PSL against 3 major DNS providers and writes a report to a JSON file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
This Python file checks the private-domain part of the PSL against 3 major DNS providers
and writes a report to a JSON file.
The JSON file only contains entries which have no valid TXT record on their _psl. domain
as described here: https://github.com/publicsuffix/list/wiki/Guidelines#rfc8553-dns-authentication
It also filters out any entry which was submitted via e-mail authentication.
"""
from typing import Optional
import requests
import dns.resolver
from enum import Enum
import re
from joblib import Parallel, delayed
import json
# IPv4 + IPv6 endpoints for each of the three public DNS providers queried.
_GOOGLE_DNS = ['8.8.8.8', '2001:4860:4860::8888',
               '8.8.4.4', '2001:4860:4860::8844']
_CLOUDFLARE_DNS = ['1.1.1.1', '2606:4700:4700::1111',
                   '1.0.0.1', '2606:4700:4700::1001']
_QUAD9_DNS = ['9.9.9.9', '2620:fe::fe',
              '149.112.112.112', '2620:fe::9']

# Shared resolver used by dig(); configure=False skips the OS resolver
# configuration so lookups go only to the providers listed above.
res = dns.resolver.Resolver(configure=False)
res.nameservers = _GOOGLE_DNS + _CLOUDFLARE_DNS + _QUAD9_DNS
class DIG_RESULT(str, Enum):
    """Outcome of a ``_psl.<domain>`` TXT lookup.

    Inherits from ``str`` so members serialize directly via ``json.dump``.

    NOTE: the original had trailing commas on OK and NO_ANSWER, turning those
    values into 1-tuples that only worked because Enum unpacks tuple values as
    constructor args for the str mixin — removed, the resulting values are the
    same strings either way.
    """
    OK = "OK"                          # TXT record links to a PSL pull request
    NO_ANSWER = "NO_ANSWER"            # domain exists but has no TXT record
    NXDOMAIN = "NXDOMAIN"              # _psl.<domain> does not exist
    INVALID_URL = "INVALID_URL"        # TXT has a URL, but not a PSL PR link
    NO_URL = "NO_URL"                  # TXT record contains no URL at all
    TIMEOUT = "TIMEOUT"                # lookup exceeded the resolver lifetime
    NO_NAMESERVERS = "NO_NAMESERVERS"  # every nameserver failed to answer
# Matches the first http(s) URL in a TXT record.  A raw string replaces the
# original plain literal, whose "\s" is an invalid escape sequence
# (DeprecationWarning today, an error in future Python); \S == [^\s].
RE_URL = re.compile(r"(?P<url>https?://\S+)")
def dig(url: str) -> dict:
    """Check the ``_psl.<url>`` TXT record required for PSL DNS authentication.

    Per https://github.com/publicsuffix/list/wiki/Guidelines#rfc8553-dns-authentication
    a valid record contains a link to a publicsuffix/list pull request.

    Returns a dict with at least ``{"result": DIG_RESULT, "url": url}``; for
    NO_URL the raw TXT answer is included under ``"dig_raw_response"``.

    Fixes vs. original: the original returned after inspecting only the FIRST
    TXT record, so a domain publishing several records was wrongly reported
    NO_URL/INVALID_URL even when another record held the PR link; it also
    compared with ``== None`` and could implicitly return None on an empty
    answer set.
    """
    try:
        answers = res.resolve(f"_psl.{url}", 'TXT')
    except dns.resolver.NoAnswer:
        return {"result": DIG_RESULT.NO_ANSWER, "url": url}
    except dns.resolver.NXDOMAIN:
        return {"result": DIG_RESULT.NXDOMAIN, "url": url}
    except dns.resolver.LifetimeTimeout:
        return {"result": DIG_RESULT.TIMEOUT, "url": url}
    except dns.resolver.NoNameservers:
        return {"result": DIG_RESULT.NO_NAMESERVERS, "url": url}

    # Accept the record set if ANY TXT record carries the PR link; otherwise
    # report the most informative failure seen (INVALID_URL beats NO_URL).
    verdict = None
    for rdata in answers:
        match = RE_URL.search(str(rdata))
        if match is None:
            if verdict is None:
                verdict = {"result": DIG_RESULT.NO_URL, "url": url,
                           "dig_raw_response": str(rdata)}
            continue
        found = match.group("url").replace("\"", "")
        if "https://github.com/publicsuffix/list/pull" in found:
            return {"result": DIG_RESULT.OK, "url": url}
        verdict = {"result": DIG_RESULT.INVALID_URL, "url": url}
    # Empty answer sets should not occur (NoAnswer raises), but never return None.
    return verdict or {"result": DIG_RESULT.NO_ANSWER, "url": url}
# Download the current Public Suffix List and collect the private-domain
# entries, skipping any entry whose preceding comment shows it was submitted
# (and therefore authenticated) by e-mail rather than by DNS TXT record.
psl_url = "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat"
# timeout prevents an indefinite hang; raise_for_status fails loudly instead
# of silently parsing an HTTP error page as the PSL.
response = requests.get(psl_url, timeout=60)
response.raise_for_status()
lines = response.text

privates = []
record = False          # True once inside ===BEGIN PRIVATE DOMAINS===
skipToNewLine = False   # True while skipping a "Submitted by" entry block
for l in lines.split('\n'):
    if len(l) == 0:
        # Blank lines separate entry blocks; stop skipping at each one.
        skipToNewLine = False
        continue
    if l == "// ===BEGIN PRIVATE DOMAINS===":
        record = True
    if not record:
        continue
    # Both spellings occur in the PSL's comment history.
    if "// Submited by" in l or "// Submitted by" in l:
        skipToNewLine = True
        continue
    if skipToNewLine:
        continue
    if l[0] == '/':
        continue  # any other comment line (including the BEGIN/END markers)
    # Strip wildcard labels so the bare registrable domain is queried.
    privates.append(l.strip().replace("*.", ""))
# Query every private-domain entry in parallel, keep only the failures
# (anything that did not come back DIG_RESULT.OK), and write them to disk.
results = Parallel(n_jobs=32)(delayed(dig)(urlx) for urlx in privates)
filtered_results = [entry for entry in results if entry["result"] != DIG_RESULT.OK]
with open('dig-result.json', 'w') as f:
    json.dump(filtered_results, f, sort_keys=True, indent=4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment