Skip to content

Instantly share code, notes, and snippets.

@Scarjit
Last active February 22, 2022 11:31
Show Gist options
  • Save Scarjit/da7663bfd5a6754a1b41bb7cffe4032f to your computer and use it in GitHub Desktop.
Save Scarjit/da7663bfd5a6754a1b41bb7cffe4032f to your computer and use it in GitHub Desktop.
This python file checks the private domain part of the PSL against 3 major DNS providers and writes a report to a JSON file.
#!/usr/bin/env python3
"""
This python file checks the private domain part of the PSL against 3 major DNS providers
and writes an report to a json file.
The json file only contains entries wich have no valid TXT record on there _psl. domain
as described here: https://github.com/publicsuffix/list/wiki/Guidelines#rfc8553-dns-authentication
It also filters any entry, which was submitted by E-Mail authentication
"""
from typing import Optional
import requests
import dns.resolver
from enum import Enum
import re
from joblib import Parallel, delayed
import json
# Resolver that queries a fixed pool of well-known public DNS servers
# (IPv4 + IPv6 for each provider) instead of the system-configured ones.
res = dns.resolver.Resolver(configure=False)

_GOOGLE_DNS = ['8.8.8.8', '2001:4860:4860::8888',
               '8.8.4.4', '2001:4860:4860::8844']
_CLOUDFLARE_DNS = ['1.1.1.1', '2606:4700:4700::1111',
                   '1.0.0.1', '2606:4700:4700::1001']
_QUAD9_DNS = ['9.9.9.9', '2620:fe::fe',
              '149.112.112.112', '2620:fe::9']

res.nameservers = _GOOGLE_DNS + _CLOUDFLARE_DNS + _QUAD9_DNS
class DIG_RESULT(str, Enum):
    """Outcome of a single ``_psl.<domain>`` TXT lookup.

    Mixes in ``str`` so members serialize directly via ``json.dump``.
    """
    # NOTE: the original code had trailing commas after the first two values,
    # making the raw values 1-tuples; that only worked because the str mixin
    # unpacks tuple values. Plain strings are unambiguous.
    OK = "OK"                          # TXT record points at a PSL pull request
    NO_ANSWER = "NO_ANSWER"            # name exists but carries no TXT record
    NXDOMAIN = "NXDOMAIN"              # _psl.<domain> does not exist
    INVALID_URL = "INVALID_URL"        # TXT has a URL, but not a PSL PR link
    NO_URL = "NO_URL"                  # TXT record contains no URL at all
    TIMEOUT = "TIMEOUT"                # lookup exceeded the resolver lifetime
    NO_NAMESERVERS = "NO_NAMESERVERS"  # every nameserver failed to answer
# Matches the first http(s) URL in a TXT record payload.
# (Raw string: the original used "\s" in a plain string, which is a
# deprecated invalid escape on modern Python.)
RE_URL = re.compile(r"(?P<url>https?://\S+)")


def dig(url: str) -> dict:
    """Resolve the ``_psl.<url>`` TXT record and classify the result.

    Returns a dict with a ``"result"`` (DIG_RESULT) and the queried ``"url"``;
    NO_URL results also carry the raw TXT payload for debugging.
    """
    try:
        for rdata in res.resolve(f"_psl.{url}", 'TXT'):
            match = RE_URL.search(str(rdata))
            if match is None:  # was `== None`; identity check is correct here
                return {"result": DIG_RESULT.NO_URL, "url": url, "dig_raw_response": str(rdata)}
            # dnspython renders TXT data with surrounding quotes; strip them.
            found_url = match.group("url").replace("\"", "")
            # Per the PSL wiki, the record must point at the submitting PR:
            # https://github.com/publicsuffix/list/wiki/Guidelines#rfc8553-dns-authentication
            if "https://github.com/publicsuffix/list/pull" in found_url:
                return {"result": DIG_RESULT.OK, "url": url}
            return {"result": DIG_RESULT.INVALID_URL, "url": url}
        # Defensive: resolve() normally raises NoAnswer rather than yielding
        # an empty answer set, but return explicitly instead of falling
        # through to an implicit None.
        return {"result": DIG_RESULT.NO_ANSWER, "url": url}
    except dns.resolver.NoAnswer:
        return {"result": DIG_RESULT.NO_ANSWER, "url": url}
    except dns.resolver.NXDOMAIN:
        return {"result": DIG_RESULT.NXDOMAIN, "url": url}
    except dns.resolver.LifetimeTimeout:
        return {"result": DIG_RESULT.TIMEOUT, "url": url}
    except dns.resolver.NoNameservers:
        return {"result": DIG_RESULT.NO_NAMESERVERS, "url": url}
url = "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat"
lines = requests.get(url).text
privates = []
record = False
skipToNewLine = False
for l in lines.split('\n'):
if len(l) == 0:
skipToNewLine = False
continue
if l == "// ===BEGIN PRIVATE DOMAINS===":
record = True
if not record:
continue
if "// Submited by" in l or "// Submitted by" in l:
skipToNewLine = True
continue
if skipToNewLine:
continue
if l[0] == '/':
continue
url = l.strip().replace("*.","")
privates.append(url)
results = Parallel(n_jobs=32)(delayed(dig)(urlx) for urlx in privates)
filtered_results = list(filter(lambda ret: ret["result"] != DIG_RESULT.OK, results))
with open('dig-result.json', 'w') as f:
json.dump(filtered_results, f, sort_keys=True, indent=4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment