Last active
February 22, 2022 11:31
-
-
Save Scarjit/da7663bfd5a6754a1b41bb7cffe4032f to your computer and use it in GitHub Desktop.
This Python file checks the private-domain part of the PSL against 3 major DNS providers and writes a report to a JSON file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
This Python file checks the private-domain part of the PSL against 3 major DNS providers
and writes a report to a JSON file.
The JSON file only contains entries which have no valid TXT record on their _psl. domain
as described here: https://github.com/publicsuffix/list/wiki/Guidelines#rfc8553-dns-authentication
It also filters out any entry which was submitted via e-mail authentication.
"""
from typing import Optional
import requests
import dns.resolver
from enum import Enum
import re
from joblib import Parallel, delayed
import json
# IPv4 + IPv6 endpoints for each of the three public DNS providers queried.
_GOOGLE_DNS = ['8.8.8.8', '2001:4860:4860::8888',
               '8.8.4.4', '2001:4860:4860::8844']
_CLOUDFLARE_DNS = ['1.1.1.1', '2606:4700:4700::1111',
                   '1.0.0.1', '2606:4700:4700::1001']
_QUAD9_DNS = ['9.9.9.9', '2620:fe::fe',
              '149.112.112.112', '2620:fe::9']

# Shared resolver used by dig(); configure=False skips the OS resolver
# configuration so lookups go only to the providers listed above.
res = dns.resolver.Resolver(configure=False)
res.nameservers = _GOOGLE_DNS + _CLOUDFLARE_DNS + _QUAD9_DNS
class DIG_RESULT(str, Enum):
    """Outcome of a ``_psl.<domain>`` TXT lookup.

    Inherits from ``str`` so members serialize directly via ``json.dump``.

    NOTE: the original had trailing commas on OK and NO_ANSWER, turning those
    values into 1-tuples that only worked because Enum unpacks tuple values as
    constructor args for the str mixin — removed, the resulting values are the
    same strings either way.
    """
    OK = "OK"                          # TXT record links to a PSL pull request
    NO_ANSWER = "NO_ANSWER"            # domain exists but has no TXT record
    NXDOMAIN = "NXDOMAIN"              # _psl.<domain> does not exist
    INVALID_URL = "INVALID_URL"        # TXT has a URL, but not a PSL PR link
    NO_URL = "NO_URL"                  # TXT record contains no URL at all
    TIMEOUT = "TIMEOUT"                # lookup exceeded the resolver lifetime
    NO_NAMESERVERS = "NO_NAMESERVERS"  # every nameserver failed to answer
# Matches the first http(s) URL in a TXT record.  A raw string replaces the
# original plain literal, whose "\s" is an invalid escape sequence
# (DeprecationWarning today, an error in future Python); \S == [^\s].
RE_URL = re.compile(r"(?P<url>https?://\S+)")
def dig(url: str) -> dict:
    """Check the ``_psl.<url>`` TXT record required for PSL DNS authentication.

    Per https://github.com/publicsuffix/list/wiki/Guidelines#rfc8553-dns-authentication
    a valid record contains a link to a publicsuffix/list pull request.

    Returns a dict with at least ``{"result": DIG_RESULT, "url": url}``; for
    NO_URL the raw TXT answer is included under ``"dig_raw_response"``.

    Fixes vs. original: the original returned after inspecting only the FIRST
    TXT record, so a domain publishing several records was wrongly reported
    NO_URL/INVALID_URL even when another record held the PR link; it also
    compared with ``== None`` and could implicitly return None on an empty
    answer set.
    """
    try:
        answers = res.resolve(f"_psl.{url}", 'TXT')
    except dns.resolver.NoAnswer:
        return {"result": DIG_RESULT.NO_ANSWER, "url": url}
    except dns.resolver.NXDOMAIN:
        return {"result": DIG_RESULT.NXDOMAIN, "url": url}
    except dns.resolver.LifetimeTimeout:
        return {"result": DIG_RESULT.TIMEOUT, "url": url}
    except dns.resolver.NoNameservers:
        return {"result": DIG_RESULT.NO_NAMESERVERS, "url": url}

    # Accept the record set if ANY TXT record carries the PR link; otherwise
    # report the most informative failure seen (INVALID_URL beats NO_URL).
    verdict = None
    for rdata in answers:
        match = RE_URL.search(str(rdata))
        if match is None:
            if verdict is None:
                verdict = {"result": DIG_RESULT.NO_URL, "url": url,
                           "dig_raw_response": str(rdata)}
            continue
        found = match.group("url").replace("\"", "")
        if "https://github.com/publicsuffix/list/pull" in found:
            return {"result": DIG_RESULT.OK, "url": url}
        verdict = {"result": DIG_RESULT.INVALID_URL, "url": url}
    # Empty answer sets should not occur (NoAnswer raises), but never return None.
    return verdict or {"result": DIG_RESULT.NO_ANSWER, "url": url}
# Download the current Public Suffix List and collect the private-domain
# entries, skipping any entry whose preceding comment shows it was submitted
# (and therefore authenticated) by e-mail rather than by DNS TXT record.
psl_url = "https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat"
# timeout prevents an indefinite hang; raise_for_status fails loudly instead
# of silently parsing an HTTP error page as the PSL.
response = requests.get(psl_url, timeout=60)
response.raise_for_status()
lines = response.text

privates = []
record = False          # True once inside ===BEGIN PRIVATE DOMAINS===
skipToNewLine = False   # True while skipping a "Submitted by" entry block
for l in lines.split('\n'):
    if len(l) == 0:
        # Blank lines separate entry blocks; stop skipping at each one.
        skipToNewLine = False
        continue
    if l == "// ===BEGIN PRIVATE DOMAINS===":
        record = True
    if not record:
        continue
    # Both spellings occur in the PSL's comment history.
    if "// Submited by" in l or "// Submitted by" in l:
        skipToNewLine = True
        continue
    if skipToNewLine:
        continue
    if l[0] == '/':
        continue  # any other comment line (including the BEGIN/END markers)
    # Strip wildcard labels so the bare registrable domain is queried.
    privates.append(l.strip().replace("*.", ""))
# Query every private-domain entry in parallel, keep only the failures
# (anything that did not come back DIG_RESULT.OK), and write them to disk.
results = Parallel(n_jobs=32)(delayed(dig)(urlx) for urlx in privates)
filtered_results = [entry for entry in results if entry["result"] != DIG_RESULT.OK]
with open('dig-result.json', 'w') as f:
    json.dump(filtered_results, f, sort_keys=True, indent=4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment