Skip to content

Instantly share code, notes, and snippets.

@aufzayed
Created July 9, 2020 10:28
Show Gist options
  • Save aufzayed/191c1e9c82a903e171f812d2ef450ba4 to your computer and use it in GitHub Desktop.
Save aufzayed/191c1e9c82a903e171f812d2ef450ba4 to your computer and use it in GitHub Desktop.
Python script to organize a big list of subdomains
#!/usr/bin/env python3
import re, sys, json
# usage: dorg.py subdomains_list.txt domain_name
# python3 dorg.py example_com_subs.txt example.com
# Command-line inputs: the subdomain list to read and the domain to group by.
file_path = sys.argv[1]
domain = sys.argv[2]
# Matches one DNS label followed by ".<domain>", e.g. "api.example.com" when the
# domain is "example.com". The fixed part is a raw string so that "\-" and "\."
# reach the regex engine untouched instead of being parsed as (invalid) string
# escapes, and re.escape() neutralises every regex metacharacter in the
# user-supplied domain rather than only the dots.
pattern = r'[a-zA-Z0-9\-]+\.' + re.escape(domain)
def extract(file_path, pattern):
    """Read a subdomain list and collect the distinct names matching *pattern*.

    Args:
        file_path: Path of a text file holding one entry per line.
        pattern: Regular expression applied to every line.

    Returns:
        A ``(domains, fdomains)`` tuple. ``domains`` is every line of the file
        in order with its newline removed (blank and non-matching lines are
        kept). ``fdomains`` is the sorted list of unique first matches, one
        candidate per matching line.
    """
    # Compile once instead of re-resolving the pattern for every line.
    matcher = re.compile(pattern)
    with open(file_path) as dlist:
        domains = [line.split('\n')[0] for line in dlist]

    fdomains = set()
    for line in domains:
        hits = matcher.findall(line)
        # Only the first match on a line is recorded; lines without a match
        # are skipped explicitly rather than via a caught IndexError.
        if hits:
            fdomains.add(hits[0])
    return domains, sorted(fdomains)
def organize(data, regex=None):
    """Group raw entries under the subdomain each one mentions.

    Args:
        data: A ``(domains, fdomains)`` pair as returned by ``extract``:
            the raw lines, and the subdomain names to use as keys.
        regex: Optional regular expression used to find the subdomain in each
            line. When omitted, the module-level ``pattern`` is used, so
            existing one-argument calls behave as before.

    Returns:
        A JSON object string mapping every name in ``fdomains`` to the list of
        raw lines whose first match equals that name. Lines with no match are
        left out.

    Raises:
        KeyError: If a line's first match is not one of the ``fdomains`` keys.
    """
    domains, fdomains = data[0], data[1]
    matcher = re.compile(pattern if regex is None else regex)
    out = {name: [] for name in fdomains}
    for line in domains:
        # One regex evaluation per line; the first hit selects the bucket.
        hits = matcher.findall(line)
        if hits:
            out[hits[0]].append(line)
    return json.dumps(out)
# Script entry: parse the list, group it by subdomain, and write the JSON to stdout.
_grouped_json = organize(extract(file_path, pattern))
print(_grouped_json)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment