Skip to content

Instantly share code, notes, and snippets.

@HackingLZ
Last active December 5, 2023 22:50
  • Star 5 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save HackingLZ/8fed5fa4983b63b773380e1a8e82478a to your computer and use it in GitHub Desktop.
coalmine2.py
#!/usr/bin/python3
import re
import zipfile
import argparse
from urllib.parse import urlparse
from colorama import Fore, Style, init
init()
# Sets are used for the domain lists for simplicity and readability.

# URLs on these domains are filtered out of the results before printing.
ignore_list = {
    'purl.org',
    'microsoft.com',
    'openxmlformats.org',
    'w3.org',
}

# URLs on these domains are printed in the alert colour instead of the
# default one.
alert_list = {
    'internalcanarytokendomain.org',
    'canarytokens.com',
}

# Matches an http(s) scheme, a host made of word characters, dots and
# hyphens, then an optional path that may contain slashes, word characters,
# spaces, dots and hyphens.
# A raw string is used so the backslashes reach the regex engine untouched;
# in a normal string literal `\/` and `\w` are invalid escape sequences that
# newer Python versions warn about.
url_pattern = re.compile(r"(https?:\/\/[\w.-]+[\/\w .-]*)")
def parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            calling ``parse_args()`` with no arguments reads the real
            command line.

    Returns:
        The ``argparse.Namespace`` whose ``input`` attribute holds the value
        given with ``--input`` / ``-i``.

    Raises:
        SystemExit: raised by argparse when the required ``--input`` option
            is missing or the arguments are otherwise invalid.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", "-i", required=True, help="Input file")
    return ap.parse_args(argv)
def url_in_list(url, lst):
    """Tell whether the host of *url* belongs to one of the domains in *lst*.

    A host belongs to a domain when it is exactly that domain or one of its
    subdomains. The comparison is anchored on a label boundary so that a
    look-alike host such as ``evilmicrosoft.com`` is NOT treated as
    ``microsoft.com``.

    Args:
        url: URL string to inspect.
        lst: Iterable of domain names (e.g. ``'microsoft.com'``).

    Returns:
        True if the URL's host matches any domain in *lst*; False otherwise,
        including when the URL has no host component at all.
    """
    hostname = urlparse(url).hostname
    # urlparse yields None for strings without a network location.
    if not hostname:
        return False
    # A fully-qualified host may carry a trailing root dot ("example.com.").
    hostname = hostname.rstrip('.')
    return any(
        hostname == domain or hostname.endswith('.' + domain)
        for domain in lst
    )
def extract_urls_from_file(file):
    """Collect every URL-looking string found in a binary file object.

    Args:
        file: Iterable yielding ``bytes`` lines, such as an archive member
            opened with ``ZipFile.open``.

    Returns:
        List of the strings matched by ``url_pattern``, in the order they
        were encountered. Duplicates are kept.
    """
    matches = []
    for line in file:
        # Archive members are not guaranteed to be UTF-8 text (they may be
        # binary payloads). Decode leniently so one undecodable byte does
        # not abort the whole scan; the U+FFFD replacement character is not
        # part of any URL character class, so it cannot extend a match.
        text = line.decode('utf-8', errors='replace')
        matches.extend(url_pattern.findall(text))
    return matches
def filter_urls(matches, ignore_list):
    """Return the URLs from *matches* that are not on an ignored domain.

    Args:
        matches: Iterable of URL strings.
        ignore_list: Collection of domains; a URL for which
            ``url_in_list(url, ignore_list)`` is true is dropped.

    Returns:
        A new list with the surviving URLs in their original order.
    """
    kept = []
    for candidate in matches:
        if url_in_list(candidate, ignore_list):
            continue
        kept.append(candidate)
    return kept
def print_colored_urls(matches, alert_list):
    """Print each URL on its own line, coloured by alert status.

    URLs for which ``url_in_list(url, alert_list)`` is true are printed in
    red; every other URL is printed in yellow. The colour is reset after
    each URL.

    Args:
        matches: Iterable of URL strings to print.
        alert_list: Collection of domains that trigger the red colour.
    """
    for url in matches:
        if url_in_list(url, alert_list):
            color = Fore.RED
        else:
            color = Fore.YELLOW
        print(f"{color}{url}{Style.RESET_ALL}")
def main():
    """Scan the archive named on the command line and print the URLs found.

    Opens the ``--input`` file as a zip archive, extracts URL matches from
    every member, drops those on ``ignore_list`` domains and prints the rest,
    highlighting the ones on ``alert_list`` domains.
    """
    options = parse_args()
    found = []
    with zipfile.ZipFile(options.input) as archive:
        for member in archive.filelist:
            with archive.open(member.filename) as handle:
                found += extract_urls_from_file(handle)
    print_colored_urls(filter_urls(found, ignore_list), alert_list)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment