Rewrite URL Defense and Safelinks URLs
#!/usr/bin/env python
__author__ = 'Eric Van Cleve; Alex Vlasiuk'
__license__ = 'GPL v.3'
__version__ = '4.0b'
__email__ = 'alex@vlasiuk.com'
__status__ = 'beta'

import sys
import re
from base64 import urlsafe_b64decode

# Uncomment for Python 2
# import string
# if sys.version_info[0] < 3:
#     from urllib import unquote
#     import HTMLParser
#     htmlparser = HTMLParser.HTMLParser()
#     unescape = htmlparser.unescape
#     from string import maketrans
# else:
from urllib.parse import unquote
from html import unescape
maketrans = str.maketrans


class URLDecoder:

    def __init__(self):
        # Proofpoint URL Defense wrappers (v1-v3) and Microsoft Safelinks.
        URLDecoder.ud_pattern = re.compile(r'http.+?urldefense(?:\.proofpoint)?\.com/(v[0-9])/')
        URLDecoder.v1_pattern = re.compile(r'u=(?P<url>.+?)&k=')
        URLDecoder.v2_pattern = re.compile(r'http.+?urldefense\.proofpoint\.com/v2/url\?u=(?P<url>.+?)&[dc]=.*?&e=')
        URLDecoder.v3_pattern = re.compile(r'http.+?urldefense\.com/v3/__(?P<url>.+?)__;(?P<enc_bytes>.*?)!.*?\$')
        URLDecoder.v3_token_pattern = re.compile(r'\*(\*.)?')
        # Lookup table used to work out how many replacement characters a
        # v3 '**X' token stands for.
        URLDecoder.v3_run_mapping = {'A': 2, 'B': 3, 'C': 4, 'D': 5, 'E': 6,
                                     'F': 7, 'G': 8, 'H': 9, 'I': 10,
                                     'J': 11, 'K': 12, 'L': 13, 'M': 14,
                                     'N': 15, 'O': 16, 'P': 17, 'Q': 18,
                                     'R': 19, 'S': 20, 'T': 21, 'U': 22,
                                     'V': 23, 'W': 24, 'X': 25, 'Y': 26,
                                     'Z': 27, 'a': 28, 'b': 29, 'c': 30,
                                     'd': 31, 'e': 32, 'f': 33, 'g': 34,
                                     'h': 35, 'i': 36, 'j': 37, 'k': 38,
                                     'l': 39, 'm': 40, 'n': 41, 'o': 42,
                                     'p': 43, 'q': 44, 'r': 45, 's': 46,
                                     't': 47, 'u': 48, 'v': 49, 'w': 50,
                                     'x': 51, 'y': 52, 'z': 53, '0': 54,
                                     '1': 55, '2': 56, '3': 57, '4': 58,
                                     '5': 59, '6': 60, '7': 61, '8': 62,
                                     '9': 63, '-': 64, '_': 65}
        URLDecoder.safelinks = re.compile(r'http.+?\.safelinks\.protection\.outlook\.com/\?url=(?P<url>.+?)&(?:amp;)?data=.*?reserved=0')

    def sanitize(self, line):
        # Strip common mail-client banners that get pasted along with the link.
        rem1 = r"\[EXTERNAL\] "
        rem2 = r"You don't often get email from.+?LearnAboutSenderIdentification>"
        rem3 = r"\[You don't often get email from.+?\]"
        line = re.sub(rem1, "", line, flags=re.IGNORECASE)
        line = re.sub(rem2, "", line)
        line = re.sub(rem3, "", line)
        return line

    def decode(self, line):
        line = self.sanitize(line)
        match = self.ud_pattern.search(line)
        match_safelinks = self.safelinks.search(line)
        if match:
            if match.group(1) == 'v1':
                line = self.v1_pattern.sub(self.unescquote, line)
            elif match.group(1) == 'v2':
                line = self.v2_pattern.sub(self.decode_v2, line)
            elif match.group(1) == 'v3':
                line = self.v3_pattern.sub(self.decode_v3, line)
        if match_safelinks:
            line = self.safelinks.sub(self.unescquote, line)
        return line

    def unescquote(self, match):
        # Safelinks and v1 wrappers only URL-encode and HTML-escape the target.
        return unescape(unquote(match.group('url')))

    def decode_v2(self, match):
        # v2 swaps '%' for '-' and '/' for '_' in the URL-encoded target;
        # undo the substitution, then URL-decode and HTML-unescape.
        trans = maketrans('-_', '%/')
        url_encoded_url = match.group('url').translate(trans)
        return unescape(unquote(url_encoded_url))

    def decode_v3(self, match):

        def replace_token(token):
            # '*' stands for a single replaced character; '**X' for a run
            # whose length is derived from v3_run_mapping. Replacement
            # characters are spliced back in from the decoded byte string.
            if token == '*':
                character = self.dec_bytes[self.current_marker]
                self.current_marker += 1
                return character
            if token.startswith('**'):
                run_length = self.v3_run_mapping[token[-1]]//2
                run = self.dec_bytes[self.current_marker:self.current_marker+run_length]
                self.current_marker += run_length
                return run

        def substitute_tokens(text, start_pos=0):
            match = self.v3_token_pattern.search(text, start_pos)
            if match:
                start = text[start_pos:match.start()]
                built_string = start
                token = text[match.start():match.end()]
                built_string += replace_token(token)
                built_string += substitute_tokens(text, match.end())
                return built_string
            else:
                return text[start_pos:len(text)]

        url = match.group('url')
        encoded_url = unquote(url)
        # The characters hidden behind '*' tokens are carried base64-encoded
        # after the '__;' separator; pad and decode them before substitution.
        enc_bytes = match.group('enc_bytes')
        enc_bytes += '=='
        self.dec_bytes = (urlsafe_b64decode(enc_bytes)).decode('utf-8')
        self.current_marker = 0
        return substitute_tokens(encoded_url)


def main():
    # Act as a filter: read lines from stdin, write rewritten lines to stdout.
    urldec = URLDecoder()
    for line in sys.stdin:
        sys.stdout.write(urldec.decode(line))


if __name__ == '__main__':
    main()
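
The script is meant to be used as a filter: pipe text containing wrapped links through it on stdin and it writes the rewritten lines to stdout. For programmatic use, here is a minimal sketch; the module name and the Safelinks URL below are made up for illustration and are not part of the gist.

# Usage sketch -- assumes the script was saved as urldecode.py; the sample URL is fabricated.
from urldecode import URLDecoder

decoder = URLDecoder()
line = ("https://nam12.safelinks.protection.outlook.com/?url=https%3A%2F%2Fexample.com%2Fdocs"
        "&data=05%7C01%7C&reserved=0")
print(decoder.decode(line))  # prints: https://example.com/docs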
If you're interested, I did some cleanup and made this recursive. Not sure how to add that to a gist, sorry.
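
One way to read "made this recursive" (my interpretation, not necessarily what the commenter implemented): re-run decode() until the line stops changing, so that a Safelinks link wrapping a URL Defense link, or vice versa, gets unwrapped in one pass. A minimal sketch of that idea, with a hypothetical decode_fully helper:

def decode_fully(decoder, line, max_depth=5):
    # Keep decoding until a fixed point is reached; max_depth guards against loops.
    for _ in range(max_depth):
        decoded = decoder.decode(line)
        if decoded == line:
            break
        line = decoded
    return line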