Skip to content

Instantly share code, notes, and snippets.

@waqaraqeel
Created February 8, 2019 15:15
Show Gist options
  • Save waqaraqeel/d05a28fac3f094adf55b2f8ed46f6e1e to your computer and use it in GitHub Desktop.
Save waqaraqeel/d05a28fac3f094adf55b2f8ed46f6e1e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
This is a quick and dirty script written out of pure annoyance at proofpoint
URLdefense making emails unreadable and unusable. Phishing is dangerous, we get
it. Maybe you could change the hyperlink instead of the actual text in the email?
This script will decode all URLs in stdin.
Adapted from https://help.proofpoint.com/Threat_Insight_Dashboard/Concepts/How_do_I_decode_a_rewritten_URL%3F.
Thank you to proofpoint for providing it.
Usage: ./urldefense_decoder.py < encoded_urls > decoded_urls
"""
__author__ = "Waqar Aqeel"
__version__ = "1.0"
__license__ = "MIT"
import sys
import re
import urllib.parse
import html.parser
# v1 links: the percent-encoded target sits in "u=" and is terminated by "&k=".
re_v1 = re.compile(r"u=(.+?)&k=")
# v2 links: the specially-encoded target sits in "u=" and is terminated by
# "&d=" or "&c=".
re_v2 = re.compile(r"u=(.+?)&[dc]=")
# One rewritten link. The tail stops at whitespace, angle brackets or a double
# quote so that text following the link on the same line (or a second link) is
# not swallowed into the match and lost on decoding. Dots in the host name are
# escaped so they only match literal dots. Group 1 is the whole link and the
# named group "version" is "v" plus a single digit.
re_url = re.compile(
    r"(https://urldefense\.proofpoint\.com/(?P<version>v[0-9])/[^\s<>\"]*)"
)
def main():
    """Decode every rewritten URL read from stdin and write the text to stdout.

    Text between rewritten links is copied through unchanged; each link
    matched by ``re_url`` is replaced by the output of the decoder for its
    version.

    Raises:
        ValueError: if a matched link has a version other than v1 or v2.
    """
    input_str = sys.stdin.read()
    decoders = {"v1": decodev1, "v2": decodev2}
    last_stop = 0
    for match in re_url.finditer(input_str):
        rewrittenurl = match.group(0)
        # Copy the untouched text that sits before this link.
        sys.stdout.write(input_str[last_stop:match.start()])
        last_stop = match.end()
        decode = decoders.get(match.group("version"))
        if decode is None:
            raise ValueError("Unrecognized version in: " + rewrittenurl)
        sys.stdout.write(decode(rewrittenurl))
    # Write the tail verbatim: print() would append a newline that was not in
    # the input, so the filtered output would gain a trailing blank line.
    sys.stdout.write(input_str[last_stop:])
def decodev1(rewrittenurl):
    """Return the original URL embedded in a v1 URLdefense link.

    Args:
        rewrittenurl: Text of the rewritten link.

    Returns:
        The decoded target URL, or ``rewrittenurl`` unchanged when ``re_v1``
        finds no ``u=...&k=`` section in it.
    """
    match = re_v1.search(rewrittenurl)
    if match is None:
        return rewrittenurl
    # Percent-decode first, then resolve HTML entities. html.unescape replaces
    # HTMLParser().unescape, which was removed in Python 3.9.
    return html.unescape(urllib.parse.unquote(match.group(1)))
def decodev2(rewrittenurl):
    """Return the original URL embedded in a v2 URLdefense link.

    Args:
        rewrittenurl: Text of the rewritten link.

    Returns:
        The decoded target URL, or ``rewrittenurl`` unchanged when ``re_v2``
        finds no ``u=...&d=`` / ``u=...&c=`` section in it.
    """
    match = re_v2.search(rewrittenurl)
    if match is None:
        return rewrittenurl
    # Map "-" to "%" and "_" to "/" so the payload becomes ordinary
    # percent-encoding that unquote() can handle.
    urlencodedurl = match.group(1).translate(str.maketrans("-_", "%/"))
    # html.unescape replaces HTMLParser().unescape, which was removed in
    # Python 3.9.
    return html.unescape(urllib.parse.unquote(urlencodedurl))
# Run the stdin-to-stdout filter only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment