Created
April 22, 2022 21:42
-
-
Save gauravjuvekar/190206d4bac097c4cd74f3c94e293265 to your computer and use it in GitHub Desktop.
no more safelinks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env python | |
import email | |
import email.message | |
import email.parser | |
import email.policy | |
import html | |
import re | |
import sys | |
import urllib | |
import urllib.parse | |
def normalize_url(match):
    """Return the original URL wrapped inside a matched Safe Links URL.

    Args:
        match: A regex match object whose ``group(0)`` is the complete
            Safe Links URL, possibly still HTML-escaped (``&amp;``).

    Returns:
        The value of the link's ``url`` query parameter, percent-encoded
        only where needed so that it is still a usable URL.  If the link
        carries no ``url`` parameter, the matched text is returned
        unchanged.  NOTE: a caller that HTML-escapes the result (such as
        ``htmlescape_wrap`` in ``replace_urls``) will then escape that
        text a second time.
    """
    # Characters that have structural meaning in a URL (RFC 3986 reserved
    # set plus "%").  They must survive quoting: quote()'s default
    # safe='/' would turn "https://host/?a=b" into
    # "https%3A//host/%3Fa%3Db".  "%" is kept so that escapes which were
    # already part of the original URL are not encoded a second time.
    url_safe_chars = "!#$%&'()*+,/:;=?@[]~"

    original = match.group(0)
    # HTML bodies carry the query separators as "&amp;"; undo that so the
    # query string can be parsed.
    unescaped = html.unescape(original)
    query = urllib.parse.urlparse(unescaped).query
    # parse_qs() also percent-decodes the values.
    targets = urllib.parse.parse_qs(query).get('url')
    if not targets:
        # Not a link we can unwrap; leave it alone rather than raising
        # KeyError and aborting the whole run.
        return original
    return urllib.parse.quote(targets[0], safe=url_safe_chars)
def replace_urls(text, htmlescape=False):
    """Replace every Safe Links URL in *text* with the URL it wraps.

    URLs that directly follow ``href=``, ``href="`` or ``href='`` are
    skipped by the pattern's negative look-behinds.

    Args:
        text: The body text to scan.
        htmlescape: When true, each replacement is passed through
            ``html.escape`` before it is inserted (for HTML bodies).

    Returns:
        *text* with the matched links replaced.
    """
    def htmlescape_wrap(match):
        return html.escape(normalize_url(match))

    # The host and path parts are restricted to characters that can occur
    # in them.  With an unbounded ".*?" a match could begin at an unrelated
    # "http://..." earlier on the same line and swallow everything up to
    # the Safe Links host.
    pattern = (
        r'(?<!href=["\'])(?<!href=)'
        r'https?://[\w.-]*?'
        r'\.safelinks\.protection\.outlook\.com/'
        r'[^\s<>"]*?reserved=\d+'
    )
    return re.sub(
        pattern,
        htmlescape_wrap if htmlescape else normalize_url,
        text)
def transform_html(data):
    """Rewrite Safe Links URLs in an HTML body.

    Args:
        data: The decoded payload of a ``text/html`` part, as bytes.

    Returns:
        The body as bytes, with replacements HTML-escaped.
    """
    # Decode as ASCII with "surrogateescape": every non-ASCII byte is
    # carried through as a placeholder and restored unchanged by the
    # matching encode below.  A strict ASCII decode raised
    # UnicodeDecodeError on any body that was not pure ASCII.
    text = data.decode('ascii', errors='surrogateescape')
    # TODO: There's probably a better way of applying this to only text
    # contents (not in a <a href=) than just negative-lookbehinds in the regex
    # using BeautifulSoup, but this works well enough for now.
    text = replace_urls(text, htmlescape=True)
    return text.encode('ascii', errors='surrogateescape')
def transfrom_text(data):
    """Rewrite Safe Links URLs in a plain-text body.

    The misspelled name is kept because ``main`` calls this function
    under it.

    Args:
        data: The decoded payload of a ``text/plain`` part, as bytes.

    Returns:
        The body as bytes, with the links replaced.
    """
    # Decode as ASCII with "surrogateescape": every non-ASCII byte is
    # carried through as a placeholder and restored unchanged by the
    # matching encode below.  A strict ASCII decode raised
    # UnicodeDecodeError on any body that was not pure ASCII.
    text = data.decode('ascii', errors='surrogateescape')
    text = replace_urls(text)
    return text.encode('ascii', errors='surrogateescape')
def main(data):
    """Rewrite Safe Links URLs in the text parts of a raw e-mail message.

    Args:
        data: The complete message as bytes.

    Returns:
        The serialized message as bytes.  Parts that are neither
        ``text/plain`` nor ``text/html``, and text parts in which nothing
        was replaced, are left as parsed.
    """
    parser = email.parser.BytesParser(policy=email.policy.SMTP)
    em = parser.parsebytes(data)
    for part in em.walk():
        # get_content_type() always yields "maintype/subtype", so only
        # the two full type names need to be checked.
        content_type = part.get_content_type()
        if content_type == 'text/plain':
            subtype = 'plain'
            transform = transfrom_text
        elif content_type == 'text/html':
            subtype = 'html'
            transform = transform_html
        else:
            continue

        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        rewritten = transform(payload)
        if rewritten == payload:
            # Nothing was replaced: keep the part's original headers and
            # transfer encoding instead of re-encoding it.
            continue

        # NOTE: set_content() replaces the part's Content-* headers, so
        # the declared charset is handed back explicitly; any other
        # Content-* header of a rewritten part (e.g. Content-Disposition)
        # is not carried over.
        charset = part.get_content_charset()
        params = {'charset': charset} if charset else None
        part.set_content(rewritten, maintype='text', subtype=subtype,
                         params=params)
    return em.as_bytes()
if __name__ == '__main__':
    # Act as a filter: raw message bytes in on stdin, rewritten message
    # bytes out on stdout.
    raw_message = sys.stdin.buffer.read()
    rewritten_message = main(raw_message)
    sys.stdout.buffer.write(rewritten_message)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment