Skip to content

Instantly share code, notes, and snippets.

@adlerweb
Created September 30, 2023 11:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adlerweb/2be1a440bfadff1a521bfbb841c1d5ec to your computer and use it in GitHub Desktop.
Save adlerweb/2be1a440bfadff1a521bfbb841c1d5ec to your computer and use it in GitHub Desktop.
Extract target URL from a mandrill tracking link. Based on https://gist.github.com/medmunds/1b696ee88ccb0480d71f
import sys
import json
import base64
import argparse
from binascii import Error as BinasciiError
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, parse_qsl
# Version string of this tool; printed by main() when --version is given.
version = "0.1"
def urlsafe_base64_decode(s):
    """
    Decode a URL-safe base64 text string and return the decoded bytes.

    The input is right-padded with "=" characters before decoding, so values
    whose trailing padding was stripped can still be decoded. Decoding
    failures are reported as ValueError.
    (via https://docs.djangoproject.com/en/4.2/_modules/django/utils/http/#urlsafe_base64_decode)
    """
    raw = s.encode()
    # Extend the input by (length mod 4) "=" characters.
    padded_width = len(raw) + len(raw) % 4
    padded = raw.ljust(padded_width, b"=")
    try:
        decoded = base64.urlsafe_b64decode(padded)
    except (LookupError, BinasciiError) as e:
        raise ValueError(e)
    return decoded
def remove_utm(s):
    """
    Remove URL parameters starting with utm_, as these are usually only used
    for tracking.
    (see https://en.wikipedia.org/wiki/UTM_parameters)

    All other query parameters are kept in their original order, including
    parameters whose value is empty (e.g. "a="). The remaining parameters are
    re-encoded with urlencode, so their escaping may be normalized.

    Args:
        s: The URL to clean.

    Returns:
        The URL with every utm_* query parameter removed.
    """
    parsed_url = urlparse(s)
    # keep_blank_values=True: by default parse_qsl silently drops parameters
    # with an empty value, which would strip more than just the utm_ ones.
    query_parameters = parse_qsl(parsed_url.query, keep_blank_values=True)
    filtered_parameters = [
        (key, value)
        for key, value in query_parameters
        if not key.startswith('utm_')
    ]
    # Rebuild the URL with only the surviving query parameters.
    new_query = urlencode(filtered_parameters)
    return urlunparse(parsed_url._replace(query=new_query))
def mandrill_extract(tracking_url):
    """
    Recover the original target URL from a mandrill tracking link.
    (via https://gist.github.com/medmunds/1b696ee88ccb0480d71f)

    The last two path segments are returned as account ID and base URL. The
    "p" query parameter is base64-decoded and parsed as JSON; its own "p"
    entry is a JSON string whose "url" entry holds the original link.

    Returns:
        A tuple (account_id, base_url, data_field, original_url,
        filtered_url), where filtered_url is original_url passed through
        remove_utm.
    """
    link = urlparse(tracking_url)

    # The last two path segments carry the account ID and the base URL.
    segments = link.path.split('/')
    account_id = segments[-2]
    base_url = segments[-1]

    # First value of the "p" query parameter holds the encoded payload.
    data_field = parse_qs(link.query)['p'][0]

    outer = json.loads(urlsafe_base64_decode(data_field))
    inner = json.loads(outer['p'])
    original_url = inner['url']

    return account_id, base_url, data_field, original_url, remove_utm(original_url)
def main():
    """
    Command-line entry point: decode a mandrill tracking link and print it.

    Output mode is selected by the first matching flag: --json prints all
    fields as a JSON object, --verbose prints the fields one per line,
    --unfiltered prints the decoded URL as-is, and the default prints the
    decoded URL with utm_ parameters removed. --version prints version
    information and exits without needing a URL.

    Exits with status 1 if the link cannot be decoded, and with argparse's
    usage error if no tracking URL is given.
    """
    parser = argparse.ArgumentParser(description='Decode a mandrill tracking link and reconstruct the original URL without tracking')
    # nargs='?' keeps the positional optional at parse time; otherwise
    # argparse rejects a bare "--version" before the flag is ever checked.
    parser.add_argument('tracking_url', nargs='?', help='The tracking URL to process')
    parser.add_argument('--version', action='store_true', help='Show version info')
    parser.add_argument('-j', '--json', action='store_true', help='Output all fields as JSON')
    parser.add_argument('-v', '--verbose', action='store_true', help='Output all variants individually')
    parser.add_argument('-u', '--unfiltered', action='store_true', help='Output decoded link with tracking parameters')
    args = parser.parse_args()

    if args.version:
        print(f"Mandrill link decoder\n Version {version}")
        sys.exit(0)

    tracking_url = args.tracking_url
    if tracking_url is None:
        # Same usage error argparse raises for a missing required positional.
        parser.error('the following arguments are required: tracking_url')

    try:
        account_id, base_url, data_field, original_url, filtered_url = mandrill_extract(tracking_url)
    except Exception as e:
        # Top-level boundary: any decoding failure becomes a short message
        # on stderr and a non-zero exit status.
        print("Unable to parse link; exiting.", file=sys.stderr)
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        result = {
            "AccountID": account_id,
            "BaseURL": base_url,
            "DataField": data_field,
            "OriginalURL": original_url,
            "FilteredURL": filtered_url,
        }
        print(json.dumps(result, indent=4))
    elif args.verbose:
        print("Account ID:", account_id)
        print("Base URL:", base_url)
        print("Original URL:", original_url)
        print("Filtered URL:", filtered_url)
    elif args.unfiltered:
        print(original_url)
    else:
        print(filtered_url)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment