Skip to content

Instantly share code, notes, and snippets.

@bodqhrohro
Last active June 14, 2019 09:52
Show Gist options
  • Save bodqhrohro/3bd9183e59c483c9186a90bc1dd21ad0 to your computer and use it in GitHub Desktop.
Save bodqhrohro/3bd9183e59c483c9186a90bc1dd21ad0 to your computer and use it in GitHub Desktop.
Decode the filenames of attachments exported with munpack
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from email.header import decode_header, HeaderParseError
import os
import re
import itertools
from pprint import pprint
# Character sets and transfer encodings ("B" = base64, "Q" = quoted-printable)
# recognised in the encoded-word prefix of an attachment name.
encodings = ('UTF-8', 'utf-8', 'koi8-r', 'KOI8-R')
formats = ('B', 'Q')
# Every encoded-word prefix the script understands, e.g. "=?UTF-8?B?" ...
patterns = ["=?%s?%s?" % (enc, f)
            for enc, f in itertools.product(encodings, formats)]
# ... and the same prefixes as they show up in munpack's output file names,
# where every "?" has been replaced by "X" (e.g. "=XUTF-8XBX").
spoiled_patterns = [s.replace('?', 'X') for s in patterns]


def _split_suffix(name):
    """Separate the encoded part of *name* from its plain trailing suffixes.

    Returns ``(encoded, suffix)``.  ``suffix`` collects a trailing numeric
    duplicate marker (".1", ".2", ...) and any file extensions that follow
    the encoded word; it is ``None`` when there is nothing to strip.
    """
    suffix = None
    # Cut off the numeric suffix of duplicates.
    duplicate = re.match(r'(.*)(\.\d+)$', name)
    if duplicate is not None:
        name, suffix = duplicate.group(1), duplicate.group(2)
    # Cut possible extensions out of the encoded part.  A dotted tail that
    # still contains "X=" (the spoiled "?=" terminator) belongs to the
    # encoded word itself, so stripping stops there.
    while True:
        extension = re.match(r'(.*)(\.[^\.]+)$', name)
        if extension is None or 'X=' in extension.group(2):
            break
        name = extension.group(1)
        suffix = extension.group(2) + (suffix or "")
    return name, suffix


def _restore_encoded_word(name, pattern, spoiled_pattern):
    """Undo the "?" -> "X" substitution so *name* is a parsable encoded word.

    *pattern* is the genuine prefix (e.g. "=?UTF-8?B?") and *spoiled_pattern*
    its "X" variant.  The spoiled prefix is escaped and the replacement is
    supplied through a function so neither is interpreted as regex syntax.
    """
    spoiled = re.escape(spoiled_pattern)
    # A terminator directly followed by another prefix marks the boundary
    # between two encoded words; put them on separate header lines.
    name = re.sub(r'X=X?X?' + spoiled, lambda m: "?=\r\n" + pattern, name)
    name = re.sub(r'^' + spoiled, lambda m: pattern, name)
    name = re.sub(r'X=$', "?=", name)
    if not name.endswith('?='):
        name += '?='
    return name


def _relocate(full_name, orig_name, pattern, spoiled_pattern):
    """Decode *orig_name* and rename the file *full_name* accordingly.

    *orig_name* is the path of the file relative to the walk root and is
    known to start with *spoiled_pattern*.  The decoded name is relative, so
    the file ends up in the current working directory.  Every failure is
    reported on stdout and leaves the file where it was.
    """
    encoded, suffix = _split_suffix(orig_name)
    encoded = _restore_encoded_word(encoded, pattern, spoiled_pattern)
    try:
        h = decode_header(encoded)
    except HeaderParseError:
        print("Couldn't parse: %s" % encoded)
        return
    # Expect exactly one decoded chunk whose charset is the one named by the
    # prefix that matched.
    if len(h) != 1 or h[0][1] is None or h[0][1] not in pattern.lower():
        print("Parsing error: %s" % encoded)
        pprint(h)
        return
    raw, enc = h[0]
    try:
        new_name = raw.decode(enc)
        # Append the suffix back.
        if suffix:
            new_name += suffix
        # Clean the name from unwanted symbols.
        new_name = re.sub(r'[/\\\?\*]', '_', new_name)
        # Make sure the name would fit in a *NIX filesystem.
        if len(new_name.encode('utf-8')) > 200:
            new_name = u"%s…%s" % (new_name[:50], new_name[-50:])
        # os.rename() silently replaces an existing target on POSIX; never
        # destroy another file just because two names collide.
        if os.path.lexists(new_name):
            print("Rename error: `%s` to `%s` (target already exists)"
                  % (full_name, new_name))
            return
        try:
            os.rename(full_name, new_name)
        except OSError:
            print("Rename error: `%s` to `%s`" % (full_name, new_name))
    except UnicodeError as e:
        # Not every UnicodeError subclass carries .reason / .object.
        print("Malformed string (%s): %s" % (getattr(e, 'reason', e), orig_name))
        pprint(h)
        pprint(getattr(e, 'object', None))


def main():
    """Restore readable names for munpack output below the current directory.

    Walks "." bottom-up, renames every file whose relative path starts with
    a spoiled encoded-word prefix, and removes directories that are empty
    once their files have been handled.
    """
    for root, _dirs, files in os.walk('.', topdown=False):
        for filename in files:
            full_name = root + '/' + filename
            # Drop the leading "./"; the remaining relative path (including
            # any directory separators) is treated as the spoiled name.
            orig_name = full_name[2:]
            for pattern, spoiled_pattern in zip(patterns, spoiled_patterns):
                if orig_name.startswith(spoiled_pattern):
                    _relocate(full_name, orig_name, pattern, spoiled_pattern)
                    # Prefixes are mutually exclusive; no need to try more.
                    break
        # Clean up the emptied directories.
        if root != "." and not os.listdir(root):
            os.rmdir(root)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment