Remove all binary attachments from email messages
#!/usr/bin/python
# vim: set fileencoding=utf-8:
# Remove all binary attachments from email messages
#
# This is a standard UNIX filter: it reads a message or mailbox from standard
# input and writes it back to standard output, with every binary attachment
# removed and replaced by a message/external-body placeholder.
#
# Written by Frédéric Brière <fbriere@fbriere.net>. Copy at will.
import email | |
import email.message | |
import optparse | |
import sys | |
import time | |
# Adapted from <http://code.activestate.com/recipes/576553/>, originally
# written by Romain Dartigues.
def messages_list(input):
    """Yield each message found in an mbox stream.

    This is a generator: it reads ``input`` line by line with ``readline()``
    and yields one email.message.Message per mbox entry.  Unlike the mailbox
    module, this allows reading from standard input.

    A new entry starts at every line beginning with ``'From '``.  That
    separator line is handed to the parser together with the entry, so it is
    kept as the message's Unix-From line (``get_unixfrom()``) instead of
    being thrown away; otherwise the envelope sender and delivery date would
    be lost when the message is written back out.

    Text found before the first separator is yielded as a message without a
    Unix-From line.  A separator that is not followed by any content yields
    nothing.
    """
    from_line = ''
    body_lines = []
    while True:
        line = input.readline()
        at_eof = line == ''
        if at_eof or line.startswith('From '):
            if body_lines:
                yield email.message_from_string(
                    from_line + ''.join(body_lines))
            if at_eof:
                return
            # Remember the separator for the entry that starts here.
            from_line = line
            body_lines = []
        else:
            body_lines.append(line)
def gut_message(message):
    """Empty a message's body and wrap it in a message/external-body part.

    The payload of ``message`` is replaced with an empty string (the message
    object is modified in place, its headers are kept).  The returned wrapper
    is a new Message whose Content-Type is message/external-body, whose
    parameters record the access type, the current local time and the length
    of the removed payload, and whose only sub-part is ``message``.
    """
    # Measure the payload before it is discarded below.
    removed_size = len(message.get_payload())
    removed_at = time.strftime("%a, %d %b %Y %H:%M:%S %z")

    message.set_payload('')

    stub = email.message.Message()
    stub.add_header('Content-Type', 'message/external-body',
                    access_type='x-spam-deleted',
                    expiration=removed_at,
                    size=str(removed_size))
    stub.set_payload([message])
    return stub
def message_is_binary(message):
    """Tell whether a non-multipart message has a binary content type.

    Anything whose main MIME type is neither ``text`` nor ``message`` is
    considered binary.
    """
    main_type = message.get_content_maintype()
    return not (main_type == 'text' or main_type == 'message')
def clean_message(message):
    """Strip every binary part out of a message.

    A non-multipart message that message_is_binary() reports as binary is
    replaced by the wrapper built by gut_message(); any other non-multipart
    message is returned untouched.  For a multipart message the list of
    sub-parts is rewritten in place with the cleaned version of each part,
    and the message itself is returned for convenience.
    """
    if not message.is_multipart():
        # TODO: Don't gut if this is the topmost message
        if message_is_binary(message):
            return gut_message(message)
        return message

    # Don't recurse in already-deleted attachments
    if message.get_content_type() == 'message/external-body':
        return message

    children = message.get_payload()
    children[:] = [clean_message(child) for child in children]
    return message
def main():
    """Run the filter: read mail from stdin, write the cleaned mail to stdout.

    Without options, standard input is parsed as a single message.  With
    ``--mbox`` it is split into messages by messages_list().  Every message
    is passed through clean_message() and written to standard output with
    its Unix-From line, followed by a newline.
    """
    usage = 'Usage: %prog [ --mbox ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--mbox', action='store_true', default=False,
                      help='Input is in mbox format')
    options, _args = parser.parse_args()

    if options.mbox:
        messages = messages_list(sys.stdin)
    else:
        messages = [email.message_from_string(sys.stdin.read())]

    for message in messages:
        # sys.stdout.write() instead of the print statement: same output,
        # but valid syntax on both Python 2 and Python 3.
        cleaned = clean_message(message)
        sys.stdout.write(cleaned.as_string(unixfrom=True) + '\n')
# Only run the filter when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment