Skip to content

Instantly share code, notes, and snippets.



Last active Jan 22, 2018
What would you like to do?
Remove all binary attachments from email messages
# vim: set fileencoding=utf-8:
# Remove all binary attachments from email messages
# This is a standard UNIX filter: it reads a message or mailbox from standard
# input and writes it back to standard output, with the body of every binary
# attachment removed and the attachment wrapped in a message/external-body.
# Written by Frédéric Brière <>. Copy at will.
import email
import email.message
import optparse
import sys
import time
# Adapted from <>, originally
# written by Romain Dartigues.
def messages_list(input):
    """Extract the messages from a mailbox.

    This parses a mbox mailbox and yields its messages one by one, as
    email.message.Message objects.  Unlike the mailbox module, this only
    needs readline(), which allows reading from standard input.

    This is a generator: messages are produced lazily as input is consumed.

    Arguments:
    input -- file-like object whose readline() returns text lines, and an
             empty string at end of file

    Yields one email.message.Message per message found; nothing at all if
    the input is empty.
    """
    lines = []
    # readline() returns '' only at end of file (a blank line is '\n'), so
    # it doubles as the loop sentinel.
    for line in iter(input.readline, ''):
        # A "From " line opens a new message: flush what was collected for
        # the previous one before starting over.
        if line.startswith('From ') and lines:
            yield email.message_from_string(''.join(lines))
            lines = []
        lines.append(line)

    # Flush the last message once the input is exhausted.
    if lines:
        yield email.message_from_string(''.join(lines))
def gut_message(message):
    """Remove body from a message, and wrap in a message/external-body.

    The payload of `message` is emptied in place (its headers are kept), and
    the message is then encapsulated in a newly created wrapper whose
    Content-Type is message/external-body.

    Arguments:
    message -- email.message.Message whose content should be dropped

    Returns the new wrapper message.
    """
    wrapper = email.message.Message()
    # NOTE(review): the tail of this call's argument list was lost in the
    # copy of the script this was restored from.  RFC 2046 makes access-type
    # mandatory for message/external-body, so an "x-" extension token is
    # supplied here -- confirm the exact value against the upstream script.
    wrapper.add_header('Content-Type', 'message/external-body',
                       access_type='x-deleted',
                       expiration=time.strftime("%a, %d %b %Y %H:%M:%S %z"))

    # Keep the part's own headers, so the wrapper still describes what was
    # removed, but drop the content itself.
    message.set_payload('')
    wrapper.set_payload([message])

    return wrapper
def message_is_binary(message):
    """Determine if a non-multipart message is of binary type.

    A message counts as binary when its main content type is anything other
    than "text" or "message".

    Arguments:
    message -- email.message.Message to inspect

    Returns True if the message is binary, False otherwise.
    """
    return message.get_content_maintype() not in ('text', 'message')
def clean_message(message):
    """Clean a message of all its binary parts.

    This guts all binary attachments, and returns the message itself for
    convenience.  Multipart messages are modified in place: each sub-part is
    replaced by its cleaned version.  A non-multipart binary message is
    replaced by the wrapper built by gut_message(), so callers must use the
    return value rather than the argument.

    Arguments:
    message -- email.message.Message to clean

    Returns the cleaned message (either `message` itself or its wrapper).
    """
    if message.is_multipart():
        # Don't recurse in already-deleted attachments
        if message.get_content_type() != 'message/external-body':
            parts = message.get_payload()
            # Slice assignment so the message's own payload list is updated.
            parts[:] = [clean_message(part) for part in parts]
    elif message_is_binary(message):
        # TODO: Don't gut if this is the topmost message
        message = gut_message(message)

    return message
def main():
    """Run the filter from the command line.

    Reads standard input, either as a single message or, with --mbox, as a
    mbox mailbox, cleans every message with clean_message(), and writes the
    results to standard output, each followed by a newline.
    """
    usage = 'Usage: %prog [ --mbox ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--mbox', action='store_true', default=False,
                      help='Input is in mbox format')
    options, _ = parser.parse_args()

    if options.mbox:
        messages = messages_list(sys.stdin)
    else:
        # Without --mbox the whole input is one single message.
        messages = [email.message_from_string(sys.stdin.read())]

    for message in messages:
        # write() rather than the print statement, so that this runs under
        # both Python 2 and Python 3; the trailing newline is what print
        # would have added.
        sys.stdout.write(clean_message(message).as_string(unixfrom=True))
        sys.stdout.write('\n')
# Only run the filter when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment