Skip to content

Instantly share code, notes, and snippets.

@fbriere
Last active January 22, 2018 13:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fbriere/e86584a807449e3128c0 to your computer and use it in GitHub Desktop.
Save fbriere/e86584a807449e3128c0 to your computer and use it in GitHub Desktop.
Remove all binary attachments from email messages
#!/usr/bin/python
# vim: set fileencoding=utf-8:
# Remove all binary attachments from email messages
#
# This is a standard UNIX filter; it reads a message or mailbox from standard
# input, and writes it back to standard output, with every binary attachment
# replaced by an empty message/external-body placeholder.
#
# Written by Frédéric Brière <fbriere@fbriere.net>. Copy at will.
import email
import email.message
import optparse
import sys
import time
# Adapted from <http://code.activestate.com/recipes/576553/>, originally
# written by Romain Dartigues.
def messages_list(input):
    """Iterate over the messages of an mbox mailbox read from a file object.

    The stream is consumed with ``input.readline()`` (so it works on
    standard input, unlike the ``mailbox`` module) and split at every line
    beginning with ``'From '``.  Each chunk is parsed with
    ``email.message_from_string`` and yielded as an
    ``email.message.Message``.

    The ``'From '`` separator line is handed to the parser together with
    the message it introduces, so that ``Message.get_unixfrom()`` returns
    the original envelope line instead of it being lost.

    Separator lines that are not followed by any content produce no
    message.  Text found before the first separator is yielded as a
    message without an envelope line.

    This is a generator: messages are produced lazily, one at a time.
    """
    envelope = ''   # most recent "From " line, still waiting for content
    lines = []      # header/body lines of the message being collected

    # readline() returns '' only at end of file, which stops the iteration.
    for line in iter(input.readline, ''):
        if line.startswith('From '):
            if lines:
                yield email.message_from_string(envelope + ''.join(lines))
                lines = []
            envelope = line
        else:
            lines.append(line)

    # Flush whatever was collected after the last separator.
    if lines:
        yield email.message_from_string(envelope + ''.join(lines))
def gut_message(message):
    """Remove body from a message, and wrap in a message/external-body.

    The payload of ``message`` is replaced by an empty string (its headers
    are left untouched) and the message is attached as the single part of a
    new ``message/external-body`` wrapper, which is returned.

    The wrapper's Content-Type carries three parameters:

    * ``access-type``: always ``x-spam-deleted``;
    * ``expiration``: the current local time as an RFC 2822 date;
    * ``size``: the number of octets of the removed data once its
      Content-Transfer-Encoding has been undone, falling back to the length
      of the raw payload when it cannot be decoded that way.

    Note that ``message`` itself is modified in place.
    """
    # Imported locally so that the block does not depend on a new
    # module-level import.
    from email.utils import formatdate

    # Measure the data in its canonical (decoded) form rather than counting
    # base64/quoted-printable characters.
    removed = message.get_payload(decode=True)
    if removed is None:
        # decode=True yields None for multipart payloads; use the raw
        # payload then, and tolerate a message with no payload at all.
        removed = message.get_payload() or ''

    wrapper = email.message.Message()
    # add_header() turns the underscore of "access_type" into a dash.
    wrapper.add_header('Content-Type', 'message/external-body',
                       access_type='x-spam-deleted',
                       expiration=formatdate(localtime=True),
                       size=str(len(removed)))
    message.set_payload('')
    wrapper.set_payload([message])
    return wrapper
def message_is_binary(message):
    """Tell whether a non-multipart message holds binary content.

    Anything whose main content type is neither ``text`` nor ``message``
    is considered binary.
    """
    maintype = message.get_content_maintype()
    return maintype not in ('text', 'message')
def clean_message(message):
    """Clean a message of all its binary parts.

    Every binary part nested inside ``message`` is gutted (see
    ``gut_message``).  The container is modified in place and returned for
    convenience.

    A top-level message that is not multipart is returned untouched, even
    if it is binary: wrapping it would bury all of its own headers inside
    the message/external-body placeholder.
    """
    if message.is_multipart():
        return _clean_part(message)
    return message


def _clean_part(part):
    """Return ``part`` with its binary content gutted, recursing into containers."""
    if part.is_multipart():
        # Don't recurse in already-deleted attachments
        if part.get_content_type() != 'message/external-body':
            children = part.get_payload()
            # Slice assignment updates the payload list owned by the part.
            children[:] = [_clean_part(child) for child in children]
    elif message_is_binary(part):
        part = gut_message(part)
    return part
def main():
    """Run the filter: read standard input, write the cleaned result.

    With ``--mbox`` the input is split into messages by ``messages_list``;
    otherwise the whole input is parsed as a single message.  Each message
    goes through ``clean_message`` and is written to standard output with
    its "From " envelope line, followed by a newline.
    """
    usage = 'Usage: %prog [ --mbox ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--mbox', action='store_true', default=False,
                      help='Input is in mbox format')
    # Positional arguments are accepted but not used.
    options, _ = parser.parse_args()

    if options.mbox:
        messages = messages_list(sys.stdin)
    else:
        messages = [email.message_from_string(sys.stdin.read())]

    for message in messages:
        # write() instead of the print statement, so that the script also
        # runs on interpreters where print is a function.
        sys.stdout.write(clean_message(message).as_string(unixfrom=True))
        sys.stdout.write('\n')
# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment