Skip to content

Instantly share code, notes, and snippets.

@KrissN
Created April 30, 2019 11:59
Show Gist options
  • Save KrissN/dc012474c0c41e3bcd2776096543c9e5 to your computer and use it in GitHub Desktop.
Save KrissN/dc012474c0c41e3bcd2776096543c9e5 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
from email.parser import Parser
import re
import sys
# Separator lines that may appear directly above a quoted message's header
# block.
known_quote_header_marks = (
    "-----Original Message-----",
    "_____________________________________________",
    "-----Original Appointment-----",
)

# A "Name: value" header line; one optional space is allowed before the colon
# (as in "De : ...").
headerre = re.compile(r"^(\w+) ?: (.*)$")

# Accepted field names for the sender and date header lines.
from_words = ("From", "Von", "De")
sent_words = ("Sent", "Gesendet", "Envoyé")


def detect_quote_header(lines):
    """Detect a quoted-message header block at the start of *lines*.

    A header block is an optional separator line from
    ``known_quote_header_marks`` (optionally followed by one blank line),
    a "From"-type line, a "Sent"-type line, any further non-blank lines,
    and finally at least one blank line.

    Args:
        lines: list of text lines; detection always starts at ``lines[0]``.

    Returns:
        ``None`` when no header block starts at ``lines[0]`` (this includes
        every input shorter than four lines and headers whose terminating
        blank line is not contained in *lines*).  Otherwise a tuple
        ``(sender, date, end_idx)`` where *sender* and *date* are the
        stripped values of the From/Sent lines and *end_idx* is the index in
        *lines* of the first non-blank line after the header, or of the
        terminating blank line itself if nothing non-blank follows it.
    """
    if len(lines) < 4:
        return None

    first = headerre.match(lines[0])
    if first is not None and first.group(1) in from_words:
        # The block starts directly with the From line.
        idx = 0
    elif lines[0].strip() in known_quote_header_marks:
        # Separator line first; tolerate one blank line after it.
        idx = 1
        if not lines[idx].strip():
            idx += 1
    else:
        return None

    from_match = headerre.match(lines[idx])
    if from_match is None or from_match.group(1) not in from_words:
        return None
    sender = from_match.group(2)

    # Tolerate one blank line between the From and Sent lines.  The original
    # check re-tested the From line itself (never blank), so it could not
    # fire; presumably the following line was meant.
    sent_idx = idx + 1
    if sent_idx < len(lines) and not lines[sent_idx].strip():
        sent_idx += 1
    if sent_idx >= len(lines):
        return None

    sent_match = headerre.match(lines[sent_idx])
    if sent_match is None or sent_match.group(1) not in sent_words:
        return None
    date = sent_match.group(2)

    # The header ends at the first blank line after the Sent line.
    blank_idx = next(
        (i for i in range(sent_idx + 1, len(lines)) if not lines[i].strip()),
        None,
    )
    if blank_idx is None:
        return None

    # Skip the blank run; fall back to the blank line when nothing follows.
    end_idx = next(
        (i for i in range(blank_idx, len(lines)) if lines[i].strip()),
        blank_idx,
    )
    return (sender.strip(), date.strip(), end_idx)
def get_message_text(message):
    """Decode the body of a non-multipart message part to text.

    Args:
        message: an ``email.message.Message`` whose payload is a single body.

    Returns:
        A tuple ``(charset, text)``.  *charset* is the ``charset`` parameter
        of the Content-Type header as reported by ``get_content_charset``
        (lower-cased), or ``'ascii'`` when none is declared.  *text* is the
        transfer-decoded payload decoded with that charset, or ``None`` when
        the message has no decodable payload.
    """
    # Ask for the charset parameter directly: decoding with whichever
    # Content-Type parameter happens to come last (e.g. "format=flowed")
    # fails, and so does a message with no Content-Type header at all.
    charset = message.get_content_charset('ascii')
    payload = message.get_payload(decode=True)
    if payload is None:
        # get_payload(decode=True) yields None for multipart containers.
        return (charset, None)
    try:
        text = payload.decode(charset, errors='replace')
    except LookupError:
        # Charset name unknown to Python: fall back rather than abort.
        text = payload.decode('ascii', errors='replace')
    return (charset, text)
# Nested quote headers are still searched for while ``level <= MAX_LEVEL``;
# beyond that the remaining text is quoted as-is.
MAX_LEVEL = 5


def requote(lines, level):
    """Re-quote *lines* using ``>`` prefixes, one extra ``>`` per nesting level.

    The lines are scanned for the first quoted-message header, testing a
    window of up to ten lines at each position with ``detect_quote_header``.
    If one is found, the output starts with an ``On <date> <sender> wrote:``
    attribution line at *level*, followed by everything after the header
    quoted one level deeper (recursively while ``level <= MAX_LEVEL``,
    otherwise without further header detection).  The lines that precede
    the header are emitted last, quoted at *level*.

    Args:
        lines: list of text lines without trailing newlines.
        level: number of ``>`` characters to prefix to this message's lines.

    Returns:
        The re-quoted text as one string; every output line ends in ``\\n``.
    """
    marker = ">" * level
    pieces = []
    own_end = len(lines)  # index where this message's own text stops

    for i in range(len(lines)):
        info = detect_quote_header(lines[i:i + 10])
        if info is None:
            continue

        sender, date, end_idx = info
        own_end = i
        pieces.append(marker + " On {} {} wrote:\n".format(date, sender))

        quoted = lines[i + end_idx:]
        if level <= MAX_LEVEL:
            pieces.append(requote(quoted, level + 1))
        else:
            deeper = ">" * (level + 1)
            pieces.extend(deeper + " " + line + "\n" for line in quoted)
        # Only the first header is handled at this level; anything further
        # down belongs to the nested text processed above.
        break

    pieces.extend(marker + " " + line + "\n" for line in lines[:own_end])
    return "".join(pieces)
def retrieve_text_from_multipart(message):
    """Find the plain-text body inside a multipart message.

    The direct sub-parts are examined in order: ``text/plain`` parts without
    a Content-Disposition header are decoded with ``get_message_text`` and
    ``multipart/alternative`` parts are searched recursively.  When several
    candidates yield text, the last one wins.

    Args:
        message: an ``email.message.Message``.

    Returns:
        A tuple ``(charset, text)``, or ``(None, None)`` when no usable
        plain-text part exists.
    """
    charset = None
    text = None

    # A part can declare a multipart type and still carry a plain string
    # payload; there are no sub-parts to iterate in that case.
    if not message.is_multipart():
        return (charset, text)

    for part in message.get_payload():
        content_type = part.get_content_type()
        if content_type == 'text/plain' and 'Content-Disposition' not in part:
            found = get_message_text(part)
        elif content_type == 'multipart/alternative':
            found = retrieve_text_from_multipart(part)
        else:
            continue
        # Keep text found earlier when this candidate yielded nothing.
        if found[1] is not None:
            charset, text = found

    return (charset, text)
def main():
    """Read one e-mail message from stdin and write its re-quoted text to stdout.

    The plain-text body is extracted (searching the sub-parts of multipart
    messages), re-quoted starting at level 1 and written UTF-8 encoded.  If
    no plain-text body is found, a single placeholder line is printed.
    """
    # Local import so this function does not depend on changes to the
    # module's import block.
    from email.parser import BytesParser

    # Parse the raw bytes: a message is a byte stream whose parts declare
    # their own charsets, so decoding stdin as text first would make 8-bit
    # bodies depend on the locale's encoding.
    message = BytesParser().parse(sys.stdin.buffer)

    if message.is_multipart():
        _charset, text = retrieve_text_from_multipart(message)
    else:
        _charset, text = get_message_text(message)

    if text is None:
        print("> *** No usable payload ***")
        return

    # Text-mode stdin used to normalize line endings; do it explicitly now
    # that the input is read as bytes.
    normalized = text.replace('\r\n', '\n').replace('\r', '\n')
    quoted = requote(normalized.split('\n'), 1)
    sys.stdout.buffer.write(quoted.encode('utf-8'))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment