Last active
October 12, 2016 10:05
-
-
Save Megabytemb/591d96cff71d0b4bc14e83062dd021e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# To generate the input file: open an email in Gmail, choose "Show original", then paste the raw message text into a file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import email | |
import argparse | |
from bs4 import BeautifulSoup | |
# Command-line interface: one optional flag naming the saved raw email.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-e",
    "--email",
    default="example.txt",
    help="Path to Email file",
)
# Parsed at import time; ``args`` is the module-level namespace passed to main().
args = parser.parse_args()
def _extractHtml(msg):
    """Return the concatenated HTML of every ``text/html`` part in ``msg``.

    Parts that declare a quoted-printable, base64 or uuencode
    Content-Transfer-Encoding are decoded to bytes and then to text using
    the part's declared charset (UTF-8 when none is declared or the
    declared one is unknown). Parts without such an encoding are taken
    verbatim.

    Args:
        msg: A parsed message, e.g. from ``email.message_from_string``.

    Returns:
        The HTML of all ``text/html`` parts, joined in walk order.

    Raises:
        ValueError: If no ``text/html`` part yields any content.
    """
    transfer_encoded = (
        'quoted-printable', 'base64',
        'x-uuencode', 'uuencode', 'uue', 'x-uue',
    )
    fragments = []
    # Each part is either non-multipart, or another multipart message
    # that contains further parts; walk() visits the whole tree.
    for part in msg.walk():
        if part.get_content_type() != 'text/html':
            continue
        encoding = str(part.get('Content-Transfer-Encoding', '')).strip().lower()
        if encoding not in transfer_encoded:
            # 7bit/8bit/binary/unspecified: the payload is already the text.
            fragments.append(part.get_payload())
            continue
        # Undo the transfer encoding; otherwise "=3D", "=\n" soft breaks or
        # base64 blobs would be handed to the HTML parser as if they were markup.
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            fragments.append(payload.decode(charset, 'replace'))
        except LookupError:
            # Unknown or misspelled charset label: fall back to UTF-8.
            fragments.append(payload.decode('utf-8', 'replace'))
    response = "".join(fragments)
    if response == "":
        raise ValueError("Email did not contain HTML")
    return response
def parseEmail(rawEmail):
    """Convert the raw text of an email into pretty-printed HTML.

    Args:
        rawEmail: The full message source (headers and body) as a string.

    Returns:
        The message's HTML content as re-indented by ``soup.prettify()``.

    Raises:
        ValueError: Propagated from ``_extractHtml`` when the message has
            no HTML content.
    """
    message = email.message_from_string(rawEmail)
    # The soup is a navigable tree, so it can be searched or looped over;
    # see https://www.crummy.com/software/BeautifulSoup/bs4/doc/
    soup = BeautifulSoup(_extractHtml(message), 'html.parser')
    return soup.prettify()
def main(args):
    """Read the email file named by ``args.email`` and print its HTML.

    Args:
        args: Parsed command-line namespace; only ``args.email`` (a file
            path) is read.

    Raises:
        ValueError: Propagated from ``parseEmail`` when the email has no
            HTML content.
    """
    # Context manager so the file handle is closed even if read() fails.
    with open(args.email) as email_file:
        rawEmail = email_file.read()
    # Parenthesised form is valid on Python 3 and behaves the same as the
    # print statement on Python 2 for a single argument.
    print(parseEmail(rawEmail))
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment