Megabytemb/SeekExtract.py

## example.txt
###### To generate this file, open an email in Gmail, choose "Show original", then paste the displayed contents into a file.

## SeekExtract.py
import email
import argparse
from bs4 import BeautifulSoup

# Command-line interface: a single optional flag naming the saved email file.
parser = argparse.ArgumentParser()
parser.add_argument(
  "-e",
  "--email",
  default="example.txt",
  help="Path to Email file",
)
# Parsed at module level; the result is handed to main() by the script guard.
args = parser.parse_args()

def _extractHtml(msg):
  response = ""
  for part in msg.walk():
    # each part is a either non-multipart, or another multipart message
    # that contains further parts... Message is organized like a tree
    if part.get_content_type() == 'text/html':
        response += part.get_payload() # prints the raw text
  if response == "":
    raise ValueError("Email did not contain HTML")
  return response

def parseEmail(rawEmail):
  """Turn the raw source text of an email into pretty-printed HTML.

  Args:
    rawEmail: The full message source (headers and body) as a string.

  Returns:
    The ``prettify()`` output for the message's HTML, parsed with ``html.parser``.

  Raises:
    ValueError: Propagated from ``_extractHtml`` when the email has no HTML.
  """
  markup = _extractHtml(email.message_from_string(rawEmail))

  ## The parsed tree could be searched or looped over here before rendering; see
  ## https://www.crummy.com/software/BeautifulSoup/bs4/doc/
  return BeautifulSoup(markup, 'html.parser').prettify()

def main(args):
  """Read the email file named by ``args.email`` and print its prettified HTML.

  Args:
    args: Parsed command-line namespace; only its ``email`` attribute (a file
      path) is read.
  """
  # The context manager closes the file handle even if reading fails.
  with open(args.email) as emailFile:
    rawEmail = emailFile.read()
  # A parenthesised single-argument print works under both Python 2 and 3.
  print(parseEmail(rawEmail))

# Run the extraction only when this file is executed as a script; ``args`` is
# the namespace produced by the module-level ``parser.parse_args()`` call.
if __name__ == "__main__":
  main(args)
	import email
	import argparse
	from bs4 import BeautifulSoup

	# Command-line interface: a single optional flag naming the saved email file.
	parser = argparse.ArgumentParser()
	parser.add_argument(
		"-e",
		"--email",
		default="example.txt",
		help="Path to Email file",
	)
	args = parser.parse_args()

	def _extractHtml(msg):
	response = ""
	for part in msg.walk():
	# each part is a either non-multipart, or another multipart message
	# that contains further parts... Message is organized like a tree
	if part.get_content_type() == 'text/html':
	response += part.get_payload() # prints the raw text
	if response == "":
	raise ValueError("Email did not contain HTML")
	return response

	def parseEmail(rawEmail):
		"""Turn the raw source text of an email into pretty-printed HTML.

		Args:
			rawEmail: The full message source (headers and body) as a string.

		Returns:
			The ``prettify()`` output for the message's HTML, parsed with
			``html.parser``.
		"""
		msg = email.message_from_string(rawEmail)
		html = _extractHtml(msg)
		soup = BeautifulSoup(html, 'html.parser')

		## We now have a Soup object, which can be searched or looped over as
		## described at https://www.crummy.com/software/BeautifulSoup/bs4/doc/

		return soup.prettify()

	def main(args):
		"""Read the email file named by ``args.email`` and print its prettified HTML.

		Args:
			args: Parsed command-line namespace; only its ``email`` attribute (a
				file path) is read.
		"""
		# The context manager closes the file handle even if reading fails.
		with open(args.email) as emailFile:
			rawEmail = emailFile.read()
		# A parenthesised single-argument print works under both Python 2 and 3.
		print(parseEmail(rawEmail))

	# Run the extraction only when this file is executed as a script.
	if __name__ == "__main__":
		main(args)