Skip to content

Instantly share code, notes, and snippets.

@yak1ex
Created September 18, 2019 00:00
Show Gist options
  • Save yak1ex/b67fb1080712cdb09460dc3c24bf0ede to your computer and use it in GitHub Desktop.
Save yak1ex/b67fb1080712cdb09460dc3c24bf0ede to your computer and use it in GitHub Desktop.
eml by futakuro splitter
import re
import sys
from email.parser import BytesParser, Parser
from email.policy import default
from html import escape
from html.parser import HTMLParser
from pathlib import Path
from typing import Optional
from urllib.parse import urlparse
class URLConverter(HTMLParser):
    """Re-serialize an HTML document while rewriting ``src``/``href`` URLs.

    The document is fed through :class:`html.parser.HTMLParser`; every event
    is written back out as markup. ``src`` and ``href`` attribute values that
    appear as keys in *mapping* are replaced by the mapped value.

    Tag and attribute names come back lower-cased because that is how
    ``HTMLParser`` reports them.
    """

    def __init__(self, mapping):
        """Create a converter.

        Args:
            mapping: dict-like object mapping original URL strings to their
                replacement strings.
        """
        # convert_charrefs=False makes the parser report "&name;" / "&#n;"
        # references as separate events, so they can be written back verbatim
        # instead of being decoded into raw "<" / "&" characters that would
        # corrupt the regenerated markup.
        super().__init__(convert_charrefs=False)
        self._mapping = mapping
        # Output fragments; joined once in get_output() to avoid quadratic
        # string concatenation.
        self._out = []

    def _format_tag(self, tag, attrs, closer='>'):
        """Return the markup for an opening tag with (possibly remapped) attrs."""
        parts = [tag]
        for name, value in attrs:
            if value is None:
                # Valueless attribute such as <input disabled>.
                parts.append(name)
                continue
            if name in ('src', 'href') and value in self._mapping:
                value = self._mapping[value]
            # The parser hands over attribute values already unescaped, so
            # they must be escaped again before being quoted.
            parts.append(name + '="' + escape(str(value), quote=True) + '"')
        return '<' + ' '.join(parts) + closer

    def handle_decl(self, decl):
        self._out.append('<!' + decl + '>')

    def handle_starttag(self, tag, attrs):
        self._out.append(self._format_tag(tag, attrs))

    def handle_startendtag(self, tag, attrs):
        # Keep self-closing tags self-closing; the inherited default would
        # emit a start tag followed by a separate end tag.
        self._out.append(self._format_tag(tag, attrs, closer=' />'))

    def handle_endtag(self, tag):
        self._out.append('</' + tag + '>')

    def handle_data(self, data):
        self._out.append(data)

    def handle_entityref(self, name):
        self._out.append('&' + name + ';')

    def handle_charref(self, name):
        self._out.append('&#' + name + ';')

    def handle_comment(self, data):
        self._out.append('<!--' + data + '-->')

    def handle_pi(self, data):
        self._out.append('<?' + data + '>')

    def get_output(self):
        """Return everything serialized so far as a single string."""
        return ''.join(self._out)
def make_path(in_path: Optional[str]) -> Optional[Path]:
    """Map a URL (or URL path) to a relative output path.

    If any component of the URL path is ``res`` only the final component is
    kept; otherwise the last two components are kept.

    The root component and any ``..`` components are discarded first, so the
    result is always relative and cannot climb out of the working directory.

    Args:
        in_path: URL or URL path string; may be empty or ``None``.

    Returns:
        A relative ``Path``, or ``None`` when *in_path* is empty/``None`` or
        contains no usable component (for example ``"/"``).
    """
    if not in_path:
        return None
    url_path = Path(urlparse(in_path).path)
    # Without this filter "/favicon.ico" would yield the absolute path
    # "/favicon.ico" (joinpath resets on an absolute component), and ".."
    # segments would allow writing outside the current directory.
    parts = [p for p in url_path.parts if p != url_path.anchor and p != '..']
    if not parts:
        return None
    if 'res' in parts:
        return Path(parts[-1])
    return Path(*parts[-2:])
def process(input: str):
    """Split a MIME message file into one file per located part.

    The message is walked twice. The first walk builds a mapping from the
    ``Content-Location`` of every non-text part to the relative path that
    ``make_path`` assigns to it. The second walk writes each part that has a
    ``Content-Location`` to its path; HTML parts are passed through
    ``URLConverter`` so their ``src``/``href`` URLs point at the written files.

    The content type and output path of every part are printed to stdout.

    Args:
        input: path of the message file to read.
    """
    # The message is parsed completely here, so the file need not stay open
    # while the parts are written.
    with open(input, 'rb') as fp:
        msg = BytesParser(policy=default).parse(fp)

    # 1st walk for making mapping
    mapping = {}
    for part in msg.walk():
        if part.get_content_maintype() == 'text':
            continue
        url = part.get("Content-Location")
        if not url:
            # Parts without a location must not add a bogus None -> 'None' entry.
            continue
        path = make_path(urlparse(url).path)
        if path is not None:
            mapping[url] = str(path)

    # 2nd walk for actual output
    for part in msg.walk():
        loc = part.get("Content-Location")
        path = make_path(urlparse(loc).path) if loc else None
        print(part.get_content_type(), path)
        if path is None:
            continue
        if part.get_content_maintype() == 'text':
            # NOTE(review): forces cp932 for every text part regardless of the
            # charset the part declares - presumably the saved pages are
            # Shift_JIS; confirm before relying on this for other sources.
            part.set_charset('cp932')
        content = part.get_content()
        path.parent.mkdir(parents=True, exist_ok=True)
        if isinstance(content, bytes):
            data = content
        else:
            if part.get_content_type() == 'text/html':
                # Only HTML is rewritten; running CSS/JS/plain text through an
                # HTML parser could only alter it.
                parser = URLConverter(mapping)
                parser.feed(content)
                parser.close()  # flush any text still buffered in the parser
                content = parser.get_output()
            data = content.encode('utf-8')
        with open(path, 'wb') as out:
            out.write(data)
if __name__ == "__main__":
    # Fail with a usage message instead of a bare IndexError when the
    # message file argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} FILE.eml")
    process(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment