Skip to content

Instantly share code, notes, and snippets.

@alphapapa
Forked from wzhd/.gitignore
Created September 11, 2016 07:52
Show Gist options
  • Save alphapapa/516b77ab41cf948ac275c78dd211693f to your computer and use it in GitHub Desktop.
Save alphapapa/516b77ab41cf948ac275c78dd211693f to your computer and use it in GitHub Desktop.
Parsing Evernote export file (.enex) using Python
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
<en-export export-date="20120727T073610Z" application="Evernote" version="Evernote Mac 3.0.5 (209942)">
<note>
<title>Vim Tips</title>
<content>
<![CDATA[
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
<en-note style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;">
yank for copy, delete for cut, put for parse
<div><br/></div>
<div>Move in context, not position</div>
<div>/ search forward</div>
<div>? search backward</div>
<div>n repeat last search</div>
<div>N repeat last search but in the opposite direction</div>
<div>tx move to 'x'</div>
<div>fx find 'x'</div>
</en-note>
]]>
</content>
<created>20101229T161500Z</created>
<updated>20101231T161039Z</updated>
<note-attributes/>
</note>
</en-export>
#!/usr/bin/env python3
from base64 import b64decode
import hashlib
from lxml import etree
from io import BytesIO
import os
from time import strptime
from pypandoc import convert_text
#http://www.hanxiaogang.com/writing/parsing-evernote-export-file-enex-using-python/
# Module-level lxml parser created with remove_blank_text=True and
# resolve_entities=False.
# NOTE(review): `p` is not referenced by any function visible in this file
# (parseNoteXML builds its own iterparse context) -- confirm whether it is
# still needed.
p = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
def parse_content(content):
    """Convert a note's HTML/ENML body to Org-mode text.

    Args:
        content: The text of a ``<content>`` element, or ``None`` when the
            element is empty.

    Returns:
        The Org-mode rendering produced by pypandoc's ``convert_text``, or an
        empty string when there is no content to convert.
    """
    # An empty <content/> element has text None, which pandoc cannot convert.
    if content is None:
        return ''
    return convert_text(content, 'org', format='html')
def parse_resource(resource):
    """Collect the children of a ``<resource>`` element into a dictionary.

    Args:
        resource: An element whose children expose ``.tag`` and ``.text``.

    Returns:
        A dict mapping each child tag to its text. The ``data`` entry holds
        the base64-decoded bytes and ``hash`` holds the MD5 hex digest of
        those bytes. Both keys are always present: a resource without a
        ``<data>`` child is treated like one with empty data.
    """
    rsc_dict = {}
    for elem in resource:
        if elem.tag == 'data':
            # Sometimes elem.text is None (empty <data/>): treat as no bytes.
            payload = b64decode(elem.text) if elem.text else b''
            rsc_dict['data'] = payload
            rsc_dict['hash'] = hashlib.md5(payload).hexdigest()
        else:
            rsc_dict[elem.tag] = elem.text

    # export_note indexes 'data' and 'hash' unconditionally, so guarantee
    # them even when the <data> child is missing altogether.
    if 'data' not in rsc_dict:
        rsc_dict['data'] = b''
        rsc_dict['hash'] = hashlib.md5(b'').hexdigest()
    return rsc_dict
def parse_note(note):
    """Turn one ``<note>`` element into a plain dictionary.

    Args:
        note: An element whose children expose ``.tag`` and ``.text``.

    Returns:
        A dict with one entry per child tag, plus:

        * ``content``: the body converted by ``parse_content``;
        * ``content-raw``: the unconverted body text;
        * ``created`` / ``updated``: ``time.struct_time`` values parsed from
          the ``YYYYMMDDTHHMMSSZ`` timestamps;
        * ``resource``: a list of dicts built by ``parse_resource``;
        * ``tags``: a list with the text of every ``<tag>`` child, in
          document order.

        ``tag`` still holds only the last ``<tag>`` text, as before; ``tags``
        exists because a note may carry several tags and the plain per-tag
        entry keeps only one of them.
    """
    note_dict = {}
    resources = []
    tags = []
    for elem in note:
        if elem.tag == 'content':
            note_dict[elem.tag] = parse_content(elem.text)
            # A copy of original content
            note_dict['content-raw'] = elem.text
        elif elem.tag == 'resource':
            resources.append(parse_resource(elem))
        elif elem.tag in ('created', 'updated'):
            note_dict[elem.tag] = strptime(elem.text, '%Y%m%dT%H%M%SZ')
        else:
            if elem.tag == 'tag':
                tags.append(elem.text)
            note_dict[elem.tag] = elem.text
    note_dict['resource'] = resources
    note_dict['tags'] = tags
    return note_dict
def parseNoteXML(xmlFile):
    """Lazily yield one parsed note dict per ``<note>`` element in an ENEX file.

    Args:
        xmlFile: Path or file object accepted by ``lxml.etree.iterparse``.

    Yields:
        The dictionary built by ``parse_note`` for each completed ``<note>``.
    """
    # Without huge_tree set to True, parser may complain about huge text node
    # Try to recover, because there may be "&nbsp;", which will cause
    # "XMLSyntaxError: Entity 'nbsp' not defined"
    context = etree.iterparse(xmlFile, encoding='utf-8', strip_cdata=False, huge_tree=True, recover=True)
    for _, elem in context:
        if elem.tag != "note":
            continue
        yield parse_note(elem)
        # The consumer is done with this note once the generator resumes.
        # Release the element and the already-processed siblings before it,
        # otherwise the whole export accumulates in memory.
        elem.clear()
        parent = elem.getparent()
        if parent is not None:
            while elem.getprevious() is not None:
                del parent[0]
# Save notes and attachments
# in directories named according to date of creation
def export_note(note):
    """Write one parsed note and its attachments below ``en-export/YYYY/MM/DD``.

    Three kinds of files are written into the directory derived from the
    note's creation date:

    * ``<title>.org``: a ``#+TITLE:`` line followed by the converted content;
    * ``<title>.bak``: the raw, unconverted content;
    * ``<hash>.data``: one binary file per resource.

    Text files are written as UTF-8, so the result does not depend on the
    platform's default encoding.

    Args:
        note: A dict as produced by ``parse_note``. ``created`` must be a
            ``time.struct_time``. ``title``, ``content`` and ``content-raw``
            may be missing or ``None``.
    """
    date = note['created']
    year = str(date.tm_year)
    mon = '%02d' % date.tm_mon
    mday = '%02d' % date.tm_mday
    note_dir = os.path.join('en-export', year, mon, mday)
    os.makedirs(note_dir, exist_ok=True)

    full_title = note.get('title') or ''
    # Remove "/" from filenames and keep them short.
    # NOTE: titles are cut to 20 characters, so two notes created on the same
    # day that share that prefix are written to the same path.
    title = full_title.replace('/', ' ')[:20] or 'untitled'

    text_file = os.path.join(note_dir, title + '.org')
    with open(text_file, 'w', encoding='utf-8') as fd:
        # Write the original title
        fd.write('#+TITLE: ' + full_title + '\n')
        fd.write(note.get('content') or '')

    bak_file = os.path.join(note_dir, title + '.bak')
    with open(bak_file, 'w', encoding='utf-8') as fd:
        fd.write(note.get('content-raw') or '')

    for resource in note.get('resource', []):
        digest = resource.get('hash')
        if digest is None:
            # No payload was decoded for this resource; nothing to write.
            continue
        rsc_file = os.path.join(note_dir, digest + '.data')
        with open(rsc_file, 'wb') as fd:
            fd.write(resource['data'])
if __name__ == '__main__':
    import sys

    # The export file may be given as the first command-line argument;
    # without one, fall back to 'mynote.enex' in the current directory.
    enex_path = sys.argv[1] if len(sys.argv) > 1 else 'mynote.enex'
    notes = parseNoteXML(enex_path)
    for note in notes:
        export_note(note)
[{'content': ['\nyank for copy, delete for cut, put for parse\n',
None,
None,
'Move in context, not position',
'/ search forward',
'? search backward',
'n repeat last search',
'N repeat last search but in the opposite direction',
"tx move to 'x'",
"fx find 'x'"],
'created': '20101229T161500Z',
'note': None,
'note-attributes': None,
'title': 'Vim Tips',
'updated': '20101231T161039Z'}]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment