xiaoganghan/mynote.xml

## mynote.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
<en-export export-date="20120727T073610Z" application="Evernote" version="Evernote Mac 3.0.5 (209942)">
<note><title>Vim Tips</title><content><![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
<en-note style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;">
yank for copy, delete for cut, put for parse
<div><br/></div>
<div>Move in context, not position</div>
<div>/ search forward</div>
<div>? search backward</div>
<div>n repeat last search</div>
<div>N repeat last search but in the opposite direction</div>
<div>tx move to 'x'</div>
<div>fx find 'x'</div>
</en-note>
]]></content><created>20101229T161500Z</created><updated>20101231T161039Z</updated><note-attributes/></note>
</en-export>

## parse_evernote.py
from lxml import etree
from StringIO import StringIO
#http://www.hanxiaogang.com/writing/parsing-evernote-export-file-enex-using-python/
# Parser used by parseNoteXML for the ENML document embedded in each note's
# <content>: whitespace-only text between tags is discarded and entity
# references are left unresolved.
p = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
def parseNoteXML(xmlFile):
    """Parse an Evernote export (.enex) file into a list of note dicts.

    Every element that ends while a ``<note>`` is being read contributes one
    entry ``tag -> element text`` to that note's dict (the ``note`` element
    itself included). The ``content`` entry is special: its text is an
    embedded ENML document, which is parsed with the module-level parser
    ``p`` and stored as a list holding the ``.text`` of every node in it.
    Nodes without text contribute ``None``; tail text is not collected.
    A missing or blank ``<content>`` yields an empty list.

    :param xmlFile: filename or file object accepted by ``etree.iterparse``.
    :returns: list with one dict per ``<note>``, in document order.
    """
    # Local import: the payload handed to the inner parser is bytes, so a
    # bytes buffer is the matching file-like wrapper.
    from io import BytesIO

    notes = []
    note_dict = {}
    # iterparse reports 'end' events by default, so all children of a <note>
    # have been recorded by the time the <note> element itself arrives.
    context = etree.iterparse(xmlFile, encoding='utf-8', strip_cdata=False)
    for _action, elem in context:
        if elem.tag == 'content':
            text = []
            raw = elem.text
            if raw and raw.strip():
                # The embedded document carries its own XML encoding
                # declaration, so it is passed to lxml as encoded bytes
                # rather than as a unicode string.
                enml = etree.parse(BytesIO(raw.encode('utf-8')), p)
                text = [node.text for node in enml.iter()]
        else:
            text = elem.text
        note_dict[elem.tag] = text
        if elem.tag == "note":
            notes.append(note_dict)
            note_dict = {}
            # Only plain strings were kept, so the finished subtree can be
            # emptied to keep memory flat on large exports.
            elem.clear()
    return notes

# Script entry point: parse the export and keep the resulting list of note
# dicts in `notes`; nothing is printed or written here.
# NOTE(review): the sample export shown with this script is labelled
# mynote.xml, while the path below is mynote.enex - confirm the file name.
if __name__ == '__main__':
    notes = parseNoteXML('mynote.enex')

## result
[{'content': ['\nyank for copy, delete for cut, put for parse\n',
              None,
              None,
              'Move in context, not position',
              '/ search forward',
              '? search backward',
              'n repeat last search',
              'N repeat last search but in the opposite direction',
              "tx move to 'x'",
              "fx find 'x'"],
  'created': '20101229T161500Z',
  'note': None,
  'note-attributes': None,
  'title': 'Vim Tips',
  'updated': '20101231T161039Z'}]
	<?xml version="1.0" encoding="UTF-8"?>
	<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
	<en-export export-date="20120727T073610Z" application="Evernote" version="Evernote Mac 3.0.5 (209942)">
	<note><title>Vim Tips</title><content><![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
	<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
	<en-note style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;">
	yank for copy, delete for cut, put for parse
	<div><br/></div>
	<div>Move in context, not position</div>
	<div>/ search forward</div>
	<div>? search backward</div>
	<div>n repeat last search</div>
	<div>N repeat last search but in the opposite direction</div>
	<div>tx move to 'x'</div>
	<div>fx find 'x'</div>
	</en-note>
	]]></content><created>20101229T161500Z</created><updated>20101231T161039Z</updated><note-attributes/></note>
	</en-export>
	from lxml import etree
	from StringIO import StringIO
	#http://www.hanxiaogang.com/writing/parsing-evernote-export-file-enex-using-python/
	# Parser used by parseNoteXML for the ENML document embedded in each note's
	# <content>: whitespace-only text between tags is discarded and entity
	# references are left unresolved.
	p = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
	def parseNoteXML(xmlFile):
	    """Parse an Evernote export (.enex) file into a list of note dicts.
	    
	    Every element that ends while a ``<note>`` is being read contributes one
	    entry ``tag -> element text`` to that note's dict (the ``note`` element
	    itself included). The ``content`` entry is special: its text is an
	    embedded ENML document, which is parsed with the module-level parser
	    ``p`` and stored as a list holding the ``.text`` of every node in it.
	    Nodes without text contribute ``None``; tail text is not collected.
	    A missing or blank ``<content>`` yields an empty list.
	    
	    :param xmlFile: filename or file object accepted by ``etree.iterparse``.
	    :returns: list with one dict per ``<note>``, in document order.
	    """
	    # Local import: the payload handed to the inner parser is bytes, so a
	    # bytes buffer is the matching file-like wrapper.
	    from io import BytesIO
	    notes = []
	    note_dict = {}
	    # iterparse reports 'end' events by default, so all children of a <note>
	    # have been recorded by the time the <note> element itself arrives.
	    context = etree.iterparse(xmlFile, encoding='utf-8', strip_cdata=False)
	    for _action, elem in context:
	        if elem.tag == 'content':
	            text = []
	            raw = elem.text
	            if raw and raw.strip():
	                # The embedded document carries its own XML encoding
	                # declaration, so it is passed to lxml as encoded bytes
	                # rather than as a unicode string.
	                enml = etree.parse(BytesIO(raw.encode('utf-8')), p)
	                text = [node.text for node in enml.iter()]
	        else:
	            text = elem.text
	        note_dict[elem.tag] = text
	        if elem.tag == "note":
	            notes.append(note_dict)
	            note_dict = {}
	            # Only plain strings were kept, so the finished subtree can be
	            # emptied to keep memory flat on large exports.
	            elem.clear()
	    return notes

	# Script entry point: parse the export and keep the resulting list of note
	# dicts in `notes`; nothing is printed or written here.
	if __name__ == '__main__':
	    notes = parseNoteXML('mynote.enex')
	[{'content': ['\nyank for copy, delete for cut, put for parse\n',
	None,
	None,
	'Move in context, not position',
	'/ search forward',
	'? search backward',
	'n repeat last search',
	'N repeat last search but in the opposite direction',
	"tx move to 'x'",
	"fx find 'x'"],
	'created': '20101229T161500Z',
	'note': None,
	'note-attributes': None,
	'title': 'Vim Tips',
	'updated': '20101231T161039Z'}]