Created
May 30, 2017 19:10
-
-
Save msbentley/21b83db14d539b8570ecc397f58c0fa7 to your computer and use it in GitHub Desktop.
Example ENEX file, checking MD5 hashes in resources (attachments) and note body
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"code_folding": [ | |
0 | |
], | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Imports\n", | |
"from lxml import etree\n", | |
"from StringIO import StringIO\n", | |
"import hashlib\n", | |
"import binascii" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"enexfile = 'attachment_test.enex'\n", | |
"root = etree.parse(enexfile).getroot()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"notes = root.xpath('//note')\n", | |
"len(notes)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'QOwnNote attachment test'" | |
] | |
}, | |
"execution_count": 40, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"note = root[0]\n", | |
"note.find('title').text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"5" | |
] | |
}, | |
"execution_count": 41, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"resources = note.findall('resource')\n", | |
"len(resources)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 42, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"pdf-sample.pdf fa7d7e650b2cec68f302b31ba28235d8\n", | |
"docx_test.docx c27ff5a3df11bfde45c80b7e50b45aad\n", | |
"odt_test.odt f4c558bdb0faaf7f968ae8443a77c163\n", | |
"test.txt d03fd97600532ef84ddc1e578ea843e9\n", | |
"IMG_2034.JPG f01dd2039f6fc6df94053ea915871a76\n" | |
] | |
} | |
], | |
"source": [ | |
"for resource in resources:\n", | |
" data = resource.find('data').text.decode('base64')\n", | |
" md5 = hashlib.md5()\n", | |
" md5.update(data)\n", | |
" hash_hex = binascii.hexlify(md5.digest())\n", | |
" filename = resource.find('resource-attributes/file-name').text\n", | |
" mime = resource.find('mime').text\n", | |
" print filename, hash_hex" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Check the `en-media` hashes in the note content (parse the ENML)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"enml = etree.parse(StringIO(note.find('content').text))\n", | |
"enml_root = enml.getroot()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 44, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"media = enml_root.xpath('//en-media')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['fa7d7e650b2cec68f302b31ba28235d8',\n", | |
" 'c27ff5a3df11bfde45c80b7e50b45aad',\n", | |
" 'f4c558bdb0faaf7f968ae8443a77c163',\n", | |
" 'd03fd97600532ef84ddc1e578ea843e9',\n", | |
" 'f01dd2039f6fc6df94053ea915871a76']" | |
] | |
}, | |
"execution_count": 45, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"[m.attrib['hash'] for m in media]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"hide_input": false, | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.13" | |
}, | |
"toc": { | |
"colors": { | |
"hover_highlight": "#DAA520", | |
"navigate_num": "#000000", | |
"navigate_text": "#333333", | |
"running_highlight": "#FF0000", | |
"selected_highlight": "#FFD700", | |
"sidebar_border": "#EEEEEE", | |
"wrapper_background": "#FFFFFF" | |
}, | |
"moveMenuLeft": true, | |
"nav_menu": { | |
"height": "66px", | |
"width": "252px" | |
}, | |
"navigate_menu": true, | |
"number_sections": true, | |
"sideBar": true, | |
"threshold": 4, | |
"toc_cell": false, | |
"toc_position": { | |
"height": "931px", | |
"left": "0px", | |
"right": "1776px", | |
"top": "106px", | |
"width": "189px" | |
}, | |
"toc_section_display": "block", | |
"toc_window_display": true, | |
"widenNotebook": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment