import base64
import os
import shutil
import sys
from collections import Counter, OrderedDict
from xml.etree import ElementTree as ET

from appscript import *
class EvernoteProcessor:
    """Import notes from an Evernote ``.enex`` export into DEVONthink 3.

    The export file is parsed with ElementTree; records are created in
    DEVONthink through the appscript bridge (``app`` and ``k`` come from
    ``from appscript import *`` at the top of the file).
    """

    def __init__(self):
        """Obtain an appscript reference to the "DEVONthink 3" application."""
        self.devon = app("DEVONthink 3")

    def print_trends(self, notes):
        """Print how often each tag occurs, most frequent first.

        Args:
            notes: Mapping of note title to a list of tag strings.

        The output is a list of ``(tag, count)`` tuples; tags with equal
        counts keep the order in which they were first seen.
        """
        count = Counter(tag for tags in notes.values() for tag in tags)
        print(count.most_common())

    def create_html_in_devon(self, journaldb, title, tags, html):
        """Create an HTML record named ``title`` inside ``journaldb``.

        This is best effort: a failure is reported on stderr and the import
        continues with the next note.
        """
        try:
            self.devon.create_record_with(
                {k.type: k.html, k.tags: tags, k.name: title, k.data: html},
                in_=journaldb,
            )
        except Exception as exc:
            print(
                "Could not create HTML record {!r}: {}".format(title, exc),
                file=sys.stderr,
            )

    def create_picture_in_devon(self, journaldb, title, tags, url):
        """Create a picture record named ``title`` inside ``journaldb``.

        Args:
            url: Path of the image file that was written to disk.
        """
        try:
            self.devon.create_record_with(
                {k.type: k.picture, k.tags: tags, k.name: title, k.data: url},
                in_=journaldb,
            )
        except Exception:
            # Will probably say CommandError -50, Invalid argument data, but
            # it works nonetheless, so the error is deliberately ignored.
            pass

    def save_evernote_attachment(self, attach, rootdir, title):
        """Decode a base64 ``<data>`` element and write it as a ``.jpg`` file.

        Args:
            attach: Element whose text is the base64-encoded attachment.
            rootdir: Existing directory to write into.
            title: Base name for the file (without extension).

        Returns:
            The path of the written file, ``rootdir/<title>.jpg``.
        """
        # An empty <data/> element has text None; treat it as zero bytes.
        imgraw = base64.b64decode(attach.text or "")
        # A "/" in the title would otherwise be read as a directory separator
        # and make open() fail because that directory does not exist.
        safe_title = title.replace("/", "_")
        filename = rootdir + "/" + safe_title + ".jpg"
        with open(filename, "wb") as outfile:
            outfile.write(imgraw)
        return filename

    def _prepare_import(self, filename):
        """Create the DEVONthink group and an empty scratch directory.

        Returns:
            A ``(journaldb, xml_root, rootdir)`` tuple.
        """
        print("Processing " + filename)
        # NOTE(review): databases[1] is presumably the first open database
        # (appscript element indices are 1-based) -- confirm.
        db = self.devon.databases[1].root
        journaldb = self.devon.create_record_with(
            {k.type: k.group, k.name: filename.replace(".enex", "")},
            in_=db,
        )
        xml_root = ET.parse(filename).getroot()
        rootdir = filename.replace(" ", "_").replace(".enex", "")
        # Start from a clean directory; it may simply not exist yet.
        shutil.rmtree(rootdir, ignore_errors=True)
        os.mkdir(rootdir)
        return journaldb, xml_root, rootdir

    @staticmethod
    def _title_and_tags(note):
        """Return ``(title, tags)`` for a ``<note>`` element.

        A missing or empty title becomes ``"Untitled"``; empty tags are
        dropped.
        """
        title = note.findtext("title") or "Untitled"
        tags = [tag.text for tag in note.findall(".//tag") if tag.text]
        return title, tags

    def process_html_notes(self, filename):
        """Import notes that have HTML content plus optional attachments.

        Each note becomes one HTML record; every ``<data>`` element of the
        note is saved to disk and imported as a picture named
        ``<title>-<n>``, with ``n`` counting from 1.
        """
        journaldb, xml_root, rootdir = self._prepare_import(filename)
        for note in xml_root.findall(".//note"):
            title, tags = self._title_and_tags(note)
            content = note.findtext("content") or ""
            # The parser normally unwraps CDATA already; the markers are
            # still stripped in case they occur literally in the text.
            content = content.replace("<![CDATA[", "").replace("]]>", "")
            self.create_html_in_devon(journaldb, title, tags, content)
            for i, attach in enumerate(note.findall(".//data"), start=1):
                name = title + "-" + str(i)
                url = self.save_evernote_attachment(attach, rootdir, name)
                # abspath also handles an absolute ``filename`` correctly.
                self.create_picture_in_devon(
                    journaldb, name, tags, os.path.abspath(url)
                )

    def process_scanned_notes(self, filename):
        """Import scan-only notes as pictures, then print tag statistics."""
        journaldb, xml_root, rootdir = self._prepare_import(filename)
        notes = {}
        for note in xml_root.findall(".//note"):
            title, tags = self._title_and_tags(note)
            notes[title] = tags
            # Should be only one attachment per note; looping just for good
            # measure. Several attachments would overwrite the same file.
            for attach in note.findall(".//data"):
                url = self.save_evernote_attachment(attach, rootdir, title)
                self.create_picture_in_devon(
                    journaldb, title, tags, os.path.abspath(url)
                )
        self.print_trends(notes)
# Running this file creates the processor (and with it the DEVONthink 3
# application reference) right away; uncomment one of the calls below to
# actually import an export file.
processer = EvernoteProcessor()

# Usage for SCAN-ONLY notes:
# processer.process_scanned_notes('Boekje 01 082010 - 092011.enex')

# Usage for HTML notes with attachments:
# processer.process_html_notes('Flarden.enex')

print("Done!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment