Created
January 5, 2019 14:18
-
-
Save christian-smith/aa0e8ddf7221ac7a88731247c87cc29a to your computer and use it in GitHub Desktop.
Convert Apple webloc files (such as DEVONthink bookmarks) to org-mode entries and preserve filetags and timestamps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
webloc2orgmode.py

This script takes Apple webloc files (such as bookmarks made in DEVONthink) and exports them as org-mode entries.
It also saves the timestamp and tag information for each webloc.

Usage:
    $ pip3 install lxml biplist xattr bs4 requests
    $ touch ~/bookmarks.org
    $ mkdir done
    $ mkdir bookmarks
    (move all webloc files to the bookmarks directory)
    $ python3 webloc2orgmode.py

All parsed weblocs will have been moved to the done/ directory.
"""
import glob
import os
import plistlib
import shutil
import time
from struct import unpack
from time import sleep

import biplist
import lxml.html
import requests
import xattr
from bs4 import BeautifulSoup
from lxml import etree
# Seconds to wait for a bookmarked page to answer before giving up on its
# title; without a timeout a single dead host can stall the whole batch.
REQUEST_TIMEOUT = 10


def read_webloc_url(path):
    """Return the value stored under the ``URL`` key of the webloc at *path*.

    ``plistlib.load`` detects the plist flavour on its own, so XML weblocs
    and binary-plist weblocs are both accepted.  Raises ``KeyError`` when the
    plist has no ``URL`` entry; parse errors propagate to the caller.
    """
    with open(path, 'rb') as handle:
        return plistlib.load(handle)['URL']


def strip_tag_color(tag):
    """Return *tag* cut at its first newline.

    Tags read from the xattr appear to be stored as the name, a newline and
    a number (presumably the Finder colour index -- verify).  Removing only a
    trailing "0" left every other suffix, newline included, in the headline.
    """
    return tag.split('\n', 1)[0]


def read_tags(path):
    """Return the cleaned user tags of *path*, or ``[]`` when unreadable.

    Tag extraction is best effort: any failure (attribute absent, payload
    not a plist, ...) is reported as "tagerror" and the bookmark is still
    exported without tags.
    """
    try:
        raw = xattr.getxattr(path, 'com.apple.metadata:_kMDItemUserTags')
        tags = biplist.readPlistFromString(raw)
    except Exception:
        # Deliberately broad, but unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit through.
        print("tagerror")
        return []
    return [strip_tag_color(tag) for tag in tags]


def creation_stamp(path):
    """Return the creation time of *path* as an inactive org timestamp."""
    info = os.stat(path)
    # Not every platform exposes st_birthtime; use the modification time
    # there instead of failing with AttributeError.
    created = getattr(info, 'st_birthtime', info.st_mtime)
    return time.strftime('[%Y-%m-%d %a %H:%M:%S %z]', time.localtime(created))


def clean_title(raw_title):
    """Collapse whitespace in *raw_title*; return 'Undefined' when empty.

    A page title may be missing (``None``) or span several lines; either
    would corrupt the single-line org headline.
    """
    if raw_title is None:
        return 'Undefined'
    return ' '.join(str(raw_title).split()) or 'Undefined'


def fetch_title(url):
    """Download *url* and return its cleaned ``<title>`` text.

    Any failure while fetching or parsing prints "url error" and yields
    'Undefined', so an unreachable page never blocks the export.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, "lxml")
        raw_title = soup.title.string if soup.title is not None else None
    except Exception:
        print("url error")
        return 'Undefined'
    return clean_title(raw_title)


def format_entry(url, title, tags, created):
    """Build the org-mode text for one bookmark.

    The result is a headline linking *title* to *url* (followed by the
    colon-delimited *tags* when there are any) and a PROPERTIES drawer
    holding *created*; it ends with a newline.
    """
    if tags:
        headline = "* [[%s][%s]] :%s:" % (url, title, ':'.join(tags))
    else:
        headline = "* [[%s][%s]]" % (url, title)
    properties = ":PROPERTIES:\n:CREATED: %s\n:END:" % created
    return "%s\n%s\n" % (headline, properties)


homedir = os.path.expanduser('~')

for filename in glob.glob('bookmarks/*.webloc'):
    try:
        url = read_webloc_url(filename)
    except Exception as err:
        # One unreadable webloc should not abort the rest of the batch; it
        # stays in place so it can be inspected and retried.
        print("webloc error: %s (%s)" % (filename, err))
        continue

    tags = read_tags(filename)
    created = creation_stamp(filename)
    title = fetch_title(url)

    # A single context-managed append replaces two unclosed open() calls.
    with open(homedir + "/bookmarks.org", "a", encoding="utf-8") as org_file:
        org_file.write(format_entry(url, title, tags, created))

    # Move the webloc only after its entry has been written.
    os.makedirs("done", exist_ok=True)
    shutil.move(filename, "done/")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment