Created
January 2, 2011 22:53
-
-
Save brendano/762900 to your computer and use it in GitHub Desktop.
Publish Zotero papers as HTML and symlinks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# From your Zotero database and file storage,
# creates a simple HTML table, and a directory full of symlinks,
# for quick-and-dirty web or Dropbox viewing.
#
# Installation: place this script in your Zotero folder,
#   e.g. ~/Documents/zotero/
# and run it,
#   e.g. python ~/Documents/zotero/make_views.py
#
# Two outputs:
# (1) flat_symlinks/ -- a directory full of symlinks to all PDFs, with nice
#     filenames. The names are formatted so that alphabetic order shows
#     newest-first. If you're running your zotero/ directory inside Dropbox,
#     this directory is easy to navigate from the iOS Dropbox app.
# (2) view.html -- an HTML table version.
#
# By Brendan O'Connor, Jan 2011, https://gist.github.com/762900
import errno
import glob
import os
import re
import shutil
import sqlite3
import sys

import sqlalchemy

try:
    from html import escape as html_escape  # Python 3
except ImportError:
    from cgi import escape as html_escape  # Python 2
# Work from the directory that contains this script: every relative path
# below (zotero.sqlite, storage/, flat_symlinks/, view.html) hangs off it.
here = os.path.dirname(os.path.abspath(__file__))
os.chdir(here)
if not os.path.isdir("flat_symlinks"):
    os.makedirs("flat_symlinks")

### Data
# Query a scratch copy of the database rather than the live file
# (presumably so a running Zotero is not disturbed -- TODO confirm).
# shutil.copyfile raises on failure, whereas the previous `cp` shell-out
# failed silently and left a stale or missing copy to be queried.
db_copy = "/tmp/zotero.sqlite"
shutil.copyfile("zotero.sqlite", db_copy)
conn = sqlalchemy.create_engine('sqlite:///' + db_copy, module=sqlite3.dbapi2).connect()
# Alternative kept from the original: query the live database directly.
# import pysqlite2
# conn= sqlalchemy.create_engine('sqlite:///zotero.sqlite').connect()

# The page declares charset=utf-8, so on Python 3 open the file as UTF-8 text.
# On Python 2 it stays a byte-mode file and writers encode explicitly.
view_path = os.path.join(here, 'view.html')
if sys.version_info[0] >= 3:
    HTML = open(view_path, 'w', encoding='utf-8')
else:
    HTML = open(view_path, 'w')
# Query producing one row per entry in `items` (group by items.itemID),
# most recently added first. Columns:
#   authors           -- creator last names joined with ', '
#   myIA_sourceItemID -- sourceItemID of the itemAttachments row whose itemID
#                        is this item (NULL when there is no such row)
#   publishedDate, url, title
#                     -- itemData values for fieldID 14, 1 and 110 respectively
#   files             -- one "path|key|itemID" triple per attachment whose
#                        sourceItemID is this item, joined with '***'
#   bla               -- constant placeholder column
# NOTE(review): the creator and attachment joins are both one-to-many, so the
# grouped rows look like a cross product and group_concat may repeat author
# names when an item has several attachments -- verify. Also, the ORDER BY
# inside the itemCreators subquery is presumably meant to fix author order;
# confirm SQLite preserves it through the join and group_concat.
sql = """
select items.itemID, items.dateAdded, group_concat(creatorData.lastName, ', ') as authors,
items.key,
myIA.sourceItemID myIA_sourceItemID,
dateDV.value as publishedDate,
titleDV.value as title,
urlDV.value as url,
group_concat( itemAttachments.path || '|' || IAitem.key || '|' || IAitem.itemID, '***') as files,
'BLA' as bla
from items
left join
(select itemID,creatorID from itemCreators order by orderIndex) itemCreators2
on itemCreators2.itemID=items.itemID
left join creators on creators.creatorID=itemCreators2.creatorID
left join creatorData on creatorData.creatorDataID=creators.creatorDataID
left join itemData dateD on (dateD.itemID=items.itemID and dateD.fieldID=14)
left join itemDataValues dateDV on dateDV.valueID=dateD.valueID
left join itemData urlD on (urlD.itemID=items.itemID and urlD.fieldID=1)
left join itemDataValues urlDV on urlDV.valueID=urlD.valueID
left join itemData titleD on (titleD.itemID=items.itemID and titleD.fieldID=110)
left join itemDataValues titleDV on titleDV.valueID=titleD.valueID
left join itemAttachments on itemAttachments.sourceItemID=items.itemID
left join items IAitem on IAitem.itemID=itemAttachments.itemID
left join itemAttachments myIA on myIA.itemID=items.itemID
group by items.itemID
order by items.dateAdded desc
"""
## Data Display
# STORAGE_URL_BASE is prefixed to every "storage/<key>/<file>" path.
# - An absolute URL lets you open view.html from the iOS Dropbox app and fetch
#   the files over an Internet connection, or publish view.html anywhere.
# - A relative URL is necessary for the symlinks, which are only created when
#   the prefixed path exists on the local disk.
# (Needs patching up to support both at once.)
#STORAGE_URL_BASE = "http://dl.dropbox.com/u/1354835/zotero/"
STORAGE_URL_BASE = "./"
def bare_display_url(url):
    """Return a short, human-friendly form of `url` for use as link text.

    A leading "http://" or "https://" is dropped, trailing slashes are
    removed, and anything longer than 50 characters is cut to its first 45
    characters followed by " ...". A result starting with "storage/" is
    prefixed with STORAGE_URL_BASE.

    Only a *leading* scheme is stripped: a URL embedded later in the string
    (e.g. an archive or redirect link) is left intact rather than mangled.
    """
    for scheme in ("http://", "https://"):
        if url.startswith(scheme):
            url = url[len(scheme):]
            break
    url = url.rstrip("/")
    if len(url) > 50:
        url = url[:45] + " ..."
    if url.startswith("storage/"):
        url = STORAGE_URL_BASE + url
    return url
def unicodify(s, encoding='utf8', *args):
    """Coerce `s` to a text (unicode) string.

    Text is returned unchanged, byte strings are decoded with `encoding`
    (extra positional `args`, e.g. an error handler such as "replace", are
    passed on to ``decode``), and any other object -- including None -- is
    converted with the text type's constructor.

    Works on Python 2 and 3: the Python-2-only ``unicode`` builtin is looked
    up at call time instead of being assumed to exist.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3
    if isinstance(s, text_type):
        return s
    if isinstance(s, bytes):  # `bytes` is an alias of `str` on Python 2
        return s.decode(encoding, *args)
    return text_type(s)
def truncate(s):
    """Shorten `s` to at most 200 characters, marking any cut with "...".

    None is treated as the empty string; values of 200 characters or fewer
    come back unchanged.
    """
    limit = 200
    if s is None:
        return u""
    if len(s) <= limit:
        return s
    # TODO: cut on a word boundary instead of mid-word.
    return s[:limit] + "..."
# Page preamble: table styling plus the charset declaration for view.html.
_page_header_lines = [
    "<style>",
    "td { vertical-align: top; border: 1px solid gray; padding:1px }",
    "table { border-collapse: collapse; }",
    ".dateAdded { font-size: 80% }",
    "</style>",
    '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">',
]
HTML.write("\n".join(_page_header_lines) + "\n")
rows = list(conn.execute(sql))
# Not referenced by the rest of the visible script; kept in case other code uses it.
itemIDs = set(int(r['itemID']) for r in rows)

# Collect the itemID of every stored attachment listed under some item's
# `files` column. Those attachments also appear as rows of their own, and the
# display loop uses this set to skip them.
accountedForIAitemIDs = set()
for r in rows:
    for entry in (r['files'] or "").split('***'):
        if not entry.startswith('storage:'):
            continue
        # entry is "storage:<path>|<key>|<itemID>". Split from the right so a
        # '|' inside <path> cannot break the unpacking.
        accountedForIAitemIDs.add(int(entry.rsplit('|', 2)[2]))

sys.stderr.write("%d rows\n" % len(rows))
if rows:  # an empty library has no newest row to show
    sys.stderr.write("Newest: %s\n" % (rows[0],))
def _write_html(text):
    """Write one line of markup to view.html (adds the trailing newline)."""
    if not isinstance(text, str):
        # Python 2 only: `text` is a unicode object and HTML is a byte-mode file.
        text = text.encode('utf-8')
    HTML.write(text + "\n")


def _make_symlink(local_link, symlink_name):
    """Create flat_symlinks/<symlink_name> pointing at `local_link`, best effort.

    `local_link` is relative to the script directory (the current directory).
    Nothing is done when the target is missing or the link already exists;
    other failures are reported on stderr without aborting the run.
    """
    link_path = os.path.join('flat_symlinks', symlink_name)
    # lexists: an existing link counts even if its target has since vanished.
    if not os.path.exists(local_link) or os.path.lexists(link_path):
        return
    # A relative symlink target is resolved from the link's own directory,
    # hence the '..' hop; no chdir is needed to create it.
    try:
        os.symlink(os.path.join('..', local_link), link_path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            sys.stderr.write("could not create symlink %r: %s\n" % (symlink_name, e.strerror))


HTML.write("<table>")
for r in rows:
    # Attachments already listed under a parent item do not get their own row.
    if r['itemID'] in accountedForIAitemIDs:
        continue

    cells = [('dateAdded', r['dateAdded'].split()[0])]  # first whitespace-separated token only
    for k in "itemID myIA_sourceItemID authors".split():
        # NOTE(review): unicodify turns a NULL column into the text "None";
        # kept as is because that text also ends up in existing symlink names.
        cells.append((k, truncate(unicodify(r[k]))))
    # Keep only what precedes the first '-' of the stored date.
    cells.append(('publishedDate', (r['publishedDate'] or "").split("-")[0]))
    title = r['title']
    if title and len(title.split()) == 1 and len(title) > 60:
        # A single unbroken "word" longer than 60 characters is blanked out.
        title = ""
    cells.append(('title', title))

    # Links: every stored attachment first (sorted, de-duplicated), then the item URL.
    links = []
    entries = (r['files'] or "").split("***")
    attachments = sorted(set(x[len('storage:'):] for x in entries if x.startswith('storage:')))
    for attachment in attachments:
        # "<path>|<key>|<itemID>"; split from the right so a '|' in <path> is harmless.
        fname, key, IAitemID = attachment.rsplit("|", 2)
        path = STORAGE_URL_BASE + "storage/%s/%s" % (key, fname)
        links.append((path, bare_display_url('storage: %s' % fname)))
    if r['url']:
        links.append((r['url'], bare_display_url(r['url'])))

    ## File symlink output: at most one symlink per item, for its first PDF link
    pdf_links = [link for link, label in links if 'storage/' in link and link.endswith('.pdf')]
    if pdf_links:
        local_link = pdf_links[0]
        cs = dict(cells)
        # 10000 - itemID makes higher IDs sort first alphabetically.
        # NOTE(review): for itemIDs above 10000 the prefix goes negative and
        # the ordering presumably breaks -- confirm before changing, since
        # altering the format would rename every existing symlink.
        fcells = ['%05d' % (10000 - int(cs['itemID'])), cs['authors'], cs['publishedDate'], cs['title']]
        fcells = [v or '' for v in fcells]
        fcells.append(os.path.basename(local_link))
        # '/' cannot appear in a file name; such names previously always failed silently.
        symlink_name = u' | '.join(fcells).replace(u'/', u'-')
        _make_symlink(local_link, symlink_name)

    ## HTML output
    # Escape everything taken from the database so '&', '<' or quotes in
    # titles, authors and URLs cannot break the markup.
    link_html = " ".join(
        '<a href="%s">%s</a>' % (html_escape(href, True), html_escape(label, True))
        for href, label in links)
    _write_html("<tr>")
    for name, value in cells:
        _write_html(u"<td class='%s'>%s" % (name, html_escape(value or u"", True)))
    # The links cell is already markup, so it is written without further escaping.
    _write_html(u"<td class='%s'>%s" % ('links', link_html))

_write_html("</table>")
HTML.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment