Skip to content

Instantly share code, notes, and snippets.

@brendano
Created January 2, 2011 22:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brendano/762900 to your computer and use it in GitHub Desktop.
Save brendano/762900 to your computer and use it in GitHub Desktop.
Publish Zotero papers as HTML and symlinks
#!/usr/bin/env python
# From your Zotero database and file storage,
# creates a simple HTML table, and directory full of symlinks,
# for quick-and-dirty web or Dropbox viewing.
# Installation: place in your Zotero folder
# e.g. ~/Documents/zotero/
# And run it
# e.g. python ~/Documents/zotero/make_views.py
# Two outputs:
# (1) flat_symlinks/ -- a directory full of symlinks to all PDFs, with nice
# filenames. It's formatted so that alphabetic order shows newest-first. If
# you're running your zotero/ directory inside Dropbox, this directory is easy
# to navigate from the iOS Dropbox app.
# (2) view.html -- an HTML table version.
# By Brendan O'Connor, Jan 2011, https://gist.github.com/762900
import glob
import os
import re
import shutil
import sqlite3
import sys

import sqlalchemy
# Run from the directory containing this script so that the relative paths
# used below (zotero.sqlite, storage/, flat_symlinks/, view.html) resolve.
here = os.path.dirname(os.path.abspath(__file__))
os.chdir(here)
if not os.path.isdir("flat_symlinks"):
    os.makedirs("flat_symlinks")

### Data
# Query a copy of the database rather than zotero.sqlite itself.
# shutil.copyfile raises if the copy fails, instead of silently carrying on
# and querying a stale or empty /tmp/zotero.sqlite.
shutil.copyfile("zotero.sqlite", "/tmp/zotero.sqlite")
conn = sqlalchemy.create_engine('sqlite:////tmp/zotero.sqlite',
                                module=sqlite3.dbapi2).connect()
# Alternative (not used): connect to the database in place with
#   sqlalchemy.create_engine('sqlite:///zotero.sqlite').connect()

# Output file for the HTML table; written to and closed further down.
HTML = open(os.path.join(here, 'view.html'), 'w')
# One result row per entry in `items` (group by items.itemID), newest
# dateAdded first.  Columns:
#   itemID, dateAdded, key -- taken directly from `items`
#   authors           -- creator last names joined with ', '.  The subquery
#                        orders itemCreators by orderIndex, presumably so the
#                        names come out in creator order (NOTE(review): SQLite
#                        does not guarantee group_concat ordering -- confirm).
#   myIA_sourceItemID -- sourceItemID of this item's own itemAttachments row,
#                        i.e. non-NULL when the item is itself an attachment.
#   publishedDate, url, title -- itemData values looked up with fieldID 14, 1
#                        and 110 respectively.
#   files             -- for every attachment whose sourceItemID is this item:
#                        'path|key|itemID', with entries joined by '***'.
#   bla               -- constant 'BLA'; not read anywhere in this script.
# NOTE(review): creators and attachments are both joined before grouping, so
# an item with several of each will presumably repeat values inside `authors`
# and `files`; the display code de-duplicates `files` but not `authors`.
sql = """
select items.itemID, items.dateAdded, group_concat(creatorData.lastName, ', ') as authors,
items.key,
myIA.sourceItemID myIA_sourceItemID,
dateDV.value as publishedDate,
titleDV.value as title,
urlDV.value as url,
group_concat( itemAttachments.path || '|' || IAitem.key || '|' || IAitem.itemID, '***') as files,
'BLA' as bla
from items
left join
(select itemID,creatorID from itemCreators order by orderIndex) itemCreators2
on itemCreators2.itemID=items.itemID
left join creators on creators.creatorID=itemCreators2.creatorID
left join creatorData on creatorData.creatorDataID=creators.creatorDataID
left join itemData dateD on (dateD.itemID=items.itemID and dateD.fieldID=14)
left join itemDataValues dateDV on dateDV.valueID=dateD.valueID
left join itemData urlD on (urlD.itemID=items.itemID and urlD.fieldID=1)
left join itemDataValues urlDV on urlDV.valueID=urlD.valueID
left join itemData titleD on (titleD.itemID=items.itemID and titleD.fieldID=110)
left join itemDataValues titleDV on titleDV.valueID=titleD.valueID
left join itemAttachments on itemAttachments.sourceItemID=items.itemID
left join items IAitem on IAitem.itemID=itemAttachments.itemID
left join itemAttachments myIA on myIA.itemID=items.itemID
group by items.itemID
order by items.dateAdded desc
"""
## Data Display
# Base that is prepended to "storage/<key>/<file>" when building attachment links.
# An absolute URL lets view.html be opened from anywhere (e.g. the iOS Dropbox
# app, or published on the web) and still reach the files over the Internet.
# A relative base is required for the symlink output, because the same path is
# also checked and linked on the local disk (TODO: patch up to support both).
#STORAGE_URL_BASE = "http://dl.dropbox.com/u/1354835/zotero/"
STORAGE_URL_BASE = "./"
def bare_display_url(url):
    """Return a shortened, human-readable form of `url` for use as link text.

    Every "http://" and "https://" occurrence is removed, trailing slashes
    are dropped, and anything longer than 50 characters is cut to its first
    45 characters followed by " ...".  A result that starts with "storage/"
    gets STORAGE_URL_BASE prepended.
    """
    for scheme in ("http://", "https://"):
        url = url.replace(scheme, "")
    url = url.rstrip("/")
    if len(url) > 50:
        url = url[:45] + " ..."
    if url.startswith("storage/"):
        return STORAGE_URL_BASE + url
    return url
def unicodify(s, encoding='utf8', *args):
    """Coerce `s` to a text (unicode) string.

    Text strings are returned unchanged, byte strings are decoded with
    `encoding` (any extra positional arguments, e.g. an error handler such as
    'ignore', are passed on to ``decode``), and every other object is
    converted with the text type's constructor -- so None becomes u'None'.

    The text type is looked up at call time so the function also works on
    Python 3, where the ``unicode`` builtin does not exist; on Python 2
    ``bytes`` is ``str``, so behaviour there is unchanged.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3
    if isinstance(s, text_type):
        return s
    if isinstance(s, bytes):
        return s.decode(encoding, *args)
    return text_type(s)
def truncate(s):
    """Return `s` limited to 200 characters, with "..." appended when cut.

    None is treated as the empty string.
    """
    if s is None:
        return u""
    if len(s) <= 200:
        return s
    # TODO: cut at a word boundary instead of mid-word.
    return s[:200] + "..."
# Document head: the table styling plus a meta tag declaring the page as
# UTF-8 (the table cells written further down are encoded as UTF-8).
_page_head = (
    "<style>",
    "td { vertical-align: top; border: 1px solid gray; padding:1px }",
    "table { border-collapse: collapse; }",
    ".dateAdded { font-size: 80% }",
    "</style>",
    '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">',
)
HTML.write("".join(head_line + "\n" for head_line in _page_head))
# Fetch every item row up front; the rows are walked twice (here and when
# the table is written).
rows = list(conn.execute(sql))
itemIDs = set(int(r['itemID']) for r in rows)

# Collect the itemIDs of all 'storage:' attachments listed in some row's
# `files` column, so the table loop can skip the rows of those attachments.
accountedForIAitemIDs = set()
for r in rows:
    for entry in (r['files'] or "").split('***'):
        if not entry.startswith('storage:'):
            continue
        # Each entry is 'storage:<path>|<key>|<itemID>'.  Split from the
        # right so a '|' inside the path cannot break the unpacking.
        path, key, IAitemID = entry[len('storage:'):].rsplit('|', 2)
        accountedForIAitemIDs.add(int(IAitemID))

sys.stderr.write("%d rows\n" % len(rows))
# An empty library has no newest row to report.
if rows:
    sys.stderr.write("Newest: %s\n" % (rows[0],))
def _html_escape(value):
    """Return `value` with &, <, > and both quote characters as HTML entities.

    None becomes the empty string.  Safe for element text and for
    double- or single-quoted attribute values.
    """
    if value is None:
        return ""
    # '&' must be replaced first so the entities added below are not re-escaped.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"),
                        ('"', "&quot;"), ("'", "&#39;")):
        value = value.replace(raw, entity)
    return value


def _stored_files(files_field):
    """Parse a row's `files` column into sorted, unique [path, key, itemID] lists.

    The column holds 'path|key|itemID' entries joined by '***'; only entries
    whose path starts with 'storage:' are kept, with that prefix removed.
    """
    entries = set((files_field or "").split("***"))
    stored = sorted(e[len("storage:"):] for e in entries if e.startswith("storage:"))
    # Split from the right so a '|' inside the path cannot break the unpacking.
    return [e.rsplit("|", 2) for e in stored]


HTML.write("<table>")
try:
    for r in rows:
        # Attachments are shown as links on the row of the item that lists
        # them, so they do not get a row of their own.
        if r['itemID'] in accountedForIAitemIDs:
            continue

        cells = [('dateAdded', r['dateAdded'].split()[0])]
        for k in "itemID myIA_sourceItemID authors".split():
            cells.append((k, truncate(unicodify(r[k]))))
        # Keep only the part of the date before the first '-'.
        cells.append(('publishedDate', (r['publishedDate'] or "").split("-")[0]))
        title = r['title']
        # A title that is one unbroken "word" longer than 60 characters is
        # blanked out.
        if title and len(title.split()) == 1 and len(title) > 60:
            title = ""
        cells.append(('title', title))

        links = []
        for fname, key, IAitemID in _stored_files(r['files']):
            #assert os.path.exists("storage/%s/%s" % (key, fname))
            path = STORAGE_URL_BASE + "storage/%s/%s" % (key, fname)
            links.append((path, bare_display_url('storage: %s' % fname)))
        if r['url']:
            links.append((r['url'], bare_display_url(r['url'])))

        ## File symlink output
        local_links = [link for link, label in links
                       if 'storage/' in link and link.endswith('.pdf')]
        if local_links:
            local_link = local_links[0]
            cs = dict(cells)
            # The leading number decreases as itemID grows, so an alphabetic
            # listing shows the newest items first.
            # NOTE(review): for itemID > 10000 this number goes negative.
            fcells = ['%05d' % (10000 - int(cs['itemID'])), cs['authors'],
                      cs['publishedDate'], cs['title']]
            fcells = [v or '' for v in fcells]
            fcells.append(os.path.basename(local_link))
            # '/' cannot appear in a file name; replace it so such items
            # still get a link.
            symlink_name = u' | '.join(fcells).replace(u'/', u'_')
            link_path = os.path.join('flat_symlinks', symlink_name)
            # lexists: an existing (even dangling) link counts as present.
            if os.path.exists(local_link) and not os.path.lexists(link_path):
                try:
                    # A relative target is resolved from the link's own
                    # directory, hence the '..' prefix; no chdir is needed.
                    os.symlink(os.path.join('..', local_link), link_path)
                except OSError as e:
                    # Best effort: report the failure and keep going.
                    sys.stderr.write("Could not create symlink %r: %s\n"
                                     % (link_path, e.strerror))

        ## HTML output
        link_html = " ".join('<a href="%s">%s</a>'
                             % (_html_escape(href), _html_escape(label))
                             for href, label in links)
        # Data cells are escaped; the links cell is already markup.
        row_html = [(cell_name, _html_escape(cell_value))
                    for cell_name, cell_value in cells]
        row_html.append(('links', link_html))
        HTML.write("<tr>\n")
        for cell_name, markup in row_html:
            HTML.write((u"<td class='%s'>%s\n"
                        % (cell_name, markup or "")).encode('utf-8'))
    HTML.write("</table>\n")
finally:
    # Close (and flush) view.html even if a row fails part-way through.
    HTML.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment