# brendano/make_views.py

## make_views.py
#!/usr/bin/env python

# From your Zotero database and file storage,
# creates a simple HTML table, and directory full of symlinks,
# for quick-and-dirty web or Dropbox viewing.

# Installation: place in your Zotero folder
#   e.g. ~/Documents/zotero/
# And run it
#   e.g. python ~/Documents/zotero/make_views.py

# Two outputs:

# (1) flat_symlinks/ -- a directory full of symlinks to all PDFs, with nice
# filenames.  It's formatted so that alphabetic order shows newest-first.  If
# you're running your zotero/ directory inside Dropbox, this directory is easy
# to navigate from the iOS Dropbox app.

# (2) view.html -- an HTML table version.

# By Brendan O'Connor, Jan 2011, https://gist.github.com/762900

import errno
import glob
import os
import re
import shutil
import sqlite3
import sys
from xml.sax.saxutils import escape, quoteattr

import sqlalchemy

# Work from the directory this script lives in (the Zotero folder), so the
# relative paths used below -- zotero.sqlite, storage/, flat_symlinks/ --
# resolve no matter where the script is launched from.
here = os.path.dirname( os.path.abspath(__file__))
os.chdir(here)

# Same effect as `mkdir -p flat_symlinks`, without spawning a shell.
if not os.path.isdir("flat_symlinks"):
  os.makedirs("flat_symlinks")

### Data

# The query runs against a scratch copy of the database, not the live file.
# shutil.copyfile raises if zotero.sqlite is missing or unreadable, so a failed
# copy stops the run instead of silently querying a stale /tmp/zotero.sqlite
# left behind by an earlier run.
# NOTE(review): /tmp/zotero.sqlite is a fixed, shared path; concurrent runs or
# other users on the same machine would overwrite each other's copy.
shutil.copyfile("zotero.sqlite", "/tmp/zotero.sqlite")
conn= sqlalchemy.create_engine('sqlite:////tmp/zotero.sqlite',module=sqlite3.dbapi2).connect()
# Alternative kept for reference: query the database file in place.
# import pysqlite2
# conn= sqlalchemy.create_engine('sqlite:///zotero.sqlite').connect()

# Output file for the HTML table; it is written piece by piece and closed at
# the very end of the script.
HTML = open(os.path.join(here, 'view.html'), 'w')

# One result row per Zotero item, most recently added first.  For each item:
#   authors            creators' last names joined with ', '
#   myIA_sourceItemID  sourceItemID of the item's own itemAttachments row
#                      (NULL when the item has no such row)
#   publishedDate, title, url   values of three itemData fields
#   files              child attachments packed as '***'-separated
#                      "path|key|itemID" triples
# NOTE(review): fieldIDs 14 / 1 / 110 are presumably Zotero's date / url / title
# fields, judging by the aliases -- confirm against the database's fields table.
# NOTE(review): the creators join and the attachments join multiply each other's
# rows inside a group, so both group_concat() results can contain repeated
# entries when an item has several creators and several attachments.
sql = """
select items.itemID, items.dateAdded, group_concat(creatorData.lastName, ', ') as authors,
  items.key,
  myIA.sourceItemID myIA_sourceItemID,

  dateDV.value as publishedDate,
  titleDV.value as title,
  urlDV.value as url,

  group_concat( itemAttachments.path || '|' || IAitem.key || '|' || IAitem.itemID, '***') as files,

  'BLA' as bla
from items
  left join
    (select itemID,creatorID from itemCreators order by orderIndex) itemCreators2
    on itemCreators2.itemID=items.itemID
  left join creators     on creators.creatorID=itemCreators2.creatorID
  left join creatorData  on creatorData.creatorDataID=creators.creatorDataID

  left join itemData dateD         on (dateD.itemID=items.itemID and dateD.fieldID=14)
  left join itemDataValues dateDV  on dateDV.valueID=dateD.valueID

  left join itemData urlD         on (urlD.itemID=items.itemID and urlD.fieldID=1)
  left join itemDataValues urlDV  on urlDV.valueID=urlD.valueID

  left join itemData titleD         on (titleD.itemID=items.itemID and titleD.fieldID=110)
  left join itemDataValues titleDV  on titleDV.valueID=titleD.valueID

  left join itemAttachments on itemAttachments.sourceItemID=items.itemID
  left join items IAitem    on IAitem.itemID=itemAttachments.itemID

  left join itemAttachments myIA on myIA.itemID=items.itemID

group by items.itemID
order by items.dateAdded desc
"""


## Data Display

# Base URL prepended to "storage/..." attachment paths in the generated links.
# An absolute URL lets you open view.html from the iOS Dropbox app and fetch the
# files over an Internet connection, or, more generally, publish view.html
# anywhere you want.
# A relative URL is required for the symlink output to work.
# TODO: support both at once.

#STORAGE_URL_BASE = "http://dl.dropbox.com/u/1354835/zotero/"
STORAGE_URL_BASE = "./"

def bare_display_url(url):
  """Return a compact form of `url` suitable for use as link text.

  Removes "http://" and "https://", drops trailing slashes, shortens anything
  longer than 50 characters to its first 45 plus " ...", and finally prefixes
  STORAGE_URL_BASE when the result starts with "storage/".
  """
  shown = url
  for scheme in ("http://", "https://"):
    shown = shown.replace(scheme, "")
  shown = shown.rstrip("/")
  if len(shown) > 50:
    shown = shown[:45] + " ..."
  if shown.startswith("storage/"):
    return STORAGE_URL_BASE + shown
  return shown

def unicodify(s, encoding='utf8', *args):
  """Coerce `s` to a unicode string.

  unicode objects come back unchanged; byte strings are decoded with
  `encoding` (extra positional arguments are handed on to str.decode); any
  other object is converted with unicode().
  """
  if isinstance(s, unicode):
    text = s
  elif isinstance(s, str):
    text = s.decode(encoding, *args)
  else:
    text = unicode(s)
  return text

def truncate(s):
  """Return `s` cut to at most 200 characters, with "..." appended when cut.

  None is treated as the empty unicode string.
  """
  if s is None:
    return u""
  # TODO: cut at a word boundary instead of mid-word.
  if len(s) <= 200:
    return s
  return s[:200] + "..."

# Page preamble: table styling plus a charset declaration that matches the
# UTF-8 encoding applied to the table cells when they are written further down.
HTML.write( """<style>
td { vertical-align: top; border: 1px solid gray; padding:1px }
table { border-collapse: collapse; }
.dateAdded { font-size: 80% }
</style>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
""")

# Run the query once and keep every row in memory; the rows are walked twice
# (here, and again when the table is written).
rows = list(conn.execute(sql))
itemIDs = set(int(r['itemID']) for r in rows)

# Collect the itemIDs of all "storage:" attachments that hang off some parent
# item.  They are shown as links on the parent's table row, so the table loop
# skips their own rows.
accountedForIAitemIDs = set()
for r in rows:
  for entry in (r['files'] or "").split('***'):
    if not entry.startswith('storage:'):
      continue
    # Each entry is "storage:<filename>|<key>|<itemID>".  Split from the right
    # so that a '|' inside the filename cannot break the three-way unpacking.
    _name, _key, attachment_id = entry.rsplit('|', 2)
    accountedForIAitemIDs.add(int(attachment_id))

print>>sys.stderr, "%d rows" % len(rows)
# An empty library has no newest row; indexing rows[0] would raise IndexError.
if rows:
  print>>sys.stderr, "Newest:", rows[0]

HTML.write( "<table>")


for r in rows:
  # Attachments already listed under a parent item do not get a row of their own.
  if r['itemID'] in accountedForIAitemIDs:
    continue

  # cells: ordered (column name, raw text) pairs.  The raw values also feed the
  # symlink filename below; HTML escaping happens only when the row is written.
  cells = []
  cells.append(('dateAdded', r['dateAdded'].split()[0]))  # first whitespace-separated token only

  # NOTE(review): unicodify(None) yields u"None", so NULL columns show up as the
  # text "None" here and in symlink names; left as is to keep existing symlink
  # names stable.
  for k in "itemID myIA_sourceItemID authors".split():
    cells.append((k, truncate(unicodify(r[k]))))

  # Keep only the part of the published date before the first "-".
  date = (r['publishedDate'] or "").split("-")[0]
  cells.append(('publishedDate', date))

  # Blank out a title that is one unbroken token longer than 60 characters.
  title = r['title']
  if title and len(title.split())==1 and len(title)>60:
    title = ""
  cells.append(('title',title))

  # links: (href, display text) pairs -- stored attachments first, then the URL.
  links = []
  entries = set((r['files'] or "").split("***"))
  for entry in sorted(f[len('storage:'):] for f in entries if f.startswith('storage:')):
    # Split from the right: the filename itself may contain '|'.
    file,key,IAitemID = entry.rsplit("|", 2)
    #assert os.path.exists(path), path
    path = STORAGE_URL_BASE + "storage/%s/%s" % (key, file)
    links.append((path, bare_display_url('storage: %s' % file)))
  if r['url']:
    links.append((r['url'], bare_display_url(r['url'])))


  ## File symlink output
  # The first stored PDF of the item, if any, gets a symlink in flat_symlinks/.
  pdf_links = [link for link,name in links if 'storage/' in link and link.endswith('.pdf')]
  if pdf_links:
    local_link = pdf_links[0]
    cs = dict(cells)
    # The leading counter shrinks as itemID grows, so alphabetic order lists
    # higher itemIDs first.
    # NOTE(review): the counter goes negative once itemID exceeds 10000, which
    # breaks that ordering.
    fcells = ['%05d' % (10000 - int(cs['itemID'])), cs['authors'], cs['publishedDate'], cs['title']]
    fcells = [v or '' for v in fcells]
    fcells.append(os.path.basename(local_link))
    # "/" cannot be part of a file name; without the replacement os.symlink
    # fails for any item whose authors or title contain one.
    symlink_name = u' | '.join(fcells).replace(u'/', u'_')
    # Test for (and create) the link inside flat_symlinks/ directly, rather
    # than checking the name against the current directory.
    symlink_path = os.path.join('flat_symlinks', symlink_name)
    if os.path.exists(local_link) and not os.path.lexists(symlink_path):
      try:
        # The link target is resolved relative to flat_symlinks/, hence '..'.
        os.symlink(os.path.join('..', local_link), symlink_path)
      except OSError as e:
        # A link that already exists is fine; report anything else.
        if e.errno != errno.EEXIST:
          print>>sys.stderr, "could not create symlink %r: %s" % (symlink_name, e.strerror)

  ## HTML output

  # Escape hrefs and link text so characters such as & < > " in database
  # values cannot break the markup.
  link_html = " ".join(['<a href=%s>%s</a>' % (quoteattr(href), escape(text)) for href,text in links])

  cells.append(('links',link_html))

  print>>HTML,"<tr>"
  for name,value in cells:
    value = value or ""
    # The links cell already holds finished markup; every other cell is text.
    if name != 'links':
      value = escape(value)
    print>>HTML, (u"<td class='%s'>%s" % (name,value)).encode('utf-8')

print>>HTML, "</table>"


HTML.close()
	#!/usr/bin/env python

	# From your Zotero database and file storage,
	# creates a simple HTML table, and directory full of symlinks,
	# for quick-and-dirty web or Dropbox viewing.

	# Installation: place in your Zotero folder
	# e.g. ~/Documents/zotero/
	# And run it
	# e.g. python ~/Documents/zotero/make_views.py

	# Two outputs:

	# (1) flat_symlinks/ -- a directory full of symlinks to all PDFs, with nice
	# filenames. It's formatted so that alphabetic order shows newest-first. If
	# you're running your zotero/ directory inside Dropbox, this directory is easy
	# to navigate from the iOS Dropbox app.

	# (2) view.html -- an HTML table version.

	# By Brendan O'Connor, Jan 2011, https://gist.github.com/762900

	import os,sys,re,glob
	import sqlalchemy
	import sqlite3

	# Work from the directory this script lives in (the Zotero folder), so the
	# relative paths used below -- zotero.sqlite, storage/, flat_symlinks/ --
	# resolve no matter where the script is launched from.
	here = os.path.dirname( os.path.abspath(__file__))
	os.chdir(here)

	# Same effect as `mkdir -p flat_symlinks`, without spawning a shell.
	if not os.path.isdir("flat_symlinks"):
	  os.makedirs("flat_symlinks")

	### Data

	# The query runs against a scratch copy of the database, not the live file.
	# shutil.copyfile raises if zotero.sqlite is missing or unreadable, so a failed
	# copy stops the run instead of silently querying a stale /tmp/zotero.sqlite
	# left behind by an earlier run.
	# NOTE(review): /tmp/zotero.sqlite is a fixed, shared path; concurrent runs or
	# other users on the same machine would overwrite each other's copy.
	shutil.copyfile("zotero.sqlite", "/tmp/zotero.sqlite")
	conn= sqlalchemy.create_engine('sqlite:////tmp/zotero.sqlite',module=sqlite3.dbapi2).connect()
	# Alternative kept for reference: query the database file in place.
	# import pysqlite2
	# conn= sqlalchemy.create_engine('sqlite:///zotero.sqlite').connect()

	# Output file for the HTML table; it is written piece by piece and closed at
	# the very end of the script.
	HTML = open(os.path.join(here, 'view.html'), 'w')

	# One result row per Zotero item, most recently added first.  `authors` is the
	# creators' last names joined with ', '; `files` packs the item's child
	# attachments as '***'-separated "path|key|itemID" triples.
	# The SQL concatenation operator and the '|' separator must be plain pipes;
	# backslash-escaped pipes would end up verbatim in the statement text.
	# NOTE(review): fieldIDs 14 / 1 / 110 are presumably Zotero's date / url / title
	# fields, judging by the aliases -- confirm against the database's fields table.
	sql = """
	select items.itemID, items.dateAdded, group_concat(creatorData.lastName, ', ') as authors,
	items.key,
	myIA.sourceItemID myIA_sourceItemID,

	dateDV.value as publishedDate,
	titleDV.value as title,
	urlDV.value as url,

	group_concat( itemAttachments.path || '|' || IAitem.key || '|' || IAitem.itemID, '***') as files,

	'BLA' as bla
	from items
	left join
	(select itemID,creatorID from itemCreators order by orderIndex) itemCreators2
	on itemCreators2.itemID=items.itemID
	left join creators on creators.creatorID=itemCreators2.creatorID
	left join creatorData on creatorData.creatorDataID=creators.creatorDataID

	left join itemData dateD on (dateD.itemID=items.itemID and dateD.fieldID=14)
	left join itemDataValues dateDV on dateDV.valueID=dateD.valueID

	left join itemData urlD on (urlD.itemID=items.itemID and urlD.fieldID=1)
	left join itemDataValues urlDV on urlDV.valueID=urlD.valueID

	left join itemData titleD on (titleD.itemID=items.itemID and titleD.fieldID=110)
	left join itemDataValues titleDV on titleDV.valueID=titleD.valueID

	left join itemAttachments on itemAttachments.sourceItemID=items.itemID
	left join items IAitem on IAitem.itemID=itemAttachments.itemID

	left join itemAttachments myIA on myIA.itemID=items.itemID

	group by items.itemID
	order by items.dateAdded desc
	"""


	## Data Display

	# Base URL prepended to "storage/..." attachment paths in the generated links.
	# An absolute URL lets you open view.html from the iOS Dropbox app and fetch the
	# files over an Internet connection, or, more generally, publish view.html
	# anywhere you want.
	# A relative URL is required for the symlink output to work.
	# TODO: support both at once.

	#STORAGE_URL_BASE = "http://dl.dropbox.com/u/1354835/zotero/"
	STORAGE_URL_BASE = "./"

	def bare_display_url(url):
	  """Return a compact form of `url` suitable for use as link text.

	  Removes "http://" and "https://", drops trailing slashes, shortens anything
	  longer than 50 characters to its first 45 plus " ...", and finally prefixes
	  STORAGE_URL_BASE when the result starts with "storage/".
	  """
	  shown = url
	  for scheme in ("http://", "https://"):
	    shown = shown.replace(scheme, "")
	  shown = shown.rstrip("/")
	  if len(shown) > 50:
	    shown = shown[:45] + " ..."
	  if shown.startswith("storage/"):
	    return STORAGE_URL_BASE + shown
	  return shown

	def unicodify(s, encoding='utf8', *args):
	  """Coerce `s` to a unicode string.

	  unicode objects come back unchanged; byte strings are decoded with
	  `encoding` (extra positional arguments are handed on to str.decode); any
	  other object is converted with unicode().
	  """
	  if isinstance(s, unicode):
	    text = s
	  elif isinstance(s, str):
	    text = s.decode(encoding, *args)
	  else:
	    text = unicode(s)
	  return text

	def truncate(s):
	  """Return `s` cut to at most 200 characters, with "..." appended when cut.

	  None is treated as the empty unicode string.
	  """
	  if s is None:
	    return u""
	  # TODO: cut at a word boundary instead of mid-word.
	  if len(s) <= 200:
	    return s
	  return s[:200] + "..."

	# Page preamble: table styling plus a charset declaration that matches the
	# UTF-8 encoding applied to the table cells when they are written further down.
	HTML.write( """<style>
	td { vertical-align: top; border: 1px solid gray; padding:1px }
	table { border-collapse: collapse; }
	.dateAdded { font-size: 80% }
	</style>
	<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
	""")

	# Run the query once and keep every row in memory; the rows are walked twice
	# (here, and again when the table is written).
	rows = list(conn.execute(sql))
	itemIDs = set(int(r['itemID']) for r in rows)

	# Collect the itemIDs of all "storage:" attachments that hang off some parent
	# item.  They are shown as links on the parent's table row, so the table loop
	# skips their own rows.
	accountedForIAitemIDs = set()
	for r in rows:
	  for entry in (r['files'] or "").split('***'):
	    if not entry.startswith('storage:'):
	      continue
	    # Each entry is "storage:<filename>|<key>|<itemID>".  Split from the right
	    # so that a '|' inside the filename cannot break the three-way unpacking.
	    _name, _key, attachment_id = entry.rsplit('|', 2)
	    accountedForIAitemIDs.add(int(attachment_id))

	print>>sys.stderr, "%d rows" % len(rows)
	# An empty library has no newest row; indexing rows[0] would raise IndexError.
	if rows:
	  print>>sys.stderr, "Newest:", rows[0]

	HTML.write( "<table>")


	for r in rows:
	  # Attachments already listed under a parent item do not get a row of their own.
	  if r['itemID'] in accountedForIAitemIDs:
	    continue

	  # cells: ordered (column name, raw text) pairs.  The raw values also feed the
	  # symlink filename below; HTML escaping happens only when the row is written.
	  cells = []
	  cells.append(('dateAdded', r['dateAdded'].split()[0]))  # first whitespace-separated token only

	  # NOTE(review): unicodify(None) yields u"None", so NULL columns show up as the
	  # text "None" here and in symlink names; left as is to keep existing symlink
	  # names stable.
	  for k in "itemID myIA_sourceItemID authors".split():
	    cells.append((k, truncate(unicodify(r[k]))))

	  # Keep only the part of the published date before the first "-".
	  date = (r['publishedDate'] or "").split("-")[0]
	  cells.append(('publishedDate', date))

	  # Blank out a title that is one unbroken token longer than 60 characters.
	  title = r['title']
	  if title and len(title.split())==1 and len(title)>60:
	    title = ""
	  cells.append(('title',title))

	  # links: (href, display text) pairs -- stored attachments first, then the URL.
	  links = []
	  entries = set((r['files'] or "").split("***"))
	  for entry in sorted(f[len('storage:'):] for f in entries if f.startswith('storage:')):
	    # Split from the right: the filename itself may contain '|'.
	    file,key,IAitemID = entry.rsplit("|", 2)
	    #assert os.path.exists(path), path
	    path = STORAGE_URL_BASE + "storage/%s/%s" % (key, file)
	    links.append((path, bare_display_url('storage: %s' % file)))
	  if r['url']:
	    links.append((r['url'], bare_display_url(r['url'])))


	  ## File symlink output
	  # The first stored PDF of the item, if any, gets a symlink in flat_symlinks/.
	  pdf_links = [link for link,name in links if 'storage/' in link and link.endswith('.pdf')]
	  if pdf_links:
	    local_link = pdf_links[0]
	    cs = dict(cells)
	    # The leading counter shrinks as itemID grows, so alphabetic order lists
	    # higher itemIDs first.
	    # NOTE(review): the counter goes negative once itemID exceeds 10000, which
	    # breaks that ordering.
	    fcells = ['%05d' % (10000 - int(cs['itemID'])), cs['authors'], cs['publishedDate'], cs['title']]
	    fcells = [v or '' for v in fcells]
	    fcells.append(os.path.basename(local_link))
	    # "/" cannot be part of a file name; without the replacement os.symlink
	    # fails for any item whose authors or title contain one.
	    symlink_name = u' | '.join(fcells).replace(u'/', u'_')
	    # Test for (and create) the link inside flat_symlinks/ directly, rather
	    # than checking the name against the current directory.
	    symlink_path = os.path.join('flat_symlinks', symlink_name)
	    if os.path.exists(local_link) and not os.path.lexists(symlink_path):
	      try:
	        # The link target is resolved relative to flat_symlinks/, hence '..'.
	        os.symlink(os.path.join('..', local_link), symlink_path)
	      except OSError as e:
	        # A link that already exists is fine; report anything else.
	        if e.errno != errno.EEXIST:
	          print>>sys.stderr, "could not create symlink %r: %s" % (symlink_name, e.strerror)

	  ## HTML output

	  # Escape hrefs and link text so characters such as & < > " in database
	  # values cannot break the markup.
	  link_html = " ".join(['<a href=%s>%s</a>' % (quoteattr(href), escape(text)) for href,text in links])

	  cells.append(('links',link_html))

	  print>>HTML,"<tr>"
	  for name,value in cells:
	    value = value or ""
	    # The links cell already holds finished markup; every other cell is text.
	    if name != 'links':
	      value = escape(value)
	    print>>HTML, (u"<td class='%s'>%s" % (name,value)).encode('utf-8')

	print>>HTML, "</table>"


	HTML.close()