Skip to content

Instantly share code, notes, and snippets.

@retorquere
Created January 22, 2019 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save retorquere/3c3ccc3857dd0ad68546cf7d2256e383 to your computer and use it in GitHub Desktop.
Save retorquere/3c3ccc3857dd0ad68546cf7d2256e383 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from zipfile import ZipFile
import os
import csv
import sys
import re
import json
from pprint import pprint
import xml.etree.ElementTree as ET
# Namespace map used to resolve the ``w:`` prefix in WordprocessingML queries.
namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}

# Field instruction carrying a citation; group 1 captures the JSON payload.
_CITATION_RE = re.compile(r'ADDIN ZOTERO_ITEM CSL_CITATION (\{.*\})$')


def _display_title(row):
    """Return the row's title, suffixed with ``(first author, year)`` when known."""
    title = row['Title'] or ''
    # Authors are ';'-separated; only the first one is shown.
    author = (row['Author'] or '').split(';')[0].strip()
    year = row['Publication Year'] or ''
    author_year = ', '.join(part for part in (author, year) if part)
    return f'{title} ({author_year})' if author_year else title


def load_library(library_path):
    """Map each item key in a library CSV export to a display title.

    The CSV must have ``Key``, ``Title``, ``Author`` and ``Publication Year``
    columns; a missing column raises ``KeyError``. Blank rows and rows
    without a key are skipped.
    """
    library = {}
    # 'utf-8-sig' drops a leading BOM; newline='' lets the csv module handle
    # quoted fields that contain line breaks.
    with open(library_path, encoding='utf-8-sig', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            key = row['Key']
            if not key:
                continue
            library[key] = _display_title(row)
    return library


def collect_citations(documents, library):
    """Find which library items are cited in which ``.docx`` documents.

    Returns a dict keyed by item key; each value is
    ``{'title': <display title>, 'documents': [<docx path>, ...]}``.
    A document is listed at most once per item, however often it cites it.
    Citations whose key is not in *library* are ignored.
    """
    cited = {}
    for docx in documents:
        with ZipFile(docx) as archive:
            with archive.open('word/document.xml') as xml_file:
                root = ET.parse(xml_file)

        for elem in root.findall('.//w:instrText', namespaces):
            # An empty <w:instrText/> element has text None.
            text = elem.text
            if not text:
                continue
            match = _CITATION_RE.search(text.strip())
            if not match:
                continue

            citation = json.loads(match.group(1))
            for item in citation.get('citationItems', []):
                # Fall back to 'uris' when 'uri' is absent
                # (NOTE(review): confirm against the citation JSON in your documents).
                for uri in item.get('uri') or item.get('uris') or []:
                    key = uri.split('/')[-1]
                    if key not in library:
                        continue
                    entry = cited.setdefault(
                        key, {'title': library[key], 'documents': []})
                    if docx not in entry['documents']:
                        entry['documents'].append(docx)
    return cited


def write_report(cited, path='cited.csv'):
    """Write one CSV row per cited item: its title followed by the citing documents."""
    # newline='' avoids blank lines between rows on Windows; the explicit
    # encoding keeps non-ASCII titles from failing on non-UTF-8 locales.
    with open(path, 'w', newline='', encoding='utf-8') as out:
        writer = csv.writer(out, quoting=csv.QUOTE_MINIMAL)
        for item in cited.values():
            writer.writerow([item['title']] + item['documents'])


def main(argv=None, output='cited.csv'):
    """Run the report: first ``.csv`` argument is the library, ``.docx`` arguments are scanned.

    *argv* defaults to ``sys.argv[1:]``. Exits with an error message when no
    ``.csv`` argument is supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    csv_paths = [arg for arg in args if arg.lower().endswith('.csv')]
    if not csv_paths:
        sys.exit('usage: cited.py LIBRARY.csv DOCUMENT.docx [DOCUMENT.docx ...]')
    documents = [arg for arg in args if arg.lower().endswith('.docx')]

    library = load_library(csv_paths[0])
    cited = collect_citations(documents, library)
    write_report(cited, output)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment