Last active
May 5, 2021 15:59
-
-
Save thespacedoctor/42c514861a89a7fb6b0a2039fec6da71 to your computer and use it in GitHub Desktop.
[papers-anno2md] A Script for macOS Papers.app: Convert Papers PDF Annotations to Markdown Notes and Add to the General Notes Section for the Publication #papers #annotations #markdown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
# encoding: utf-8 | |
""" | |
*Convert Papers PDF Annotations to Markdown Notes and Add to the General Notes Section for the Publication* | |
:Author: | |
David Young | |
:Date Created: | |
August 30, 2017 | |
Usage: | |
papers-anno2md | |
Options: | |
-h, --help show this help message | |
""" | |
################# GLOBAL IMPORTS #################### | |
import sys | |
import os | |
import json | |
from os.path import expanduser | |
import sqlite3 | |
import psutil | |
import base64 | |
from polyglot.markdown import translate | |
from subprocess import Popen, PIPE, STDOUT | |
from operator import itemgetter | |
from fundamentals import tools | |
# Location of the Papers 3 sqlite database, written relative to the user's
# home directory ("~"); the caller expands the "~" before connecting.
PAPERS_DB_PATH = (
    "~/Library/Application Support/Papers3/Library_Data/"
    "C02N63X6G3QP__D741EA9B-BEF1-44A5-8087-4481F4EABAF3/"
    "Library.papers3/Database.papersdb"
)
def dict_factory(cursor, row):
    """*sqlite3 row factory that returns each result row as a dictionary keyed by column name*

    **Key Arguments:**
        - ``cursor`` -- the cursor that produced the row (its ``description`` supplies the column names)
        - ``row`` -- the raw row of values

    **Return:**
        - a ``{column name: value}`` dictionary for the row
    """
    # THE FIRST ELEMENT OF EACH `description` ENTRY IS THE COLUMN NAME
    columnNames = [column[0] for column in cursor.description]
    return dict(zip(columnNames, row))
def main(arguments=None):
    """
    *The main function used when ``papers-anno2md`` is run as a single script from the command line*

    Checks that Papers.app is running, pulls every annotation out of Papers
    via applescript, compiles them into markdown notes and writes the notes
    (plus a citation) into the notes column of the Papers sqlite database.

    **Key Arguments:**
        - ``arguments`` -- passed straight through to ``fundamentals.tools`` (default ``None``)

    **Return:**
        - ``None``
    """
    # SETUP THE COMMAND-LINE UTIL SETTINGS
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()

    # LOG THE PARSED CL ARGUMENTS. THE ORIGINAL `exec`-BASED UNPACKING HAS
    # BEEN DROPPED: IT EVALUATED ARGUMENT VALUES AS CODE AND NONE OF THE
    # VARIABLES IT CREATED WERE EVER READ
    for arg, val in arguments.items():
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        if arg == "--dbConn":
            dbConn = val
        log.debug('%s = %s' % (varname, val,))

    papersDB = expanduser(PAPERS_DB_PATH)

    # DON'T CONTINUE IF PAPERS APP IS NOT OPEN
    openApp = False
    for pid in psutil.pids():
        try:
            proc = psutil.Process(pid)
            if "papers" in proc.name().lower() and "Papers.app" in proc.exe():
                openApp = True
                break
        except psutil.Error:
            # PROCESS VANISHED OR WE ARE NOT ALLOWED TO INSPECT IT - IT
            # CANNOT BE IDENTIFIED AS PAPERS, SO MOVE ON TO THE NEXT ONE
            continue
    if not openApp:
        log.warning(
            "Papers.app is not open, open the app before running this script")
        return

    papersJson, paperUuids = grab_papers_annotations_with_applescript(log)
    noteDict = compile_markdown_notes_from_annotations(
        papersJson, paperUuids, log)

    # CONNECT TO PAPERS DATABASE
    conn = sqlite3.connect(papersDB)
    try:
        conn.row_factory = dict_factory
        cursor = conn.cursor()
        add_notes_and_citations_to_papers(cursor, noteDict, log)
        cursor.close()
        # COMMIT EXPLICITLY RATHER THAN RELYING ON AN IMPLICIT COMMIT
        # TRIGGERED BY THE FINAL DDL STATEMENT
        conn.commit()
    finally:
        conn.close()

    return
def grab_papers_annotations_with_applescript(
        log):
    """*grab all annotations from the papers database via applescript - there's no easy way to order the annotations correctly when taken directly from the database*

    The applescript returns one flat list: the json string of every
    annotation, a ``||||||`` marker, then the id of the publication each
    annotation belongs to. ``osascript`` prints that list comma-separated
    on a single line, which is split back into its two halves here.

    **Key Arguments:**
        - ``log`` -- logger

    **Return:**
        - ``papersJson`` -- list of json properties for every annotation
        - ``paperUuids`` -- list of uuids the pdf/publication annotation belong to (same length as ``papersJson``)

    **Raises:**
        - ``ValueError`` -- if ``osascript`` fails or its output does not contain the ``||||||`` marker
    """
    log.info('starting the ``grab_papers_annotations_with_applescript`` function')

    separator = "||||||"
    applescript = """
tell application "Papers"
set thisList to {}
set sourceList to {}
set allAnnotations to every annotation item
repeat with i from 1 to count of allAnnotations
set end of thisList to json string of item i of allAnnotations
set end of sourceList to (id of publication item of (source file of item i of allAnnotations))
end repeat
return thisList & "||||||" & sourceList
end tell
"""
    cmd = "\n".join(["osascript << EOT", applescript, "EOT"])
    p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    stdout, stderr = p.communicate()

    # PYTHON 3 HANDS BACK BYTES; PYTHON 2 ALREADY GIVES `str`
    if not isinstance(stdout, str):
        stdout = stdout.decode("utf-8")
    if p.returncode != 0 or separator not in stdout:
        if not isinstance(stderr, str):
            stderr = stderr.decode("utf-8", "replace")
        raise ValueError(
            "could not read the annotations from Papers.app via osascript: %s" % (stderr.strip(),))

    # JSON PROPERTIES FOR EVERY ANNOTATION AND THE UUID OF THE PDF/PUBLICATION
    # IT BELONGS TO. SPLIT ON THE LAST MARKER SO ANNOTATION TEXT CANNOT BE
    # MISTAKEN FOR THE BOUNDARY
    jsonPart, uuidPart = stdout.rsplit(separator, 1)
    # DROP THE COMMA THAT JOINED THE LAST ANNOTATION TO THE MARKER
    jsonPart = jsonPart.strip().rstrip(",")
    papersJson = json.loads("[" + jsonPart + "]")
    # STRIP EVERY UUID - THE LAST ONE CARRIES OSASCRIPT'S TRAILING NEWLINE -
    # AND DISCARD THE EMPTY ENTRY LEFT BY THE COMMA THAT FOLLOWS THE MARKER
    paperUuids = [u.strip() for u in uuidPart.split(",") if u.strip()]

    log.info('completed the ``grab_papers_annotations_with_applescript`` function')
    return papersJson, paperUuids
def compile_markdown_notes_from_annotations(
        papersJson,
        paperUuids,
        log):
    """*iterate through all annotations and compile markdown style notes for each publication*

    Each annotation dictionary is updated in place with ``uuid``,
    ``annIndex`` and (where a conversion applies) ``md`` keys. Annotations
    are then ordered by page number and position on the page before being
    grouped per publication.

    **Key Arguments:**
        - ``papersJson`` -- list of json properties for every annotation
        - ``paperUuids`` -- list of uuids the pdf/publication annotation belong to (same length as ``papersJson``)
        - ``log`` -- logger

    **Return:**
        - ``noteDict`` -- a dictionary of markdown notes with publication uuids as keys
    """
    log.info('starting the ``compile_markdown_notes_from_annotations`` function')

    md = translate(
        log=log,
        settings=False
    )

    # HIGHLIGHT (TYPE 2) COLOUR CODE -> MARKDOWN CONVERSION
    highlightFormatters = {
        1: md.blockquote,
        2: lambda text: md.codeblock(text, "xxx"),
        3: lambda text: text.strip(),
        4: md.bold,
        5: lambda text: md.header(text, level=2),
        6: md.em,
    }

    # ASSIGN CORRECT PDF UUID & CONVERT ANNOTATIONS TO MD
    annotations = []
    for u, j in zip(paperUuids, papersJson):
        j["uuid"] = u

        # SMALLEST SECOND COORDINATE FOUND IN THE ANNOTATION'S RECTANGLES,
        # CAPPED AT THE DEFAULT; OTHERWISE THE ANNOTATION'S OWN `top`
        top = 1000000.
        if "rectangles" in j:
            for r in j["rectangles"]:
                this = float(r.split("}, {")[0].split(", ")[1])
                if this < top:
                    top = this
        elif "top" in j:
            top = float(j["top"])
        page_nr = j["page_nr"]
        # INVERT SO THAT LARGER COORDINATES SORT FIRST
        top = 100000000. - top
        j["annIndex"] = "%s-%s" % (page_nr, top)

        # SORT ON NUMBERS, NOT ON THE `annIndex` STRING - AS TEXT, PAGE "10"
        # WOULD BE ORDERED BEFORE PAGE "2"
        try:
            pageKey = float(page_nr)
        except (TypeError, ValueError):
            # UNPARSEABLE PAGE NUMBERS GO TO THE END
            pageKey = float("inf")
        annotations.append(((pageKey, top), j))

        if j['type'] == 2:
            if "text" not in j or len(j['text'].strip()) == 0:
                j["md"] = ""
            else:
                # UNKNOWN COLOURS GET NO `md` KEY AND ARE SKIPPED BELOW
                formatter = highlightFormatters.get(j.get('color'))
                if formatter is not None:
                    j["md"] = formatter(j['text'])
        elif j['type'] == 0:
            if "contents" not in j:
                j["md"] = ""
            else:
                j["md"] = j['contents'].strip()
        elif j['type'] == 53:
            j["md"] = "**!!!TAKE A SCREENGRAB OF ANNOTATED IMAGE(S) IN PDF, CONVERT TO MD LINK AND PASTE INTO NOTE AT IMAGE LOCATION!!!**"

    # STABLE SORT: ANNOTATIONS WITH EQUAL KEYS KEEP THEIR INCOMING ORDER
    annotations.sort(key=itemgetter(0))

    noteDict = {}
    for _, j in annotations:
        notes = noteDict.setdefault(j["uuid"], [])
        if "md" in j:
            notes.append(j["md"])

    log.info('completed the ``compile_markdown_notes_from_annotations`` function')
    return noteDict
def add_notes_and_citations_to_papers(
        cursor,
        noteDict,
        log):
    """*add markdown notes and citations to the general notes section of papers (via the sqlite database)*

    For every publication with a usable subtitle and author/year string a
    citation is built, appended to any pre-compiled markdown notes and
    written to the publication's ``notes`` column. The
    ``_upd_log_Publication_notes`` trigger is dropped before the updates
    and re-created afterwards.

    **Key Arguments:**
        - ``cursor`` -- the database cursor; rows must come back as mutable dictionaries keyed by column name
        - ``noteDict`` -- a dictionary of markdown notes with publication uuids as keys
        - ``log`` -- logger

    **Return:**
        - ``None``
    """
    log.info('starting the ``add_notes_and_citations_to_papers`` function')

    # GRAB THE UUID AND CITATION METADATA OF ALL PAPERS IN ONE QUERY
    sqlQuery = """select uuid, title, author_string, author_year_string, type, subtype, publisher, label, rating, subtitle, summary, tag_string, kind from Publication where subtitle is not null and subtitle != 'MISSING' and author_year_string is not null;"""
    cursor.execute(sqlQuery)
    paperMeta = cursor.fetchall()

    # VALUES ARE BOUND AS PARAMETERS SO QUOTES IN THE DATA CANNOT BREAK OR
    # ALTER THE STATEMENT
    urlQuery = """select remote_id as url from (select * from SyncEvent where device_ID = ? and remote_id like 'http%' order by updated_at) group by device_ID;"""

    starRatings = {
        5: u"★★★★★",
        4: u"★★★★☆",
        3: u"★★★☆☆",
        2: u"★★☆☆☆",
        1: u"★☆☆☆☆",
    }

    # NOTE(review): SOURCE shows the "[Not Cited]" line directly followed by
    # the link definition with no blank line in between - confirm
    fullCite = u"\n[Not Cited][%(citeKey)s]\n[%(citeKey)s]: %(rating)s **%(subtitle)s**: %(author_year_string)s, %(link)s, %(publisher)s.\n"
    mediaCite = u"\n[Not Cited][%(citeKey)s]\n[%(citeKey)s]: %(rating)s **%(subtitle)s**: %(author_string)s, %(link)s, %(publisher)s.\n"
    plainCite = u"\n[Not Cited][%(citeKey)s]\n[%(citeKey)s]: %(rating)s **%(subtitle)s**: %(link)s, %(publisher)s.\n"

    # FOR EACH PAPER, FORM THE CITATION AND ADD THE PRE-GENERATED NOTES TO
    # THE METADATA DICTS
    for p in paperMeta:
        cursor.execute(urlQuery, (p["uuid"],))
        urlRows = cursor.fetchall()
        p["url"] = urlRows[0]["url"] if urlRows else None

        p["author_year_string"] = p["author_year_string"].replace(
            "(", "").replace(")", "")
        # `author_string` IS NOT FILTERED FOR NULL BY THE QUERY ABOVE
        p["author_string"] = (p["author_string"] or u"").title()
        p["citeKey"] = "#%(title)s%(author_string)s" % p
        p["citeKey"] = p["citeKey"].lower().replace(" ", "")
        # NOTE(review): if the string ends with a 4-digit year, this slice
        # drops its last digit now that the parentheses are stripped;
        # `year` is not read anywhere in this function - confirm intent
        p["year"] = p["author_year_string"][-5:-1]

        if p["rating"]:
            # RATINGS OUTSIDE 1-5 ARE LEFT AS THEY ARE
            p["rating"] = starRatings.get(p["rating"], p["rating"])
        else:
            p["rating"] = ""

        if p["url"]:
            p["link"] = "*[%(title)s](%(url)s)*" % p
        else:
            p["link"] = "*%(title)s*" % p

        if "Journal" in p["subtitle"] or p["subtitle"] in ["Draft Paper", "Book", "Book Chapter"]:
            p["cite"] = fullCite % p
        elif p["subtitle"] in ["Podcast", "Video"]:
            p["cite"] = mediaCite % p
        else:
            p["cite"] = plainCite % p
        p["cite"] = p["cite"].replace(", None.", "").replace(
            "..", ".").replace("Journal Abstract", "Journal Article").replace(" Et Al.", " et al.").replace(" , ", " ")

        note = u""
        if p["uuid"] in noteDict:
            note = u"\n\n".join(noteDict[p["uuid"]])
            # THREE PASSES OF COLLAPSING TRIPLE NEWLINES, AS BEFORE
            for _ in range(3):
                note = note.replace("\n\n\n", "\n\n")
            note = note + "\n\n"
        # NO MANUAL QUOTE ESCAPING - THE NOTE IS BOUND AS A PARAMETER BELOW
        p["mdNote"] = (note + p["cite"]).strip()

    # NOW UPDATE EACH PAPER WITH THE NOTES AND CITATION INTO THE GENERAL
    # NOTE SECTION IN PAPERS APP
    sqlQueries = [("DROP TRIGGER _upd_log_Publication_notes;", ())]
    for p in paperMeta:
        sqlQueries.append(
            ("UPDATE Publication SET notes = ? where UUID = ?;", (p["mdNote"], p["uuid"])))
    # THIS STRING IS NEVER %-FORMATTED, SO THE strftime CODES USE A SINGLE
    # "%" (A DOUBLED "%%" MAKES SQLITE EMIT A LITERAL "%" INSTEAD OF A TIME).
    # STRING LITERALS USE STANDARD SINGLE QUOTES
    sqlQueries.append(("""CREATE TRIGGER _upd_log_Publication_notes AFTER UPDATE OF notes ON Publication
WHEN enableTransactionLog() AND (NEW."searchresult" = 0) AND (OLD."notes" IS NULL AND NEW."notes" IS NOT NULL OR OLD."notes" IS NOT NULL AND NEW."notes" IS NULL OR OLD."notes" <> NEW."notes") BEGIN
DELETE FROM changeLog WHERE modUUID = NEW."uuid" AND modColumn='notes' AND modType=2; -- delete old changes immediately, for cleanup
INSERT INTO changeLog (modifiedDate, modTable, modUUID, modType, modColumn, modValue, device, dbRevision) VALUES ((strftime('%s', 'now') + strftime('%f', 'now') - strftime('%S', 'now')), 'Publication', NEW."uuid", 2, 'notes', NEW."notes", device(), dbRevision());END""", ()))

    for q, params in sqlQueries:
        try:
            cursor.execute(q, params)
        except Exception as e:
            # DELIBERATELY BEST-EFFORT: A FAILING STATEMENT IS LOGGED AND
            # THE REMAINING ONES (INCLUDING THE TRIGGER RE-CREATION) STILL RUN
            log.warning('%s' % (e,))

    log.info('completed the ``add_notes_and_citations_to_papers`` function')
    return None
# RUN `main` ONLY WHEN THIS FILE IS EXECUTED AS A SCRIPT, NOT WHEN IMPORTED
if "__main__" == __name__:
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
# encoding: utf-8 | |
""" | |
*Copy the general notes from selected papers in Papers.app to the clipboard* | |
:Author: | |
David Young | |
:Date Created: | |
September 4, 2017 | |
Usage: | |
papers-copy-selected-papers-notes | |
Options: | |
-h, --help show this help message | |
""" | |
################# GLOBAL IMPORTS #################### | |
import sys | |
import os | |
import json | |
from os.path import expanduser | |
import sqlite3 | |
import psutil | |
import base64 | |
from polyglot.markdown import translate | |
from subprocess import Popen, PIPE, STDOUT | |
from operator import itemgetter | |
from fundamentals import tools | |
# Location of the Papers 3 sqlite database, written relative to the user's
# home directory ("~"); the caller expands the "~" before connecting.
PAPERS_DB_PATH = (
    "~/Library/Application Support/Papers3/Library_Data/"
    "C02N63X6G3QP__D741EA9B-BEF1-44A5-8087-4481F4EABAF3/"
    "Library.papers3/Database.papersdb"
)
def dict_factory(cursor, row):
    """*sqlite3 row factory that returns each result row as a dictionary keyed by column name*

    **Key Arguments:**
        - ``cursor`` -- the cursor that produced the row (its ``description`` supplies the column names)
        - ``row`` -- the raw row of values

    **Return:**
        - a ``{column name: value}`` dictionary for the row
    """
    # THE FIRST ELEMENT OF EACH `description` ENTRY IS THE COLUMN NAME
    columnNames = [column[0] for column in cursor.description]
    return dict(zip(columnNames, row))
def main(arguments=None):
    """
    *The main function used when ``papers-copy-selected-papers-notes`` is run as a single script from the command line*

    Checks that Papers.app is running, asks it (via applescript) for the
    uuids of the selected publications, reads their notes from the Papers
    sqlite database and writes them, UTF-8 encoded, to stdout.

    **Key Arguments:**
        - ``arguments`` -- passed straight through to ``fundamentals.tools`` (default ``None``)

    **Return:**
        - ``allNotes`` -- the notes of the selected publications joined by blank lines (``None`` if Papers.app is not open)
    """
    # SETUP THE COMMAND-LINE UTIL SETTINGS
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()

    # LOG THE PARSED CL ARGUMENTS. THE ORIGINAL `exec`-BASED UNPACKING HAS
    # BEEN DROPPED: IT EVALUATED ARGUMENT VALUES AS CODE AND NONE OF THE
    # VARIABLES IT CREATED WERE EVER READ
    for arg, val in arguments.items():
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        if arg == "--dbConn":
            dbConn = val
        log.debug('%s = %s' % (varname, val,))

    papersDB = expanduser(PAPERS_DB_PATH)

    # DON'T CONTINUE IF PAPERS APP IS NOT OPEN
    openApp = False
    for pid in psutil.pids():
        try:
            proc = psutil.Process(pid)
            if "papers" in proc.name().lower() and "Papers.app" in proc.exe():
                openApp = True
                break
        except psutil.Error:
            # PROCESS VANISHED OR WE ARE NOT ALLOWED TO INSPECT IT - IT
            # CANNOT BE IDENTIFIED AS PAPERS, SO MOVE ON TO THE NEXT ONE
            continue
    if not openApp:
        log.warning(
            "Papers.app is not open, open the app before running this script")
        return

    # IGNORE EMPTY ENTRIES (NOTHING SELECTED)
    paperUuids = [
        u for u in grab_selected_paper_uuids_with_applescript(log) if u]

    allNotes = []
    if paperUuids:
        # CONNECT TO PAPERS DATABASE
        conn = sqlite3.connect(papersDB)
        try:
            conn.row_factory = dict_factory
            cursor = conn.cursor()
            # GENERATE THE QUERY TO GRAB THE NOTES FROM THE DATABASE - ONE
            # BOUND PLACEHOLDER PER UUID INSTEAD OF PASTING VALUES INTO SQL
            placeholders = ", ".join("?" * len(paperUuids))
            query = "select notes from Publication where UUID in (%s);" % (
                placeholders,)
            cursor.execute(query, paperUuids)
            # PUBLICATIONS WITHOUT NOTES HOLD NULL, WHICH CANNOT BE JOINED
            allNotes = [r["notes"]
                        for r in cursor.fetchall() if r["notes"] is not None]
            cursor.close()
        finally:
            conn.close()

    allNotes = (u"\n\n").join(allNotes)

    # WRITE THE UTF-8 BYTES PLUS A NEWLINE, AS THE PYTHON 2 `print` STATEMENT
    # DID; PYTHON 3 NEEDS THE UNDERLYING BINARY BUFFER FOR BYTES
    out = getattr(sys.stdout, "buffer", sys.stdout)
    out.write(allNotes.encode("utf-8") + b"\n")

    return allNotes
def grab_selected_paper_uuids_with_applescript(
        log):
    """*grab a list of the UUIDs of the selected papers via applescript*

    **Key Arguments:**
        - ``log`` -- logger

    **Return:**
        - ``paperUuids`` -- list of the uuids of the publications selected in the front library window (empty if nothing is selected or the applescript fails)
    """
    log.info('starting the ``grab_selected_paper_uuids_with_applescript`` function')

    applescript = """
tell application "Papers"
set uuidList to {}
set allSelected to selected publications of front library window
repeat with i from 1 to count of allSelected
set end of uuidList to (id of item i of allSelected)
end repeat
return uuidList
end tell
"""
    cmd = "\n".join(["osascript << EOT", applescript, "EOT"])
    p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    stdout, stderr = p.communicate()

    # PYTHON 3 HANDS BACK BYTES; PYTHON 2 ALREADY GIVES `str`
    if not isinstance(stdout, str):
        stdout = stdout.decode("utf-8")
    if p.returncode != 0:
        if not isinstance(stderr, str):
            stderr = stderr.decode("utf-8", "replace")
        log.warning(
            "could not read the selected papers from Papers.app via osascript: %s" % (stderr.strip(),))
        return []

    # SPLIT UUIDS INTO LIST, DROPPING THE EMPTY ENTRY AN EMPTY SELECTION
    # WOULD OTHERWISE LEAVE BEHIND
    paperUuids = [u.strip() for u in stdout.split(",") if u.strip()]

    log.info('completed the ``grab_selected_paper_uuids_with_applescript`` function')
    return paperUuids
# RUN `main` ONLY WHEN THIS FILE IS EXECUTED AS A SCRIPT, NOT WHEN IMPORTED
if "__main__" == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment