anjiro/Papers3_to_Zotero.py

## Papers3_to_Zotero.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This script takes as input a BibTeX library exported from readcube/mekentosj Papers3 and outputs a BibTex library for Zotero to import.
The script preserves your Papers citekeys, adds supplementary files from the Papers3 Library, removes duplicate links to PDFs and leaves out extraneous *.html and *.webarchive files that are often created by importing articles into Papers from a web browser.

__Instructions for use__:

* Make sure to have Better BibTeX pre-installed to Zotero if you want to preserve the Papers citekeys.

* Export your Papers3 library as a *.bib file.
    Export > BibTeX Library
    Make sure to set the “BibTex Record” option to “Complete”. This will cause papers to include the paths to the main PDF (or whatever) file in the *.bib export

* In this script, update the ‘papers_library’ and ‘bibtex_library’ variables with the paths to your Papers3 library and the BibTeX library that you just exported.
    e.g.
    bibtex_library = Path("~/Desktop/full_library_export.bib").expanduser() ### Path to Papers BibTeX library export
    papers_library = Path("~/Documents/user's Library/Library.papers3").expanduser() ### Path to Papers3 Library

* Run this script with Python 3.6 or higher (it uses f-strings).

* Import the generated 'zotero_import.bib' file into Zotero.

* Be sure to check the 'Import errors found:' file if Zotero generates one (if it exists, it will be in whatever folder you imported the library to; sort by title to find it).

__NOTE__:
The Collections groupings are not preserved with this method. This is one way to manually get your Papers3 Collections into Zotero after following the above instructions:

* Export each collection as a BibTex library (“Export” set to “Selected Collection” and “BibTex Record” set to “Standard”). This will prevent any file paths from being included in the *.bib file.

* Import that *.bib file directly to Zotero with the option to “Place imported collections and items into new collection” selected.

* Then merge the duplicate records. That will give you a new collection with links to the right papers from your Zotero library.

* In this strategy, you have to do that for each one of your Papers3 Collections. Not ideal but maybe tolerable.
"""

from pathlib import Path
import re

### Update these paths:
bibtex_library = Path("~/Desktop/library.bib").expanduser() ### Path to Papers BibTeX library export
papers_library = Path("~/Documents/Library.papers3").expanduser() ### Path to Papers3 Library

out = list()
papers_library_string = str(papers_library) + '/'

if papers_library_string[-9:] != '.papers3/':
    raise Exception(f'The variable \'papers_library\' should end in with \'.papers3\' but is rather: \n\t{str(papers_library)}')
if not papers_library.is_dir():
    raise Exception(f'The path you provided to the Papers3 library does not seem to exist: \n\t{str(papers_library)}')
if not bibtex_library.is_file() and bibtex_library.suffix == '.bib':
    raise Exception(f'The path you provided to the BibTex Library file you exported from Papers3 does not seem to exist or is not \'.bib\' file: \n\t{str(bibtex_library)}')

with open(bibtex_library, 'r') as btlib:
    for line in btlib:
        if line.startswith('file = {'):
            templine = re.sub(r'^file = {{(.*?)}},?', r'file = {\1},', line, flags = re.M)
            newline = re.sub(r'^file = {(.*?);(\1)},?', r'file = {\1},', templine, flags = re.M)

            assert ';' not in newline ### assert there is only one file

            result = re.search(r"^file = {.*?:" + papers_library_string + r"(.*?)\.(.*?):(.*?/.*?)},?", newline)

            primary_file_path = Path(papers_library_string) / Path(result.group(1)).with_suffix('.'+result.group(2))

            supp_files = list()
            for dir_extra in ['Supplemental', 'Media']:
                supp_dir = primary_file_path.parents[0] / dir_extra
                if supp_dir.exists():
                    for x in supp_dir.iterdir():
                        if x.is_file() and x.suffix not in ['.html', '.webarchive'] and str(x) != str(primary_file_path):
                            supp_files.append(x)

                    if len(supp_files) > 0:
                        primary_line = re.search(r"(^file = {.*?:" + papers_library_string + r".*?\..*?:application/.*?)},?", newline)
                        newline = primary_line.group(1)
                        for x in supp_files:
                            print(f'adding supplementary file for {x.name}')
                            newline += f';{x.with_suffix("").name + " Supp" + x.suffix}:{x}:application/{x.suffix}'
                        newline += '},\n'
            out.append(newline)
        else:
            out.append(line)


### New BibTeX record to import into Zotero
modified_lib = bibtex_library.parents[0] / 'zotero_import.bib'
with open(modified_lib, 'w') as outfile:
    for item in out:
        outfile.write(item)

print(f'\n\nScript appears to have completed successfully. You can now import this file into Zotero (make sure Better BibTeX is already installed): \n\t{str(modified_lib)}')
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	"""
	This script takes as input a BibTeX library exported from readcube/mekentosj Papers3 and outputs a BibTex library for Zotero to import.
	The script preserves your Papers citekeys, adds supplementary files from the Papers3 Library, removes duplicate links to PDFs and leaves out extraneous *.html and *.webarchive files that are often created by importing articles into Papers from a web browser.

	__Instructions for use__:

	* Make sure to have Better BibTeX pre-installed to Zotero if you want to preserve the Papers citekeys.

	* Export your Papers3 library as a *.bib file.
	Export > BibTeX Library
	Make sure to set the “BibTex Record” option to “Complete”. This will cause papers to include the paths to the main PDF (or whatever) file in the *.bib export

	* In this script, update the ‘papers_library’ and ‘bibtex_library’ variables with the paths to your Papers3 library and the BibTeX library that you just exported.
	e.g.
	bibtex_library = Path("~/Desktop/full_library_export.bib").expanduser() ### Path to Papers BibTeX library export
	papers_library = Path("~/Documents/user's Library/Library.papers3").expanduser() ### Path to Papers3 Library

	* Run this script with Python 3.6 or higher (it uses f-strings).

	* Import the generated 'zotero_import.bib' file into Zotero.

	* Be sure to check the 'Import errors found:' file if Zotero generates one (if it exists, it will be in whatever folder you imported the library to; sort by title to find it).

	__NOTE__:
	The Collections groupings are not preserved with this method. This is one way to manually get your Papers3 Collections into Zotero after following the above instructions:

	* Export each collection as a BibTex library (“Export” set to “Selected Collection” and “BibTex Record” set to “Standard”). This will prevent any file paths from being included in the *.bib file.

	* Import that *.bib file directly to Zotero with the option to “Place imported collections and items into new collection” selected.

	* Then merge the duplicate records. That will give you a new collection with links to right papers from your Zotero library.

	* In this strategy, you have to do that for each one of your Papers3 Collections. Not ideal but maybe tolerable.
	"""

	from pathlib import Path
	import re

	### Update these paths:
	bibtex_library = Path("~/Desktop/library.bib").expanduser() ### Path to Papers BibTeX library export
	papers_library = Path("~/Documents/Library.papers3").expanduser() ### Path to Papers3 Library

	out = list()
	papers_library_string = str(papers_library) + '/'

	if papers_library_string[-9:] != '.papers3/':
	raise Exception(f'The variable \'papers_library\' should end in with \'.papers3\' but is rather: \n\t{str(papers_library)}')
	if not papers_library.is_dir():
	raise Exception(f'The path you provided to the Papers3 library does not seem to exist: \n\t{str(papers_library)}')
	if not bibtex_library.is_file() and bibtex_library.suffix == '.bib':
	raise Exception(f'The path you provided to the BibTex Library file you exported from Papers3 does not seem to exist or is not \'.bib\' file: \n\t{str(bibtex_library)}')

	with open(bibtex_library, 'r') as btlib:
	for line in btlib:
	if line.startswith('file = {'):
	templine = re.sub(r'^file = {{(.*?)}},?', r'file = {\1},', line, flags = re.M)
	newline = re.sub(r'^file = {(.*?);(\1)},?', r'file = {\1},', templine, flags = re.M)

	assert ';' not in newline ### assert there is only one file

	result = re.search(r"^file = {.?:" + papers_library_string + r"(.?)\.(.?):(.?/.*?)},?", newline)

	primary_file_path = Path(papers_library_string) / Path(result.group(1)).with_suffix('.'+result.group(2))

	supp_files = list()
	for dir_extra in ['Supplemental', 'Media']:
	supp_dir = primary_file_path.parents[0] / dir_extra
	if supp_dir.exists():
	for x in supp_dir.iterdir():
	if x.is_file() and x.suffix not in ['.html', '.webarchive'] and str(x) != str(primary_file_path):
	supp_files.append(x)

	if len(supp_files) > 0:
	primary_line = re.search(r"(^file = {.?:" + papers_library_string + r".?\..?:application/.?)},?", newline)
	newline = primary_line.group(1)
	for x in supp_files:
	print(f'adding supplementary file for {x.name}')
	newline += f';{x.with_suffix("").name + " Supp" + x.suffix}:{x}:application/{x.suffix}'
	newline += '},\n'
	out.append(newline)
	else:
	out.append(line)


	### New BibTeX record to import into Zotero
	modified_lib = bibtex_library.parents[0] / 'zotero_import.bib'
	with open(modified_lib, 'w') as outfile:
	for item in out:
	outfile.write(item)

	print(f'\n\nScript appears to have completed successfully. You can now import this file into Zotero (make sure Better BibTeX is already installed): \n\t{str(modified_lib)}')