-
-
Save specter119/b79dc35a6091d0fd0896a9536fbddb5a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python | |
# coding: utf-8 | |
from __future__ import print_function | |
import configparser | |
import re | |
import shutil | |
import sqlite3 | |
import sys | |
# pathlib is in the standard library on Python 3; Python 2 needs the
# pathlib2 backport, which exposes the same Path class.
try:
    from pathlib import Path
except ImportError:
    from pathlib2 import Path

# Python 2 only: switch the interpreter-wide default string encoding to UTF-8.
# reload(sys) is required because site.py removes sys.setdefaultencoding.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('UTF8')
def get_zotfile_dest_and_zotero_data_dirs():
    '''
    Locate the Zotero data dir and the Zotfile destination dir.

    Reads ``profiles.ini`` from the platform-specific Zotero profile
    directory, follows the ``Path`` entry of its ``Profile0`` section and
    extracts the ``extensions.zotero.dataDir`` and
    ``extensions.zotfile.dest_dir`` preferences from that profile's
    ``prefs.js``.

    Returns:
        tuple: ``(zotero_data_dir, zotfile_dest_dir)``, both ``Path`` objects.

    Raises:
        KeyError: ``sys.platform`` is not one of the known platforms, or
            ``profiles.ini`` is missing / has no ``Profile0`` section.
        AttributeError: one of the two preferences is not set in prefs.js.
    '''
    profile_dirs = {
        'darwin': Path.home() / 'Library/Application Support/Zotero',
        'linux': Path.home() / '.zotero/zotero',
        'linux2': Path.home() / '.zotero/zotero',
        'win32': Path.home() / 'AppData/Roaming/Zotero/Zotero'
    }
    profile_dir = profile_dirs[sys.platform]

    config = configparser.ConfigParser()
    config.read(str(profile_dir / 'profiles.ini'), encoding='utf-8')
    configs_loc = profile_dir / config['Profile0']['Path'] / 'prefs.js'

    # Decode explicitly as UTF-8: with the locale default (e.g. GBK on a
    # Chinese Windows) non-ASCII paths either raise UnicodeDecodeError or come
    # back as mojibake that no longer points at the real directories.
    configs = configs_loc.read_text(encoding='utf-8')

    zotero_data_pat = re.compile(
        r'user_pref\("extensions.zotero.dataDir",\ "(?P<zotero_data>.+)"\);')
    zotero_data_dir = Path(
        zotero_data_pat.search(configs).group('zotero_data'))

    zotfile_dest_pat = re.compile(
        r'user_pref\("extensions.zotfile.dest_dir",\ "(?P<zotfile_dest>.+)"\);')
    zotfile_dest_dir = Path(
        zotfile_dest_pat.search(configs).group('zotfile_dest'))

    return zotero_data_dir, zotfile_dest_dir
def get_unmaintained_files(zotero_data_dir,
                           zotfile_dest_dir,
                           case_sensitive='auto'):
    '''
    List the attachment files on disk that Zotero no longer references.

    Every non-hidden file below ``zotfile_dest_dir`` is compared with the
    ``path`` of the linked-file attachments (``linkMode = 2``) stored in
    ``zotero.sqlite``. Files without a matching database entry are returned
    with their real on-disk spelling, even when the comparison itself is
    case-insensitive.

    Args:
        zotero_data_dir(Path): Zotero data dir (contains ``zotero.sqlite``)
        zotfile_dest_dir(Path): Zotfile destination dir
        case_sensitive(bool or str): whether file names are compared case
            sensitively; ``'auto'`` picks True on linux and False on
            darwin / win32

    Returns:
        list: ``Path`` objects of the files that are not managed by Zotero.

    Raises:
        sqlite3.OperationalError: the database cannot be opened or queried.
        KeyError: ``case_sensitive='auto'`` on an unknown ``sys.platform``.
    '''
    import unicodedata
    from contextlib import closing

    if case_sensitive == 'auto':
        case_sensitive = {
            'darwin': False,
            'linux': True,
            'linux2': True,
            'win32': False
        }[sys.platform]
    # macOS file systems treat differently normalised spellings of a name as
    # the same file, so both sides are normalised before comparing there.
    normalize_unicode = sys.platform == 'darwin'

    def comparison_key(path):
        # Key used only for matching; the original Path is what gets returned.
        key = path.as_posix()
        if normalize_unicode:
            key = unicodedata.normalize('NFC', key)
        if not case_sensitive:
            key = key.lower()
        return key

    local_files = [
        p for p in zotfile_dest_dir.glob('**/*')
        if p.is_file() and not p.name.startswith('.')
    ]

    db_path = zotero_data_dir / 'zotero.sqlite'
    try:
        # closing() is needed because a sqlite3 connection used as a context
        # manager only commits/rolls back; it does not close the connection.
        with closing(sqlite3.connect(str(db_path))) as con:
            rows = con.execute(
                'SELECT path FROM itemAttachments WHERE linkMode = 2'
            ).fetchall()
    except sqlite3.OperationalError as err:
        raise sqlite3.OperationalError(
            'cannot read {}: {} (check that the path is correct and that '
            'Zotero is closed)'.format(db_path, err))

    # Rows with a NULL/empty path carry no file name to compare against.
    attachments_zotero = set(
        comparison_key(zotfile_dest_dir / row[0].replace('attachments:', '', 1))
        for row in rows if row[0])

    return [p for p in local_files
            if comparison_key(p) not in attachments_zotero]
def remove_unmaintained(attachments_to_remove, zotfile_dest_dir=None):
    '''
    Delete the unmaintained files, then clear directories left empty.

    A directory counts as empty when it contains no entry whose name does not
    start with a dot; such directories are removed together with their hidden
    content. Directories are visited deepest first so that a parent whose
    only content was an empty sub-directory is removed as well.

    Args:
        attachments_to_remove(iterable of Path): files to delete
        zotfile_dest_dir(Path): root below which empty directories are
            cleared (the root itself is kept). When omitted, the module
            level ``zotfile_dest_dir`` set by the script entry point is used.

    Raises:
        NameError: ``zotfile_dest_dir`` was not passed and no module level
            variable of that name exists.
        OSError: a file in ``attachments_to_remove`` cannot be deleted.
    '''
    if zotfile_dest_dir is None:
        # Backward compatibility: older callers pass only the file list and
        # rely on the global assigned in the ``__main__`` section.
        try:
            zotfile_dest_dir = globals()['zotfile_dest_dir']
        except KeyError:
            raise NameError(
                "name 'zotfile_dest_dir' is not defined; pass it explicitly")

    for attachment in attachments_to_remove:
        attachment.unlink()

    # Deepest directories first, and emptiness is evaluated at removal time,
    # so nested empty directories collapse in a single pass.
    directories = sorted(
        (p for p in zotfile_dest_dir.glob('**/*') if p.is_dir()),
        key=lambda p: len(p.parts),
        reverse=True)
    for directory in directories:
        if all(entry.name.startswith('.') for entry in directory.iterdir()):
            shutil.rmtree(directory.as_posix(), ignore_errors=True)
if __name__ == '__main__':
    # Script entry point: locate the directories, list the files Zotero no
    # longer references and delete them. ``zotfile_dest_dir`` is deliberately
    # a module-level name because remove_unmaintained() reads it.
    zotero_data_dir, zotfile_dest_dir = get_zotfile_dest_and_zotero_data_dirs()
    attachments_to_remove = get_unmaintained_files(zotero_data_dir,
                                                   zotfile_dest_dir)

    # Only the import itself is guarded: an ImportError raised later (while
    # prompting or deleting) must not be mistaken for "click is missing" and
    # fall through to the unconfirmed removal path.
    try:
        import click
    except ImportError:
        click = None

    listing = '\n'.join([' {}'.format(p) for p in attachments_to_remove])
    if click is None:
        # click is optional; without it there is no prompt and the files are
        # removed right after being listed.
        print(
            'The following files no longer managed by Zotero will be removed:')
        print(listing)
        remove_unmaintained(attachments_to_remove)
    else:
        print('The following files are no longer managed by Zotero:')
        print(listing)
        if click.confirm('Do you want remove them?', default=True):
            remove_unmaintained(attachments_to_remove)
Traceback (most recent call last):
File "zot_rm_unmaintained_files.py", line 110, in
attachments_to_remove = get_unmaintained_files(zotero_data_dir,
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
我运行的时候出现了上面问题,Python 3.8.2
Zotero 开着呢么?sqlite 不能并行读写。不过我应该往里加个更合适的报错。
File "zot_rm_unmaintained_files.py", line 111, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
关闭之后使用是这个情况
File "zot_rm_unmaintained_files.py", line 111, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
关闭之后使用是这个情况
在109行下加一行 print(zotero_data_dir, zotfile_dest_dir),注意缩进,看看获得的两个目录对不对。
Traceback (most recent call last):
File "zot_rm_unmaintained_files.py", line 112, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
添加之后这样子
G:\澶囦唤鐩榎\鏂囩尞\Zotero G:\澶囦唤鐩榎\鏂囩尞\Zotero\storage
路径乱码
File "zot_rm_unmaintained_files.py", line 111, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
关闭之后使用是这个情况在109行下加一行 print(zotero_data_dir, zotfile_dest_dir),注意缩进,看看获得的两个目录对不对。
G:\澶囦唤鐩榎\鏂囩尞\Zotero G:\澶囦唤鐩榎\鏂囩尞\Zotero\storage
路径乱码File "zot_rm_unmaintained_files.py", line 111, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
关闭之后使用是这个情况在109行下加一行 print(zotero_data_dir, zotfile_dest_dir),注意缩进,看看获得的两个目录对不对。
呃,Windows 下更不能用中文目录啊,估计把路径换下编码就可以了吧,不准备改了。
G:\澶囦唤鐩榎\鏂囩尞\Zotero G:\澶囦唤鐩榎\鏂囩尞\Zotero\storage
路径乱码File "zot_rm_unmaintained_files.py", line 111, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
关闭之后使用是这个情况在109行下加一行 print(zotero_data_dir, zotfile_dest_dir),注意缩进,看看获得的两个目录对不对。
你试试 37 行读文本的时候,换下文本编码,默认应该是utf-8,应该是对的。我这个脚本17到19行是给python2切换到 UTF-8 的,你可以都试试,我记得 python3 不需要。
(py38) C:\Users\hp->C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py
Traceback (most recent call last):
File "C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py", line 109, in
zotero_data_dir, zotfile_dest_dir = get_zotfile_dest_and_zotero_data_dirs()
File "C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py", line 37, in get_zotfile_dest_and_zotero_data_dirs
configs = configs_loc.read_text()
File "F:\anaconda3\lib\pathlib.py", line 1217, in read_text
return f.read()
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 7061: illegal multibyte sequence
新手上路,不知道出了什么问题?
(py38) C:\Users\hp->C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py
Traceback (most recent call last):
File "C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py", line 109, in
zotero_data_dir, zotfile_dest_dir = get_zotfile_dest_and_zotero_data_dirs()
File "C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py", line 37, in get_zotfile_dest_and_zotero_data_dirs
configs = configs_loc.read_text()
File "F:\anaconda3\lib\pathlib.py", line 1217, in read_text
return f.read()
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 7061: illegal multibyte sequence
新手上路,不知道出了什么问题?
你试试把37行改为 configs = configs_loc.read_text(encoding='utf-8'),记得保持缩进。为啥 Windows 下这么多编码问题啊。
(base) C:\Users\sunsh\Desktop>zot_rm_unmaintained_files.py
The following files are no longer managed by Zotero:
Do you want remove them? [Y/n]: Y
知乎留言转到这里,执行上述操作后,同步盘内重复文件并未删除,坚果云默认路径,有中文。
Miniconda3 Windows 64-bit,Python3.7
更新1:修改为纯英文路径后将原来的文件复制过去(保证有重复的文件),在zotero中更改路径同步后再执行脚本,与之前相同,重复的文件还在。
更新2:原因搞清楚了,相同文件名不同后缀的pdf不会被删除(例如abc,abc1,abc2...),之前从endnote导入到zotero里有些文献重复关联了相同的pdf,改名后就可以正常删除,希望后续能更新脚本。
请问zotero5没有profiles.ini怎么办?
(base) C:\Users\sunsh\Desktop>zot_rm_unmaintained_files.py
The following files are no longer managed by Zotero:Do you want remove them? [Y/n]: Y
知乎留言转到这里,执行上述操作后,同步盘内重复文件并未删除,坚果云默认路径,有中文。
Miniconda3 Windows 64-bit,Python3.7更新1:修改为纯英文路径后将原来的文件复制过去(保证有重复的文件),在zotero中更改路径同步后再执行脚本,与之前相同,重复的文件还在。
更新2:原因搞清楚了,相同文件名不同后缀的pdf不会被删除(例如abc,abc1,abc2...),之前从endnote导入到zotero里有些文献重复关联了相同的pdf,改名后就可以正常删除,希望后续能更新脚本。
abc abc1 abc2 不叫相同文件名不同后缀,这已经是不同文件名了。
“有些文献关联相同 pdf”这个说法不妥,zotero 或者 zotfile 都很难做到这个,除非手动。按照你的描述,更像是因导入产生的重复条目。
造成abc,abc1,abc2 的在我所见有一种情况,就是zotfile 在同一目录下产生多个同名附件,原因可能是同一条目多附件,也可能是相同目录下的不同条目生成的重名附件。而zotfile在处理这种重名的时候,本来就有bug。
请问zotero5没有profiles.ini怎么办?
我写这个脚本的时候,zotero 早就是5以后的版本了,脚本找不到文件可能是用户中文目录的问题。
有没有考虑增加删除zotero数据库无效attachment链接?
有没有考虑增加删除zotero数据库无效attachment链接?
嗯,这个脚本反过来比就行,问题是你怎么会产生大量无效的链接附件呢?
如果是你不小心删除了,或者挪地方了,不会想着先抢救下吗?
Traceback (most recent call last):
File "zot_rm_unmaintained_files.py", line 110, in
attachments_to_remove = get_unmaintained_files(zotero_data_dir,
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
我运行的时候出现了上面问题,Python 3.8.2