Skip to content

Instantly share code, notes, and snippets.

@specter119
Last active March 11, 2024 11:05
Show Gist options
  • Star 11 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save specter119/b79dc35a6091d0fd0896a9536fbddb5a to your computer and use it in GitHub Desktop.
Save specter119/b79dc35a6091d0fd0896a9536fbddb5a to your computer and use it in GitHub Desktop.
remove files not maintained by zotero
#!/usr/bin/env python
# coding: utf-8
from __future__ import print_function

import configparser
import re
import shutil
import sqlite3
import sys
import unicodedata

try:
    from pathlib import Path
except ImportError:
    from pathlib2 import Path
# Python 2 only: reload ``sys`` and switch its default encoding to UTF-8.
# The branch is never taken on Python 3.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- ``reload`` is a builtin on Python 2 only
    sys.setdefaultencoding('UTF8')
def get_zotfile_dest_and_zotero_data_dirs():
    '''
    Locate the Zotero data dir and the ZotFile destination dir.

    Reads ``profiles.ini`` from the per-platform Zotero profile dir, opens
    the ``prefs.js`` of its ``Profile0`` section and extracts the
    ``extensions.zotero.dataDir`` and ``extensions.zotfile.dest_dir``
    preferences from it.

    Returns:
        tuple(Path, Path): ``(zotero_data_dir, zotfile_dest_dir)``

    Raises:
        KeyError: ``sys.platform`` is not one of the known platforms, or
            ``profiles.ini`` is missing / has no ``Profile0`` section.
        RuntimeError: ``prefs.js`` has no ``extensions.zotfile.dest_dir``
            entry (ZotFile not installed, or no custom location configured).
    '''
    profile_dirs = {
        'darwin': Path.home() / 'Library/Application Support/Zotero',
        'linux': Path.home() / '.zotero/zotero',
        'linux2': Path.home() / '.zotero/zotero',
        'win32': Path.home() / 'AppData/Roaming/Zotero/Zotero'
    }
    profile_dir = profile_dirs[sys.platform]
    config = configparser.ConfigParser()
    config.read('{}'.format(profile_dir / 'profiles.ini'))
    configs_loc = profile_dir / config['Profile0']['Path'] / 'prefs.js'
    # Decode explicitly as UTF-8: relying on the locale codec (e.g. gbk on a
    # Chinese Windows install) either raises UnicodeDecodeError or yields
    # garbled, unusable paths.
    configs = configs_loc.read_text(encoding='utf-8')

    def find_pref(name):
        # Return the string value of ``user_pref("<name>", "<value>");`` or
        # None when the preference is absent. The name is escaped so its
        # dots only match literal dots.
        pattern = re.compile(
            r'user_pref\("' + re.escape(name) + r'",\ "(?P<value>.+)"\);')
        match = pattern.search(configs)
        return match.group('value') if match else None

    zotero_data = find_pref('extensions.zotero.dataDir')
    if zotero_data is None:
        # NOTE(review): the pref is absent when no custom data directory was
        # chosen; Zotero 5+ documents ``~/Zotero`` as the default location.
        zotero_data_dir = Path.home() / 'Zotero'
    else:
        zotero_data_dir = Path(zotero_data)

    zotfile_dest = find_pref('extensions.zotfile.dest_dir')
    if zotfile_dest is None:
        raise RuntimeError(
            'extensions.zotfile.dest_dir not found in {}: is ZotFile '
            'installed and a custom "Location of Files" configured?'.format(
                configs_loc))
    zotfile_dest_dir = Path(zotfile_dest)
    return zotero_data_dir, zotfile_dest_dir
def get_unmaintained_files(zotero_data_dir,
                           zotfile_dest_dir,
                           case_sensitive='auto'):
    '''
    List the files under the ZotFile destination dir that Zotero does not
    reference as an attachment.

    Attachment paths are read from the ``itemAttachments`` table of
    ``zotero.sqlite`` (rows with ``linkMode = 2``), stripped of their leading
    ``attachments:`` prefix and resolved against ``zotfile_dest_dir``. Local
    files whose name starts with ``.`` are never reported.

    Args:
        zotero_data_dir(Path): Zotero data dir (contains ``zotero.sqlite``)
        zotfile_dest_dir(Path): ZotFile destination dir
        case_sensitive(bool or str): whether paths are compared case
            sensitively; ``'auto'`` means True on linux and False on
            darwin and win32

    Returns:
        list of Path: the unmaintained files, sorted, spelled exactly as they
        exist on disk (comparison-only normalisation is not applied to the
        returned paths).

    Raises:
        KeyError: ``case_sensitive='auto'`` on an unknown ``sys.platform``.
        sqlite3.Error: the database cannot be opened or queried.
    '''
    if case_sensitive == 'auto':
        case_sensitive = {
            'darwin': False,
            'linux': True,
            'linux2': True,
            'win32': False
        }[sys.platform]
    # On macOS the same file name may show up in composed or decomposed
    # Unicode form depending on where it comes from, so compare both sides
    # in a single normal form there.
    normalize_unicode = sys.platform == 'darwin'

    def comparison_key(posix_path):
        # Key used only for matching; never handed back to the caller.
        if not case_sensitive:
            posix_path = posix_path.lower()
        if normalize_unicode:
            posix_path = unicodedata.normalize('NFC', posix_path)
        return posix_path

    con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
    try:
        # Rows with a NULL path cannot be matched against a file: skip them.
        rows = con.execute(
            'SELECT path FROM itemAttachments '
            'WHERE linkMode = 2 AND path IS NOT NULL').fetchall()
    finally:
        # ``with con:`` would only scope a transaction, not close the handle.
        con.close()

    maintained_keys = set(
        comparison_key(
            (zotfile_dest_dir / path.replace('attachments:', '', 1)).as_posix())
        for (path,) in rows)

    unmaintained = [
        local for local in zotfile_dest_dir.glob('**/*')
        if local.is_file() and not local.name.startswith('.')
        and comparison_key(local.as_posix()) not in maintained_keys
    ]
    return sorted(unmaintained)
def remove_unmaintained(attachments_to_remove, zotfile_dest_dir=None):
    '''
    Delete the given files, then clear the directories left empty.

    A directory counts as empty when every entry in it has a name starting
    with ``.``; such a directory is removed together with those entries.
    Directories are visited deepest first, so a parent that only becomes
    empty once its children are gone is removed as well.

    Args:
        attachments_to_remove(iterable of Path): files to delete
        zotfile_dest_dir(Path): root under which empty dirs are cleared.
            When omitted, the module-level ``zotfile_dest_dir`` set by the
            script entry point is used; if that is not defined either, only
            the files are deleted and no directory is touched.

    Raises:
        OSError: a file in ``attachments_to_remove`` cannot be deleted.
    '''
    for attachment in attachments_to_remove:
        attachment.unlink()

    if zotfile_dest_dir is None:
        # Backward compatibility with the one-argument call made when the
        # file is run as a script.
        zotfile_dest_dir = globals().get('zotfile_dest_dir')
    if zotfile_dest_dir is None:
        return

    # Real directories only: a broken symlink is neither file nor directory
    # and would make ``iterdir`` raise; a symlinked dir cannot be rmtree'd.
    directories = sorted(
        (p for p in zotfile_dest_dir.glob('**/*')
         if p.is_dir() and not p.is_symlink()),
        key=lambda p: len(p.parts),
        reverse=True)
    for directory in directories:
        if all(child.name.startswith('.') for child in directory.iterdir()):
            shutil.rmtree(directory.as_posix(), ignore_errors=True)
if __name__ == '__main__':
    # Script entry point: find the files Zotero no longer links to, list
    # them, and delete them.
    # NOTE: ``zotfile_dest_dir`` must keep this module-level name because
    # ``remove_unmaintained`` reads it when called with one argument.
    zotero_data_dir, zotfile_dest_dir = get_zotfile_dest_and_zotero_data_dirs()
    attachments_to_remove = get_unmaintained_files(zotero_data_dir,
                                                   zotfile_dest_dir)
    # ``click`` is optional. Guard only the import itself, so an ImportError
    # raised later (while printing or prompting) cannot fall through to the
    # branch that deletes without asking.
    try:
        import click
    except ImportError:
        click = None
    listing = '\n'.join([' {}'.format(p) for p in attachments_to_remove])
    if click is None:
        # Without click there is no prompt: the files are removed right away.
        print(
            'The following files no longer managed by Zotero will be removed:')
        print(listing)
        remove_unmaintained(attachments_to_remove)
    else:
        print('The following files are no longer managed by Zotero:')
        print(listing)
        if click.confirm('Do you want remove them?', default=True):
            remove_unmaintained(attachments_to_remove)
@ZMAlt
Copy link

ZMAlt commented Sep 3, 2019

Traceback (most recent call last):
  File "test.py", line 109, in <module>
    zotero_data_dir, zotfile_dest_dir = get_zotfile_dest_and_zotero_data_dirs()
  File "test.py", line 46, in get_zotfile_dest_and_zotero_data_dirs
    zotfile_dest_pat.search(configs).group('zotfile_dest'))
AttributeError: 'NoneType' object has no attribute 'group'

请问刘同学,上面的错是什么原因?

macOS
Anaconda python3.6.5

@specter119
Copy link
Author

@ZMAlt

没装 ZotFile 吗?还是装了 ZotFile 但没自定义 Location of Files?

@leonsong09
Copy link

Traceback (most recent call last):
File "zot_rm_unmaintained_files.py", line 110, in
attachments_to_remove = get_unmaintained_files(zotero_data_dir,
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
我运行的时候出现了上面问题,Python 3.8.2

@specter119
Copy link
Author

Traceback (most recent call last):
File "zot_rm_unmaintained_files.py", line 110, in
attachments_to_remove = get_unmaintained_files(zotero_data_dir,
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
我运行的时候出现了上面问题,Python 3.8.2

Zotero 开着呢么?sqlite 不能并行读写。不过我应该往里加个更合适的报错。

@leonsong09
Copy link

File "zot_rm_unmaintained_files.py", line 111, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
关闭之后使用是这个情况

@specter119
Copy link
Author

File "zot_rm_unmaintained_files.py", line 111, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
关闭之后使用是这个情况

在109行下加一行 print(zotero_data_dir, zotfile_dest_dir),注意缩进,看看获得的两个目录对不对。

@leonsong09
Copy link

Traceback (most recent call last):
File "zot_rm_unmaintained_files.py", line 112, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
添加之后这样子

@leonsong09
Copy link

G:\澶囦唤鐩榎\鏂囩尞\Zotero G:\澶囦唤鐩榎\鏂囩尞\Zotero\storage
路径乱码

File "zot_rm_unmaintained_files.py", line 111, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
关闭之后使用是这个情况

在109行下加一行 print(zotero_data_dir, zotfile_dest_dir),注意缩进,看看获得的两个目录对不对。

@specter119
Copy link
Author

G:\澶囦唤鐩榎\鏂囩尞\Zotero G:\澶囦唤鐩榎\鏂囩尞\Zotero\storage
路径乱码

File "zot_rm_unmaintained_files.py", line 111, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
关闭之后使用是这个情况

在109行下加一行 print(zotero_data_dir, zotfile_dest_dir),注意缩进,看看获得的两个目录对不对。

呃,windows 下更不能用中文目录啊,估计路径换下编码就可以吧,不准备改了的。

@specter119
Copy link
Author

G:\澶囦唤鐩榎\鏂囩尞\Zotero G:\澶囦唤鐩榎\鏂囩尞\Zotero\storage
路径乱码

File "zot_rm_unmaintained_files.py", line 111, in
zotfile_dest_dir)
File "zot_rm_unmaintained_files.py", line 65, in get_unmaintained_files
con = sqlite3.connect('{}'.format(zotero_data_dir / 'zotero.sqlite'))
sqlite3.OperationalError: unable to open database file
关闭之后使用是这个情况

在109行下加一行 print(zotero_data_dir, zotfile_dest_dir),注意缩进,看看获得的两个目录对不对。

你试试 37 行读文本的时候,换下文本编码,默认应该是utf-8,应该是对的。我这个脚本17到19行是给python2切换到 UTF-8 的,你可以都试试,我记得 python3 不需要。

@ljhburn
Copy link

ljhburn commented Apr 1, 2020

(py38) C:\Users\hp->C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py
Traceback (most recent call last):
File "C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py", line 109, in
zotero_data_dir, zotfile_dest_dir = get_zotfile_dest_and_zotero_data_dirs()
File "C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py", line 37, in get_zotfile_dest_and_zotero_data_dirs
configs = configs_loc.read_text()
File "F:\anaconda3\lib\pathlib.py", line 1217, in read_text
return f.read()
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 7061: illegal multibyte sequence
新手上路,不知道出了什么问题?

@specter119
Copy link
Author

(py38) C:\Users\hp->C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py
Traceback (most recent call last):
File "C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py", line 109, in
zotero_data_dir, zotfile_dest_dir = get_zotfile_dest_and_zotero_data_dirs()
File "C:\Users\hp-\Desktop\zot_rm_unmaintained_files.py", line 37, in get_zotfile_dest_and_zotero_data_dirs
configs = configs_loc.read_text()
File "F:\anaconda3\lib\pathlib.py", line 1217, in read_text
return f.read()
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 7061: illegal multibyte sequence
新手上路,不知道出了什么问题?

你试试把37行改为 configs = configs_loc.read_text(encoding='utf-8'),记得保持缩进。为啥win下这么多编码问题啊。

@deanneko
Copy link

deanneko commented Jun 19, 2020

(base) C:\Users\sunsh\Desktop>zot_rm_unmaintained_files.py
The following files are no longer managed by Zotero:

Do you want remove them? [Y/n]: Y
知乎留言转到这里,执行上述操作后,同步盘内重复文件并未删除,坚果云默认路径,有中文。
Miniconda3 Windows 64-bit,Python3.7

更新1:修改为纯英文路径后将原来的文件复制过去(保证有重复的文件),在zotero中更改路径同步后再执行脚本,与之前相同,重复的文件还在。

更新2:原因搞清楚了,相同文件名不同后缀的pdf不会被删除(例如abc,abc1,abc2...),之前从endnote导入到zotero里有些文献重复关联了相同的pdf,改名后就可以正常删除,希望后续能更新脚本。

@wbbeyourself
Copy link

请问zotero5没有profiles.ini怎么办?

@specter119
Copy link
Author

specter119 commented Dec 17, 2020

(base) C:\Users\sunsh\Desktop>zot_rm_unmaintained_files.py
The following files are no longer managed by Zotero:

Do you want remove them? [Y/n]: Y
知乎留言转到这里,执行上述操作后,同步盘内重复文件并未删除,坚果云默认路径,有中文。
Miniconda3 Windows 64-bit,Python3.7

更新1:修改为纯英文路径后将原来的文件复制过去(保证有重复的文件),在zotero中更改路径同步后再执行脚本,与之前相同,重复的文件还在。

更新2:原因搞清楚了,相同文件名不同后缀的pdf不会被删除(例如abc,abc1,abc2...),之前从endnote导入到zotero里有些文献重复关联了相同的pdf,改名后就可以正常删除,希望后续能更新脚本。

abc abc1 abc2 不叫相同文件名不同后缀,这已经是不同文件名了。
”有些文献关联相同 pdf“这个说法不妥,zotero 或者 zotfile 都很难做到这个,除非手动。按照你的描述,更像是因导入产生的重复条目。
造成abc,abc1,abc2 的在我所见有一种情况,就是zotfile 在同一目录下产生多个同名附件,原因可能是同一条目多附件,也可能是相同目录下的不同条目生成的重名附件。而zotfile在处理这种重名的时候,本来就有bug。

@specter119
Copy link
Author

请问zotero5没有profiles.ini怎么办?

我写这个脚本的时候,zotero 早就是5以后的版本了,脚本找不到文件可能是用户中文目录的问题。

@wilmerwang
Copy link

有没有考虑增加删除zotero数据库无效attachment链接?

@specter119
Copy link
Author

有没有考虑增加删除zotero数据库无效attachment链接?

嗯,这个脚本反过来比就行,问题是你怎么会产生大量无效的链接附件呢?
如果是你不小心删除了,或者挪地方了,不会想着先抢救下吗?

@zsc233
Copy link

zsc233 commented Mar 11, 2024

image

@specter119
Copy link
Author

specter119 commented Mar 11, 2024

image

emmm, Zotero 7 用了 zotero-attanger ?应该config换了,有空我再更新脚本吧。。。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment