This script first collects every duplicated title and then extracts the related item IDs. Next, it finds duplicate items by comparing their Fields and URLs sections and adds the IDs of the redundant copies to a list. Finally, you can delete the redundant items if desired. Related blog post: “1Password去重脚本” (1Password Deduplication Script), https://www.wyr.me/post/741
import os
import re
import pickle
import subprocess
from collections import defaultdict
def run_command(command, error_msg=None, show_output=True):
    """Run a shell command and return its output. On failure, optionally print
    a message and the captured output; transient EOF errors are retried."""
    try:
        output = subprocess.check_output(
            command, shell=True, text=True, stderr=subprocess.STDOUT)
        return output
    except subprocess.CalledProcessError as e:
        if error_msg:
            print(error_msg)
        if show_output:
            print(e.output)
        if ": EOF" in e.output:
            print('Hit an error, retrying...')
            return run_command(command, error_msg, True)
        return e.output
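# Illustrative behavior (the command below is only an example):
# run_command("op --version") returns the CLI's stdout on success; on failure
# the captured output is printed and returned instead of raising, so callers
# can still scan it for useful text.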
def extract_fields_and_urls(detail):
    # Slice out the text between the "Fields:" and "URLs:" headers of an
    # item's details; this slice serves as the key for duplicate detection.
    fields_start = detail.find("Fields:")
    urls_start = detail.find("URLs:")
    return detail[fields_start:urls_start]
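# Illustration with hypothetical `op item get` text (the exact layout of the
# CLI's human-readable output is an assumption here): given
#   "ID: ...\nFields:\n  username: a\n  password: b\nURLs:\n  https://x"
# the function returns everything from "Fields:" up to, but not including,
# "URLs:". Note that str.find() returns -1 for a missing header, which would
# skew the slice.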
# Get the list of duplicated titles
print("Fetching the list of duplicated titles...")
command = "op item list | awk '{print $2}' | sort | uniq -d"
duplicated_titles = run_command(command).splitlines()

# Regex for extracting 26-character alphanumeric item IDs
id_pattern = re.compile(r'\b[a-zA-Z0-9]{26}\b')
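# For example (the ID below is made up): id_pattern.findall(
#     "ID: abcdefghijklmnopqrstuvwxyz") returns ["abcdefghijklmnopqrstuvwxyz"],
# a 26-character alphanumeric token of the shape 1Password uses for item IDs.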
# Store the details of every duplicated title, cached on disk
if os.path.exists('cache.pkl'):
    # Load the cached details from a previous run
    with open('cache.pkl', 'rb') as f:
        title_details = pickle.load(f)
else:
    # Start with an empty mapping and create the cache file
    title_details = defaultdict(list)
    with open('cache.pkl', 'wb') as f:
        pickle.dump(title_details, f)
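# Note: the cache lets an interrupted run resume without re-querying the CLI
# for items already fetched; delete cache.pkl to force a full refetch.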
times = 0

# Fetch the details of every duplicated title
print("Fetching details for all duplicated titles...")
for title in duplicated_titles:
    error_msg = f"Error while fetching details for title \"{title}\"."
    # With several items sharing one title, `op item get` cannot pick a single
    # item; run_command returns the captured output either way, and every item
    # ID that appears in it is extracted below.
    output = run_command(f"op item get {title}", error_msg, False)
    ids = id_pattern.findall(output)
    for item_id in ids:
        if any(d['id'] == item_id for d in title_details[title]):
            continue  # Details for this ID are already cached; skip it
        error_msg = f"Error while fetching details for ID \"{item_id}\"."
        item_info = run_command(f"op item get {item_id}", error_msg)
        print(item_info)
        title_details[title].append({'id': item_id, 'info': item_info})

        # Persist title_details after every fetched item
        with open('cache.pkl', 'wb') as f:
            pickle.dump(title_details, f)

    times += 1
    if times % 10 == 0:
        print(f"Processed {times} titles.")
# Check for duplicates, keeping one copy of each
extra_items = []
print("Checking for duplicates and keeping one copy of each...")
for title, details in title_details.items():
    unique_items = []
    for detail in details:
        fields_and_urls = extract_fields_and_urls(detail['info'])
        if fields_and_urls not in unique_items:
            unique_items.append(fields_and_urls)
        else:
            # Use the ID cached alongside the details; re-running the regex on
            # the info text could pick up an unrelated 26-character token
            extra_items.append(detail['id'])

print("\nExtra item IDs to delete:")
for item_id in extra_items:
    print(item_id)
# If desired, the extra items can be deleted with the following step
for item_id in extra_items:
    error_msg = f"Error while deleting \"{item_id}\"."
    out = run_command(f"op item delete {item_id}", error_msg)
    print(out)

print('Deduplication complete!')
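Since `op item delete` permanently removes items, a cautious variant (a sketch, not part of the original script) gates the deletion step behind an explicit confirmation prompt:

# Sketch: confirm before deleting; assumes the same extra_items list and
# run_command() helper defined above.
if extra_items:
    print(f"About to delete {len(extra_items)} item(s).")
    if input('Type "yes" to confirm deletion: ').strip().lower() == 'yes':
        for item_id in extra_items:
            print(run_command(f"op item delete {item_id}",
                              f'Error while deleting "{item_id}".'))
    else:
        print('Aborted; nothing was deleted.')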
@androidcn: I found many duplicates whose title, username, and password are all identical; the URL doesn't need to be compared.

@yi-ge: @androidcn Thanks for the feedback, but I'd rather not change this. Anyone using the script can easily adjust the code to skip the URL check themselves. My concern is that changing it would make sites with multiple sub-sites under the same domain unrecognizable, especially internal enterprise systems, so the change could make the script inconvenient to use.

@androidcn: @yi-ge Thanks!
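For anyone who does want to deduplicate on the credentials alone, as suggested above, one possible adjustment (a sketch; the `username:` / `password:` line layout inside the Fields section is an assumption about the CLI's output) is to derive the comparison key from those two fields:

# Sketch: key duplicates on (username, password) parsed from the text that
# extract_fields_and_urls() returns; verify the assumed "  username: ..." line
# layout against your own `op item get` output before relying on it.
def extract_credentials(detail):
    creds = {}
    for line in extract_fields_and_urls(detail).splitlines():
        key, _, value = line.strip().partition(':')
        if key in ('username', 'password'):
            creds[key] = value.strip()
    return (creds.get('username'), creds.get('password'))

Substituting extract_credentials(detail['info']) for extract_fields_and_urls(detail['info']) in the duplicate check would then treat two items as duplicates whenever their titles, usernames, and passwords all match.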