Created
December 20, 2021 13:01
-
-
Save kahosan/7b453e2fda6a9309340fbc2cfad852e8 to your computer and use it in GitHub Desktop.
将 Pixiv 收集的图整理到对应的画师文件夹中
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Author: kaho
Date: 2021-10-17 19:21:49
Mail: kahosan@outlook.com
LastEditTime: 2021-10-18 21:27:29
'''
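# ==========================================
# Sort a folder of loose Pixiv images into one sub-folder per artist
# ==========================================
# Usage: drag and drop the *folder* to be processed onto this script
# If the image file names do not follow the illust_{id} pattern, adapt the regex (RE) to your own naming scheme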
import json
import multiprocessing
import os
import re
import shutil
import sys

import requests
from tqdm.std import tqdm
# Set to True to route every request through the local proxy defined below.
I_AM_CHINESE = False
# Folder to organise; stays empty unless a path is passed on the command line.
ROOTPATH = ""
# Regex that extracts the illustration id from a file name; adapt it to your
# own naming scheme if the files are not called illust_{id}...
# Raw string so that \d is not treated as a string escape, and \d+ rather than
# a fixed width: a fixed {8} would cut longer ids down to their first eight
# digits and would reject shorter ones.
RE = r"(?<=illust_)(\d+)"
# Pixiv endpoint queried per illustration; the response carries the artist's
# and the illustration's id and name.
URL = "https://www.pixiv.net/ajax/illust/"

if len(sys.argv) > 1:  # path of the folder dropped onto the script
    ROOTPATH = sys.argv[1]

# Prepare the session and, if requested, configure the proxy.
proxy = {'http': 'http://127.0.0.1:8888', 'https': 'http://127.0.0.1:8888'}
session = requests.Session()
if I_AM_CHINESE:
    session.proxies.update(proxy)
def get_dict_val(_dict: dict, key: str):
    """Return the first truthy value stored under ``key`` in a nested dict.

    The top level of ``_dict`` is checked first. If it holds no truthy value
    for ``key``, every value is searched recursively in insertion order and
    the first hit wins. Only dicts are descended into; lists and other
    containers are not searched.

    Args:
        _dict: Parsed JSON object to search. Anything that is not a dict
            yields the empty string.
        key: Key to look up.

    Returns:
        The first truthy value found, or ``""`` when there is none.
    """
    if not isinstance(_dict, dict):
        return ""

    value = _dict.get(key)
    if value:
        return value

    for child in _dict.values():
        found = get_dict_val(child, key)
        # Stop at the first hit so that a later sibling without the key
        # cannot overwrite the result with "".
        if found:
            return found
    return ""
def mkdir_artist(root_path: str, name: str, id: str, img_id: str):
    """Make sure the folder ``<root_path>/<name>_<id>`` exists.

    The folder is created directly instead of checking for it first, so two
    pool workers handling images of the same artist at the same moment cannot
    trip over each other: whoever loses the race simply finds the directory
    already in place.

    Args:
        root_path: Directory that holds the artist folders.
        name: Artist name, already stripped of characters the file system
            rejects.
        id: Artist id, appended to the folder name.
        img_id: Illustration id; only used in the error message.

    Returns:
        ``False`` when the folder does not exist and could not be created,
        ``None`` otherwise.
    """
    folder_path = os.path.join(root_path, f"{name}_{id}")
    try:
        os.mkdir(folder_path)
    except OSError:
        # An already existing directory is fine; anything else (illegal
        # characters in the name, a plain file in the way, ...) is reported.
        if not os.path.isdir(folder_path):
            # Most illegal names are handled by the caller's replacements;
            # whatever still slips through has to be fixed by hand.
            print(f"画师名不合法: '{name}' 图片 ID: '{img_id}'\n")
            return False
    return None
def get_img_list(root_path: str):
    """Collect the illustration ids found in the file names of a folder.

    Every entry of ``root_path`` is matched against the module-level ``RE``
    pattern. Entries that do not match, or that are not regular files, are
    reported on stdout and skipped.

    Args:
        root_path: Folder whose direct entries are inspected.

    Returns:
        A tuple ``(ids, names)`` of two parallel lists: the matched id and
        the original file name of each accepted entry.
    """
    matched_ids = []
    matched_names = []
    for entry in os.listdir(root_path):
        hit = re.search(RE, entry)
        if hit is None or not os.path.isfile(os.path.join(root_path, entry)):
            print(f"该文件名错误或该文件为文件夹: {entry}")
            continue
        matched_ids.append(hit.group())
        matched_names.append(entry)
    return matched_ids, matched_names
# Characters Windows does not allow in file names, mapped to look-alikes.
# Written as escapes so the fullwidth forms cannot be silently folded back
# into their ASCII counterparts by an editor or copy/paste.
_WINDOWS_NAME_FIXES = str.maketrans({
    '\\': '\u2572',  # box drawings light diagonal
    '/': '\uff0f',   # fullwidth solidus
    ':': '\uff1a',   # fullwidth colon
    '*': '\u269d',   # outlined white star
    '?': '\uff1f',   # fullwidth question mark
    '"': "''",       # two apostrophes
    '<': '\u2039',   # single left-pointing angle quotation mark
    '>': '\u203a',   # single right-pointing angle quotation mark
    '|': '\uff5c',   # fullwidth vertical line
})


def _sanitize_artist_name(name: str) -> str:
    """Replace the characters Windows forbids in file names."""
    return name.translate(_WINDOWS_NAME_FIXES)


def start(old_id: str, new_id: str):
    """Look up the artist of one illustration and move the file accordingly.

    Runs inside a pool worker, so every failure is reported on stdout and
    swallowed instead of being raised: one bad file must not abort the
    whole batch.

    Args:
        old_id: File name of the image inside ``ROOTPATH``.
        new_id: Illustration id extracted from that file name.

    Returns:
        None.
    """
    try:
        r = session.get(f"{URL}{new_id}", timeout=30)
    except requests.RequestException as exc:
        print(f' ERROR: request failed in {new_id}\n{exc}\n')
        return

    try:
        data = json.loads(r.content)
    except ValueError:
        # Not JSON at all, e.g. an HTML error page served by a proxy.
        data = {}

    # Three cases end up here: the illustration was deleted, the id does not
    # exist, or there is a network problem.
    if r.status_code != 200:
        print(f' ERROR: {r.status_code} in {new_id}\n{get_dict_val(data, "message")}\n')
        return

    artist_name = get_dict_val(data, "userName")
    artist_id = get_dict_val(data, "userId")
    if not artist_name or not artist_id:
        print(f' ERROR: no artist data in {new_id}\n')
        return

    artist_name = _sanitize_artist_name(str(artist_name))
    old_path = os.path.join(ROOTPATH, old_id)
    new_path = os.path.join(ROOTPATH, f"{artist_name}_{artist_id}")

    # Leave the file where it is when the artist folder could not be created.
    if mkdir_artist(ROOTPATH, artist_name, artist_id, new_id) is not False:
        try:
            shutil.move(old_path, new_path)
        except OSError as exc:  # shutil.Error is an OSError subclass
            print(f' ERROR: could not move {old_id}\n{exc}\n')
if __name__ == '__main__':
    multiprocessing.freeze_support()
    if not os.path.isdir(ROOTPATH):
        # No folder was dropped onto the script (or the path is not a folder).
        print("使用方法: 把待处理的「文件夹」拖放到脚本上")
    else:
        img_id_list, raw_id_list = get_img_list(ROOTPATH)
        with multiprocessing.Pool(processes=6) as pool:
            # Submit the tasks one by one instead of using starmap: starmap
            # only returns once everything is finished, which would leave the
            # progress bar frozen until the very end.
            pending = [
                (raw_id, pool.apply_async(start, (raw_id, img_id)))
                for raw_id, img_id in zip(raw_id_list, img_id_list)
            ]
            with tqdm(total=len(pending), ascii=True) as pbar:
                for raw_id, task in pending:
                    try:
                        task.get()
                    except Exception as exc:
                        # Top-level boundary: report the failure and carry on
                        # with the remaining files.
                        print(f' ERROR: {raw_id}\n{exc}\n')
                    pbar.update()
        print("移动完成")
    if os.name == "nt":
        # Keep the console window open when started by drag and drop.
        os.system("PAUSE")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment