Skip to content

Instantly share code, notes, and snippets.

@kahosan
Created December 20, 2021 13:01
Show Gist options
  • Save kahosan/7b453e2fda6a9309340fbc2cfad852e8 to your computer and use it in GitHub Desktop.
Save kahosan/7b453e2fda6a9309340fbc2cfad852e8 to your computer and use it in GitHub Desktop.
将 Pixiv 收集的图整理到对应的画师文件夹中
'''
Author: kaho
Date: 2021-10-17 19:21:49
Mail: kahosan@outlook.com
LastEditTime: 2021-10-18 21:27:29
'''
# ==========================================
# 将收集的散图根据画师分门别类的整理好
# ==========================================
# 使用方法: 把待处理的「文件夹」拖放到脚本上
# 如果待处理的图片名格式不为 illust_{id} 这样,需要更改正则适配为对应的格式
import os
import re
import sys
import json
import shutil
import requests
import multiprocessing
from tqdm.std import tqdm
# Whether to route requests through the local proxy configured below.
I_AM_CHINESE = False
ROOTPATH = ""
# Regex that extracts the illustration id from a file name; adapt it to your
# own naming scheme. It is a raw string so "\d" is not treated as a string
# escape, and it uses "\d+" because ids are not always eight digits long: a
# fixed "\d{8}" cuts a nine-digit id down to its first eight digits and
# rejects shorter ids altogether.
RE = r"(?<=illust_)(\d+)"
# Pixiv endpoint queried per illustration; the artist's and the image's id
# and name are all contained in its response.
URL = "https://www.pixiv.net/ajax/illust/"
if len(sys.argv) > 1:  # path of the folder that was dropped onto the script
    ROOTPATH = sys.argv[1]
# Prepare the HTTP session and, if requested, configure the proxy.
proxy = {'http': 'http://127.0.0.1:8888', 'https': 'http://127.0.0.1:8888'}
session = requests.Session()
if I_AM_CHINESE:
    session.proxies.update(proxy)
def get_dict_val(_dict: dict, key: str):
    """Return the first truthy value stored under ``key`` in a nested dict.

    The mapping itself is checked first; after that each of its values is
    searched recursively, in insertion order. Only ``dict`` values are
    descended into.

    Args:
        _dict: Decoded JSON object to search. Anything that is not a dict
            yields ``""``.
        key: Key to look for.

    Returns:
        The first truthy value found under ``key``, or ``""`` when there is
        none.
    """
    if not isinstance(_dict, dict):
        return ""
    value = _dict.get(key)
    if value:
        return value
    for child in _dict.values():
        found = get_dict_val(child, key)
        # Stop at the first hit: carrying on would let a later sibling that
        # lacks the key overwrite the result with "".
        if found:
            return found
    return ""
def mkdir_artist(root_path: str, name: str, id: str, img_id: str):
    """Create the folder ``{name}_{id}`` inside ``root_path`` if it is missing.

    Args:
        root_path: Directory in which the artist folder is created.
        name: Artist name, used as the first part of the folder name.
        id: Artist id, used as the second part of the folder name.
        img_id: Illustration id; only used in the error message.

    Returns:
        ``False`` when the folder could not be created, ``None`` otherwise
        (callers compare the result against ``False``).
    """
    folder_path = os.path.join(root_path, f"{name}_{id}")
    try:
        os.mkdir(folder_path)
    except FileExistsError:
        # Already there, possibly created a moment ago by another worker
        # process. That is not an error, so the caller may go on and move
        # the file.
        pass
    except OSError:
        # Most illegal names are already handled by character replacement in
        # the caller; whatever still fails has to be sorted out by hand.
        print(f"画师名不合法: '{name}' 图片 ID: '{img_id}'\n")
        return False
    return None
def get_img_list(root_path: str):
    """Collect the illustration ids found in the file names under ``root_path``.

    Every directory entry is matched against the module-level ``RE`` pattern.
    Entries that do not match, or that are not regular files, are reported on
    stdout and skipped.

    Args:
        root_path: Directory whose direct entries are scanned.

    Returns:
        A tuple ``(ids, file_names)`` of two parallel lists: the matched id
        text and the file name it was taken from.
    """
    matched_ids = []
    matched_names = []
    for entry in os.listdir(root_path):
        hit = re.search(RE, entry)
        usable = hit is not None and os.path.isfile(os.path.join(root_path, entry))
        if not usable:
            print(f"该文件名错误或该文件为文件夹: {entry}")
            continue
        matched_ids.append(hit.group())
        matched_names.append(entry)
    return matched_ids, matched_names
# Characters that Windows does not allow in file names, mapped to look-alike
# replacements. None of the replacements contains a forbidden character, so a
# single translate() pass gives the same result as a chain of replace() calls.
_WINDOWS_NAME_TABLE = str.maketrans({
    '\\': '╲',
    '/': '\uff0f',  # FULLWIDTH SOLIDUS
    ':': '\uff1a',  # FULLWIDTH COLON
    '*': '⚝',
    '?': '\uff1f',  # FULLWIDTH QUESTION MARK
    '"': "''",
    '<': '‹',
    '>': '›',
    '|': '\uff5c',  # FULLWIDTH VERTICAL LINE
})


def start(old_id: str, new_id: str):
    """Look up the artist of one illustration and move its file into the artist's folder.

    Args:
        old_id: File name of the image inside ``ROOTPATH``.
        new_id: Illustration id extracted from that file name.

    Returns:
        ``None``. A failed request, a non-200 response or an artist folder
        that cannot be created is printed, and the file is left where it is.
    """
    url = f"{URL}{new_id}"
    try:
        r = session.get(url)
    except requests.RequestException as exc:
        # Report the failure for this image only instead of letting the
        # exception abort the whole batch.
        print(f' ERROR: {exc} in {new_id}\n')
        return
    if r.status_code != 200:
        # Three possible causes: the image was deleted, the id does not
        # exist, or there is a network problem.
        try:
            message = get_dict_val(json.loads(r.content), "message")
        except ValueError:
            # The error body is not JSON (for example an HTML error page).
            message = ""
        print(f' ERROR: {r.status_code} in {new_id}\n{message}\n')
        return
    data = json.loads(r.content)
    # Replace the characters Windows does not support in the artist name.
    artist_name = get_dict_val(data, "userName").translate(_WINDOWS_NAME_TABLE)
    artist_id = get_dict_val(data, "userId")
    old_path = os.path.join(ROOTPATH, old_id)
    new_path = os.path.join(ROOTPATH, f"{artist_name}_{artist_id}")
    # If the folder still could not be created, do not move the file.
    if mkdir_artist(ROOTPATH, artist_name, artist_id, new_id) is not False:
        shutil.move(old_path, new_path)
if __name__ == '__main__':
    multiprocessing.freeze_support()
    img_id_list, raw_id_list = get_img_list(ROOTPATH)
    # Run the lookups and moves in a pool of worker processes. Every
    # (file name, illustration id) pair is submitted up front and the results
    # are then collected one by one, so the progress bar advances while the
    # work is running; pool.starmap() would only return after everything had
    # finished.
    with multiprocessing.Pool(processes=6) as pool:
        pending = [
            pool.apply_async(start, pair)
            for pair in zip(raw_id_list, img_id_list)
        ]
        with tqdm(total=len(pending), ascii=True) as pbar:
            for task in pending:
                task.get()  # re-raises any exception raised in the worker
                pbar.update()
    print("移动完成")
    os.system("PAUSE")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment