# Rename images downloaded from Pixiv
#!/usr/bin/env python
#coding:utf-8
# Author: Sg4Dylan --<sg4dylan#gmail.com>
# Created: 12/31/2018
# ==========================================
# Pixiv image renamer
# ==========================================
# Usage: drag and drop the folder to process onto this script
import os | |
import re | |
import sys | |
import requests | |
from lxml.html import fromstring | |
from tqdm import tqdm | |
import multiprocessing | |
# Whether to send requests through the local proxy.
I_AM_CHINESE = True

# Folder to process: the first command-line argument, or '' when none is given.
target_root = sys.argv[1] if len(sys.argv) > 1 else ''

# Proxy endpoints, applied to the session only when I_AM_CHINESE is set.
proxies = dict(
    http='http://127.0.0.1:2080',
    https='http://127.0.0.1:2080',
)

# Request headers sent with every page fetch.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/86.0.4240.193 Safari/537.36'
    ),
}

# Shared HTTP session used by proc_core.
session = requests.Session()
if I_AM_CHINESE:
    session.proxies.update(proxies)
# Leading pixiv illust id (4+ digits) plus the optional underscore after it.
_ILLUST_ID_RE = re.compile(r'^(\d{4,})_?')

# Page marker such as "_p0." or "_p12." (digits followed by a literal dot).
_PAGE_MARK_RE = re.compile(r'_p\d+\.')

# Characters Windows forbids in file names, mapped to look-alike substitutes.
_WIN_SAFE_TABLE = str.maketrans({
    '\\': '\u2572',  # BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT
    '/': '\uff0f',   # FULLWIDTH SOLIDUS
    ':': '\uff1a',   # FULLWIDTH COLON
    '*': '\u269d',   # OUTLINED WHITE STAR
    '?': '\uff1f',   # FULLWIDTH QUESTION MARK
    '"': "''",       # two apostrophes
    '<': '\u2039',   # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    '>': '\u203a',   # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    '|': '\uff5c',   # FULLWIDTH VERTICAL LINE
})

# Seconds to wait for pixiv before giving up on one file.
_REQUEST_TIMEOUT = 30


def _title_to_prefix(raw_title):
    """Turn the text of a page <title> into the 「title」-「artist」 prefix."""
    # Drop the fixed suffix at the end of the page title.
    cleaned = raw_title.replace('のイラスト - pixiv', '')
    # Drop the "#tag " parts.
    cleaned = re.sub(r'(#.*? )', '', cleaned)
    # Replace characters the Windows file system does not accept.
    cleaned = cleaned.translate(_WIN_SAFE_TABLE)
    # Split on the LAST hyphen so hyphens inside the title are preserved.
    title, _, artist = cleaned.rpartition('-')
    return f'「{title}」-「{artist}」'


def _conflict_free_name(name, attempt):
    """Return *name* for attempt 0, otherwise a variant numbered *attempt*."""
    if attempt == 0:
        return name
    marks = list(_PAGE_MARK_RE.finditer(name))
    if marks:
        # Renumber the last "_pN." marker, keeping everything around it.
        last = marks[-1]
        return f'{name[:last.start()]}_p{attempt}.{name[last.end():]}'
    # No page marker: add a counter before the extension so that every
    # attempt yields a different name and the retry loop can terminate.
    stem, ext = os.path.splitext(name)
    return f'{stem}_{attempt}{ext}'


def proc_core(target_path):
    """Rename one downloaded pixiv image after its artwork page title.

    The file name must start with the illust id (4 or more digits, optionally
    followed by ``_``); any other file is left untouched. The artwork page is
    fetched through the module-level ``session``, its ``<title>`` is turned
    into a ``「title」-「artist」`` prefix, and that prefix replaces the id in
    the file name. When the destination already exists, the page number (or a
    counter before the extension) is incremented until a free name is found.

    Args:
        target_path: Path of the file to rename.

    Returns:
        None. Failures are reported with ``print`` and the file is skipped.
    """
    illust_name = os.path.basename(target_path)
    illust_dir = os.path.dirname(target_path)

    # Only handle files that follow the pixiv naming rule.
    id_match = _ILLUST_ID_RE.search(illust_name)
    if id_match is None:
        return
    illust_id = id_match.group(1)

    # Fetch the artwork page.
    url = f'https://www.pixiv.net/artworks/{illust_id}'
    try:
        r = session.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except OSError as exc:
        # requests' RequestException derives from IOError (OSError), so this
        # covers connection, proxy and timeout failures without letting one
        # bad request abort the whole worker pool.
        print(f'ERROR: {exc} in {illust_name}')
        return
    if r.status_code != 200:
        print(f'ERROR: {r.status_code} in {illust_name}')
        return

    # Parse the page and take its title.
    tree = fromstring(r.content.decode('UTF-8'))
    illust_raw_title = tree.findtext('.//title')
    if illust_raw_title is None:
        print(f'ERROR: no <title> in page for {illust_name}')
        return

    illust_fin_name = _title_to_prefix(illust_raw_title)
    # Keep whatever followed the id (page number, extension) from the old name.
    proced_name = f'{illust_fin_name}_{illust_name[id_match.end():]}'
    print(illust_fin_name, proced_name)

    # On a name conflict, retry with an incremented number.
    conflict_id = 0
    while True:
        dest = os.path.join(
            illust_dir, _conflict_free_name(proced_name, conflict_id))
        conflict_id += 1
        # Explicit check: on POSIX os.rename would silently overwrite.
        if os.path.exists(dest):
            continue
        try:
            os.rename(target_path, dest)
        except FileExistsError:
            # Another worker took this name between the check and the rename.
            continue
        return
def get_file_list():
    """Return the path of every file found under ``target_root``, recursively.

    Paths are produced in ``os.walk`` order and are built by joining each
    visited directory with the file names it contains.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(target_root)
        for filename in filenames
    ]
if __name__ == '__main__':
    # Script entry point: rename every file under target_root in parallel.
    # freeze_support() is needed when the script is frozen into a Windows
    # executable; it is a no-op otherwise.
    multiprocessing.freeze_support()
    file_path_list = get_file_list()
    if not file_path_list:
        print(f'No files found under {target_root!r}')
    else:
        with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as exec_pool:
            # A single progress bar with a known total; each finished file
            # (in whatever order the workers complete) advances it by one.
            with tqdm(total=len(file_path_list), ascii=True) as pbar:
                for _ in exec_pool.imap_unordered(proc_core, file_path_list):
                    pbar.update()
    # Keep the console window open after a drag-and-drop launch. "pause" is
    # a Windows shell builtin, so skip it elsewhere.
    if os.name == 'nt':
        os.system('pause')
This comment has been minimized.
This comment has been minimized.
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
为什么不在保存的时候直接重命名呢?