Skip to content

Instantly share code, notes, and snippets.

@Sg4Dylan
Last active November 13, 2020 13:53
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Sg4Dylan/6f678e7bef35c6985082750afd291dd5 to your computer and use it in GitHub Desktop.
Save Sg4Dylan/6f678e7bef35c6985082750afd291dd5 to your computer and use it in GitHub Desktop.
给 Pixiv 下载的图片重命名
#!/usr/bin/env python
#coding:utf-8
# Author: Sg4Dylan --<sg4dylan#gmail.com>
# Created: 12/31/2018
# ==========================================
# Pixiv 图片重命名
# ==========================================
# 使用方法: 把待处理的「文件夹」拖放到脚本上
import os
import re
import sys
import requests
from lxml.html import fromstring
from tqdm import tqdm
import multiprocessing
# Set to True to send every request through the local proxy below.
I_AM_CHINESE = True

# Directory to process: the first command-line argument, or '' when the
# script is started without one.
target_root = sys.argv[1] if len(sys.argv) > 1 else ''

# Proxy endpoints applied to the session when I_AM_CHINESE is set.
proxies = dict(
    http='http://127.0.0.1:2080',
    https='http://127.0.0.1:2080',
)

# Headers sent with each page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36',
}

# Shared HTTP session; the proxy configuration is attached only on request.
session = requests.Session()
if I_AM_CHINESE:
    session.proxies.update(proxies)
# File names handled by proc_core start with the numeric pixiv illust id.
_ID_PREFIX_RE = re.compile(r'^(\d{4,})_?')
# Page index part of a pixiv file name, e.g. "_p0." or "_p12.".
_PAGE_SUFFIX_RE = re.compile(r'_p\d+\.')
# Seconds to wait for pixiv before giving up on one file.
_REQUEST_TIMEOUT = 30
# Characters Windows rejects in file names, mapped to look-alike characters.
# The fullwidth forms are written as escapes so they cannot be silently
# normalised back to the ASCII character they are meant to replace.
_ILLEGAL_CHAR_TABLE = str.maketrans({
    '\\': '╲',
    '/': '\uff0f',   # fullwidth solidus
    ':': '\uff1a',   # fullwidth colon
    '*': '⚝',
    '?': '\uff1f',   # fullwidth question mark
    '"': '\'\'',
    '<': '‹',
    '>': '›',
    '|': '\uff5c',   # fullwidth vertical line
})


def _build_display_name(raw_title):
    """Turn a pixiv page title into the 「title」-「author」 file name prefix."""
    # Drop the fixed trailing suffix of the page title.
    name = raw_title.replace('のイラスト - pixiv', '')
    # Drop the leading "#tag " parts.
    name = re.sub(r'(#.*? )', '', name)
    # Replace characters the Windows file system does not accept.
    name = name.translate(_ILLEGAL_CHAR_TABLE)
    # Split on the LAST hyphen only, so hyphens inside the title survive.
    head, _, tail = name.rpartition('-')
    return f"「{head}」-「{tail}」"


def _conflict_name(name, conflict_id):
    """Return *name* altered with *conflict_id* so it no longer clashes."""
    renamed, replaced = _PAGE_SUFFIX_RE.subn(f'_p{conflict_id}.', name)
    if replaced:
        return renamed
    # No "_pN." part to bump: append the counter to the stem instead, so the
    # retry loop always gets a new candidate.
    stem, ext = os.path.splitext(name)
    return f'{stem}_{conflict_id}{ext}'


def proc_core(target_path):
    """Rename one downloaded pixiv image after its artwork title and author.

    The file name must start with the numeric illust id (at least four
    digits); any other file is left untouched. The artwork page is fetched
    through the module-level ``session`` with ``headers``, the page title is
    converted into ``「title」-「author」`` and that text replaces the id at
    the start of the file name. When the destination already exists, the
    page index in the name is replaced by an increasing counter until a free
    name is found.

    Args:
        target_path: Path of the image file to rename.

    Returns:
        None. Failures to fetch or parse the page are reported with
        ``print`` and the file is skipped.
    """
    illust_name = os.path.basename(target_path)
    illust_dir = os.path.dirname(target_path)
    id_match = _ID_PREFIX_RE.search(illust_name)
    if id_match is None:
        return
    illust_id = id_match.group(1)

    # Fetch the artwork page. A network failure only skips this file; it
    # must not propagate and abort the caller's whole batch.
    url = f'https://www.pixiv.net/artworks/{illust_id}'
    try:
        r = session.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print(f'ERROR: {err} in {illust_name}')
        return
    if r.status_code != 200:
        print(f'ERROR: {r.status_code} in {illust_name}')
        return

    # Parse the page and take its <title>.
    tree = fromstring(r.content.decode('UTF-8'))
    illust_raw_title = tree.findtext('.//title')
    if not illust_raw_title:
        print(f'ERROR: no title in {illust_name}')
        return
    illust_fin_name = _build_display_name(illust_raw_title)

    # Swap the id prefix for the display name, keeping the rest of the
    # original name. A callable replacement keeps the title text literal.
    proced_name = _ID_PREFIX_RE.sub(lambda _m: f'{illust_fin_name}_', illust_name)
    print(illust_fin_name, proced_name)

    # On a name clash, bump the trailing counter until a free name is found.
    confict_id = 0
    candidate = proced_name
    while True:
        destination = os.path.join(illust_dir, candidate)
        # os.rename silently overwrites on POSIX, so check explicitly; the
        # FileExistsError handler still covers Windows.
        if not os.path.exists(destination):
            try:
                os.rename(target_path, destination)
                return
            except FileExistsError:
                pass
        confict_id += 1
        candidate = _conflict_name(proced_name, confict_id)
def get_file_list(root=None):
    """Collect the paths of all files found below a directory tree.

    Args:
        root: Directory to walk recursively. When omitted, the module-level
            ``target_root`` is used, which matches the original behaviour.

    Returns:
        A list of file paths, each built by joining the walked directory
        with the file name, in the order ``os.walk`` yields them. The list
        is empty when the directory holds no files.
    """
    if root is None:
        root = target_root
    return [
        os.path.join(dirpath, name)
        for dirpath, _dirs, files in os.walk(root)
        for name in files
    ]
if __name__ == '__main__':
    # Needed when the script is frozen into a Windows executable.
    multiprocessing.freeze_support()
    file_path_list = get_file_list()
    # Rename files in parallel, one worker process per CPU. Results arrive
    # in completion order; only their count matters for the progress bar.
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as exec_pool:
        # A single progress bar: wrapping the iterator in a second tqdm
        # would draw two bars that overwrite each other.
        with tqdm(total=len(file_path_list), ascii=True) as pbar:
            for _ in exec_pool.imap_unordered(proc_core, file_path_list):
                pbar.update()
    # Keep the console window open after a drag-and-drop launch. "pause"
    # is a Windows shell builtin, so it is skipped elsewhere.
    if os.name == 'nt':
        os.system('pause')
@Mapaler
Copy link

Mapaler commented May 30, 2019

为什么不在保存的时候直接重命名呢?

@Sg4Dylan
Copy link
Author

为什么不在保存的时候直接重命名呢?

  1. UserScript 开发经常要面对网页改版,咱实在摸不动了;
  2. UserScript 不能修改拖放保存的文件名;
  3. UserScript 不能对之前未保存的图片改名。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment