Jane系の専ブラの画像キャッシュ(vch)から特定の板の物を選別するスクリプト。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import glob | |
import json | |
import os | |
import re | |
import shutil | |
import chardet | |
# Directory containing this script, always ending with a path separator so
# that the plain string concatenations used elsewhere in this file
# (e.g. ``dst_dir + file_name``) keep working.  ``abspath`` guards against
# ``__file__`` being a bare relative name: ``dirname`` would then return ""
# and every path below would resolve against the filesystem/drive root
# instead of the script's own folder.
file_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "")

# Folder that is scanned for *.vch cache files.
cache_dir = os.path.join(file_dir, "VwCache", "")

# Folder that receives the selected files; created as a module-level side
# effect, exactly as before.
dst_dir = os.path.join(file_dir, "VwCache2", "")
os.makedirs(dst_dir, exist_ok=True)

# Board list file and the JSON file generated from it.
brd_path = os.path.join(file_dir, "jane2ch.brd")
brd_json_path = os.path.join(file_dir, "jane2ch.brd.json")

# Boards whose "check" flag is true; filled by read_brd_json().
check_brd_list = []
def get_board_info_dict():
    """Parse ``jane2ch.brd`` into a dictionary of board entries.

    Only lines that start with a tab are treated as board entries.  The
    first three tab-separated fields of such a line are stored as
    ``domain``, ``dir`` and ``name``; every entry starts with
    ``"check": False``.

    Returns:
        ``{"boards": [...]}`` on success, or ``None`` when the board file
        does not exist or cannot be read/decoded.
    """
    if not os.path.exists(brd_path):
        return None

    boards = []
    # NOTE(review): the file is presumably written by a Windows application
    # in the Windows code page; cp932 accepts everything the stricter
    # "shift-jis" codec accepts plus the vendor extension characters that
    # would otherwise make the whole read fail.  Confirm against a real file.
    with open(brd_path, encoding="cp932") as f:
        try:
            for line in f:
                if not line.startswith("\t"):
                    continue
                # Drop the single leading tab and the line terminator.
                fields = line[1:].rstrip("\n").split("\t")
                if len(fields) < 3:
                    # Malformed entry: skip it rather than discarding every
                    # board that was parsed successfully.
                    continue
                boards.append({
                    "domain": fields[0],
                    "dir": fields[1],
                    "name": fields[2],
                    "check": False,
                })
        except (OSError, UnicodeDecodeError):
            # Unreadable or undecodable file: report failure to the caller.
            return None
    return {"boards": boards}
def create_json(dict, path):
    """Write *dict* to *path* as UTF-8 JSON and print a confirmation.

    The output is indented by two spaces and non-ASCII characters are
    written as-is rather than as ``\\uXXXX`` escapes.

    Args:
        dict: JSON-serializable object to store.
        path: Destination file path; an existing file is overwritten.
    """
    dump_options = {"indent": 2, "ensure_ascii": False}
    with open(path, mode="w", encoding="utf-8") as json_file:
        json.dump(dict, json_file, **dump_options)
    print("jane2ch.brd.jsonを作成しました。")
def create_brd_json():
    """Generate ``jane2ch.brd.json`` from ``jane2ch.brd``.

    When the JSON file already exists the user is asked for confirmation
    and the prompt repeats until a yes/no answer is given.  When the board
    file is missing or unreadable a message is printed instead of silently
    doing nothing.
    """
    board_info_dict = get_board_info_dict()
    if board_info_dict is None:
        print("jane2ch.brdを読み込めませんでした。")
        return

    if not os.path.exists(brd_json_path):
        create_json(board_info_dict, brd_json_path)
        return

    while True:
        # Tolerate surrounding whitespace and upper-case answers.
        answer = input("jane2ch.brd.jsonを上書きしますか? (yes/no)").strip().lower()
        if answer in ("yes", "y"):
            create_json(board_info_dict, brd_json_path)
            return
        if answer in ("no", "n"):
            return
def read_brd_json():
    """Load ``jane2ch.brd.json`` and collect the boards marked for checking.

    Every board whose ``check`` value is truthy gets a ``dir_slash`` key
    (its directory wrapped in slashes).  Boards whose domain contains
    ``jbbs.shitaraba.net`` additionally get ``cate_dir``: the text after the
    last ``/`` of the domain, a slash, and the directory.  The augmented
    entries are appended to the module-level ``check_brd_list``.
    """
    with open(brd_json_path, "r", encoding="utf-8") as json_file:
        all_boards = json.load(json_file)["boards"]

    for entry in all_boards:
        if not entry["check"]:
            continue
        entry["dir_slash"] = "/" + entry["dir"] + "/"
        domain = entry["domain"]
        if "jbbs.shitaraba.net" in domain:
            # Text after the last "/" (the whole domain if there is none).
            category = domain.rpartition("/")[2]
            entry["cate_dir"] = category + "/" + entry["dir"]
        check_brd_list.append(entry)
def copy_move_vch(header_dict, path, vch_move, vch_noref):
    """Copy or move one cache file into ``dst_dir`` and print what was done.

    Args:
        header_dict: Parsed header fields of the cache file; ``URL`` and
            ``Referer`` are printed when present.
        path: Path of the source ``.vch`` file.
        vch_move: Move the file when true, otherwise copy it (with metadata).
        vch_noref: When true the ``Referer`` line is not printed.
    """
    file_name = os.path.basename(path)
    dst_path = dst_dir + file_name
    print("File =", file_name)
    # Use .get(): a header without one of these fields must not abort the
    # whole run with a KeyError before the file is transferred.
    print("URL =", header_dict.get("URL", ""))
    if not vch_noref:
        print("Referer =", header_dict.get("Referer", ""))
    if vch_move:
        shutil.move(path, dst_path)
    else:
        shutil.copy2(path, dst_path)
    print("-------------------------------")
# Host patterns, applied with re.search() to both a board's domain and a
# cache file's referer.
regex_5ch = r".*\.(2ch|5ch)\.net"
regex_sc = r".*\.2ch\.sc"
regex_open = r".*\.open2ch\.net"
regex_pink = r".*\.bbspink\.com"

# Kept out of regex_list: boards on this host are matched through their
# "cate_dir" value rather than "dir_slash".
regex_shitaraba = r"jbbs\.shitaraba\.net"

# Patterns that share the "same host family + /dir/ in referer" rule.
regex_list = [regex_5ch, regex_sc, regex_open, regex_pink]
def check_vch_header(header_dict, path, vch_move, vch_noref):
    """Transfer a cache file if its referer belongs to a checked board.

    A file without a ``Referer`` field is transferred only when
    ``vch_noref`` is true.  Otherwise the referer is compared with every
    entry of ``check_brd_list`` and the file is transferred on the first
    match.  A board matches when any of these holds:

    * its domain and the referer both match the same pattern of
      ``regex_list`` and the referer contains ``dir_slash``;
    * its domain and the referer both match ``regex_shitaraba`` and the
      referer contains the board's ``cate_dir``;
    * the referer contains both the domain and ``dir_slash``.

    Args:
        header_dict: Parsed header fields of the cache file.
        path: Path of the ``.vch`` file.
        vch_move: Move instead of copy when true.
        vch_noref: Also transfer files that have no ``Referer`` field.
    """
    # Explicit membership test instead of a bare ``except`` around the lookup.
    if "Referer" not in header_dict:
        if vch_noref:
            copy_move_vch(header_dict, path, vch_move, vch_noref)
        return
    referer = header_dict["Referer"]

    for check_brd in check_brd_list:
        domain = check_brd["domain"]
        dir_slash = check_brd["dir_slash"]

        # 2ch.net, 5ch.net, 2ch.sc, open2ch, bbspink
        same_family = any(
            re.search(regex, domain) and re.search(regex, referer)
            for regex in regex_list
        )
        matched = same_family and dir_slash in referer

        # Shitaraba ("cate_dir" is only looked up once both sides match).
        if not matched:
            matched = bool(
                re.search(regex_shitaraba, domain)
                and re.search(regex_shitaraba, referer)
                and check_brd["cate_dir"] in referer
            )

        # Any other host
        if not matched:
            matched = domain in referer and dir_slash in referer

        if matched:
            copy_move_vch(header_dict, path, vch_move, False)
            return
def _decode_vch_header(raw):
    """Decode header bytes with the detected encoding, else ISO-8859-1."""
    encoding = chardet.detect(raw)["encoding"]
    if encoding:
        try:
            return raw.decode(encoding)
        except (LookupError, UnicodeDecodeError):
            pass
    # ISO-8859-1 maps every byte value, so this fallback cannot fail.
    return raw.decode("iso-8859-1")


def _parse_vch_header(header_str):
    """Turn CRLF-separated ``attr=value`` lines into a dictionary."""
    header_dict = {}
    for line in header_str.split("\r\n"):
        # Split on the first "=" only; values may contain "=" themselves.
        attr, separator, val = line.partition("=")
        if separator:
            header_dict[attr] = val
    return header_dict


def read_vch_header(vch_move, vch_noref):
    """Scan ``cache_dir`` for ``*.vch`` files and hand each header on.

    Files whose name already exists in ``dst_dir`` are skipped.  For every
    other file the header is read, decoded and parsed, then passed to
    ``check_vch_header``.  Nothing is scanned when ``check_brd_list`` is
    empty; a hint is printed instead.

    Args:
        vch_move: Move matching files instead of copying them.
        vch_noref: Also transfer files that have no ``Referer`` field.
    """
    if not check_brd_list:
        print("jane2ch.brd.jsonで目的の板のcheck項目をtrueに変更してください。")
        return

    for vch in glob.glob(cache_dir + "*.vch"):
        if os.path.exists(dst_dir + os.path.basename(vch)):
            continue

        with open(vch, mode="rb") as f:
            # The first four bytes are read as a little-endian integer; that
            # value plus one is the number of bytes read from the start of
            # the file as the header.
            # NOTE(review): whether the stored length is meant to include
            # the 4-byte prefix is not visible here - confirm against the
            # vch format.
            header_end = int.from_bytes(f.read(4), "little") + 1
            f.seek(0)
            header = f.read(header_end)

        # Skip the binary length prefix for detection and for every decode
        # path, so it can neither skew the detection nor end up inside the
        # first attribute name.
        header_dict = _parse_vch_header(_decode_vch_header(header[4:]))

        # Called after the file is closed so that moving it cannot fail
        # because this process still holds it open.
        check_vch_header(header_dict, vch, vch_move, vch_noref)
def main(args):
    """Run the action selected on the command line.

    Args:
        args: Parsed arguments with the boolean attributes ``json``,
            ``move`` and ``noref``.

    With ``args.json`` set, only the board JSON is (re)generated.  Otherwise
    the existing board JSON is loaded and the cache is scanned; when the
    JSON file does not exist a hint is printed instead.
    """
    if args.json:
        create_brd_json()
        # Plain return instead of the site-provided exit(), which is meant
        # for interactive sessions and is missing under ``python -S``.
        return

    if not os.path.exists(brd_json_path):
        print("引数に-jsonを指定してjane2ch.brd.jsonを作成してください。")
        return

    read_brd_json()
    read_vch_header(args.move, args.noref)
if __name__ == "__main__":
    # Command-line entry point: three independent on/off switches.
    cli_switches = (
        ("-json", "jane2ch.brd.jsonを作成"),
        ("-move", "vchをコピーではなく移動"),
        ("-noref", "リファラーが登録されていないvchをコピー/移動"),
    )
    arg_parser = argparse.ArgumentParser()
    for switch, description in cli_switches:
        arg_parser.add_argument(switch, action="store_true", help=description)
    args = arg_parser.parse_args()
    main(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment