Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Jane系の専ブラの画像キャッシュ(vch)から特定の板の物を選別するスクリプト。
import argparse
import glob
import json
import os
import re
import shutil
import chardet
# Directory that holds this script, always ending with a path separator so
# that the rest of the file can keep building paths by plain concatenation
# (e.g. ``dst_dir + file_name``).
# The path is made absolute first: ``os.path.dirname(__file__)`` can be an
# empty string when the script is started by a relative name, which would
# turn the concatenated path into the file-system root.
file_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "")
# Source directory scanned for *.vch files (trailing separator kept).
cache_dir = os.path.join(file_dir, "VwCache", "")
# Destination directory for the selected files (trailing separator kept).
dst_dir = os.path.join(file_dir, "VwCache2", "")
os.makedirs(dst_dir, exist_ok=True)
# Board list file and the JSON file generated from it.
brd_path = os.path.join(file_dir, "jane2ch.brd")
brd_json_path = os.path.join(file_dir, "jane2ch.brd.json")
# Boards whose "check" flag is set; filled in by read_brd_json().
check_brd_list = []
def get_board_info_dict():
    """Parse ``jane2ch.brd`` into a dictionary of board entries.

    Only lines that start with a tab are treated as board rows. Such a row
    is split on tabs and its first three fields are stored as ``domain``,
    ``dir`` and ``name``; every entry also gets ``"check": False``.

    Returns:
        ``{"boards": [...]}`` on success, or ``None`` when the file does not
        exist, cannot be decoded as Shift-JIS, cannot be read, or contains a
        board row with fewer than three fields.
    """
    if not os.path.exists(brd_path):
        return None

    boards = []
    with open(brd_path, encoding="shift-jis") as f:
        try:
            for line in f:
                if not line.startswith("\t"):
                    continue
                # Drop the leading tab and the line terminator, then split
                # the remaining tab-separated fields.
                fields = line[1:].rstrip("\n").split("\t")
                boards.append({
                    "domain": fields[0],
                    "dir": fields[1],
                    "name": fields[2],
                    "check": False,
                })
        except (UnicodeDecodeError, IndexError, OSError):
            # Undecodable bytes, a short row or a read error: the file is
            # reported as unusable instead of returning partial data.
            return None
    return {"boards": boards}
def create_json(dict, path):
    """Write *dict* to *path* as indented UTF-8 JSON and print a notice.

    Non-ASCII characters are written as-is (``ensure_ascii=False``). The
    printed message always names ``jane2ch.brd.json``, whatever *path* is.
    """
    with open(path, "w", encoding="utf-8") as out:
        out.write(json.dumps(dict, indent=2, ensure_ascii=False))
    print("jane2ch.brd.jsonを作成しました。")
def create_brd_json():
    """Create ``jane2ch.brd.json`` from ``jane2ch.brd``.

    The board list is obtained from ``get_board_info_dict()``. When that
    returns ``None`` a message is printed and nothing is written. When the
    JSON file already exists the user is asked for confirmation until a
    yes/no answer is given; answers are compared case-insensitively and
    surrounding whitespace is ignored. A closed standard input (EOF) is
    treated as "no" so the existing file is never overwritten by accident.
    """
    board_info_dict = get_board_info_dict()
    if board_info_dict is None:
        # Previously this case ended silently, leaving the user without any
        # hint as to why no JSON file appeared.
        print("jane2ch.brdを読み込めませんでした。")
        return

    if not os.path.exists(brd_json_path):
        create_json(board_info_dict, brd_json_path)
        return

    while True:
        try:
            answer = input("jane2ch.brd.jsonを上書きしますか? (yes/no)")
        except EOFError:
            return
        answer = answer.strip().lower()
        if answer in ("yes", "y"):
            create_json(board_info_dict, brd_json_path)
            return
        if answer in ("no", "n"):
            return
def read_brd_json():
    """Load ``jane2ch.brd.json`` and collect the boards marked for checking.

    Every board whose ``"check"`` value is truthy receives a ``"dir_slash"``
    key (its ``dir`` wrapped in slashes). Boards whose domain contains
    ``jbbs.shitaraba.net`` additionally receive ``"cate_dir"``: the text
    after the last ``/`` of the domain, a slash, and the ``dir``. The
    resulting entries are appended to the module-level ``check_brd_list``.
    """
    with open(brd_json_path, "r", encoding="utf-8") as src:
        entries = json.load(src)["boards"]

    for entry in entries:
        if not entry["check"]:
            continue
        entry["dir_slash"] = "/".join(("", entry["dir"], ""))
        host = entry["domain"]
        if "jbbs.shitaraba.net" in host:
            # Everything after the last "/" of the domain (the whole domain
            # when it contains no slash).
            category = host.rsplit("/", 1)[-1]
            entry["cate_dir"] = "/".join((category, entry["dir"]))
        check_brd_list.append(entry)
def copy_move_vch(header_dict, path, vch_move, vch_noref):
    """Copy or move one vch file into ``dst_dir`` and print what was done.

    Args:
        header_dict: Parsed header attributes of the file; the ``URL`` and
            ``Referer`` entries are printed when available.
        path: Path of the source vch file.
        vch_move: When true the file is moved, otherwise it is copied
            (``shutil.copy2``).
        vch_noref: When true the ``Referer`` line is not printed.
    """
    file_name = os.path.basename(path)
    dst_path = dst_dir + file_name
    print("File =", file_name)
    # A header without these attributes must not abort the whole run with a
    # KeyError, so missing values are printed as empty strings.
    print("URL =", header_dict.get("URL", ""))
    if not vch_noref:
        print("Referer =", header_dict.get("Referer", ""))
    if vch_move:
        shutil.move(path, dst_path)
    else:
        shutil.copy2(path, dst_path)
    print("-------------------------------")
# Host patterns that check_vch_header() tries against both the board domain
# and the referer, in this order.
regex_list = [
    r".*\.(2ch|5ch)\.net",  # 2ch.net / 5ch.net
    r".*\.2ch\.sc",         # 2ch.sc
    r".*\.open2ch\.net",    # open2ch
    r".*\.bbspink\.com",    # bbspink
]
regex_5ch, regex_sc, regex_open, regex_pink = regex_list
# Shitaraba is deliberately not part of regex_list: check_vch_header()
# matches it against the board's "cate_dir" instead of "dir_slash".
regex_shitaraba = r"jbbs\.shitaraba\.net"
def _referer_matches_board(check_brd, referer):
    """Return whether *referer* points at the board described by *check_brd*."""
    domain = check_brd["domain"]
    dir_slash = check_brd["dir_slash"]

    # 2ch.net, 5ch.net, sc, open2ch, bbspink: domain and referer must match
    # the same host pattern and the referer must contain "/<dir>/".
    same_family = any(
        re.search(regex, domain) and re.search(regex, referer)
        for regex in regex_list
    )
    if same_family and dir_slash in referer:
        return True

    # Shitaraba: the referer must contain "<category>/<dir>".
    if re.search(regex_shitaraba, domain) and re.search(regex_shitaraba, referer):
        if check_brd["cate_dir"] in referer:
            return True

    # Anything else: plain substring match on domain and "/<dir>/".
    return domain in referer and dir_slash in referer


def check_vch_header(header_dict, path, vch_move, vch_noref):
    """Copy or move a vch file when its referer belongs to a checked board.

    Args:
        header_dict: Parsed header attributes of the vch file.
        path: Path of the vch file.
        vch_move: Passed on to ``copy_move_vch`` (move instead of copy).
        vch_noref: When true, files whose header has no ``Referer``
            attribute are copied/moved as well; otherwise they are skipped.

    The file is handled at most once: processing stops at the first board
    in ``check_brd_list`` that matches the referer.
    """
    if "Referer" not in header_dict:
        if vch_noref:
            copy_move_vch(header_dict, path, vch_move, vch_noref)
        return

    referer = header_dict["Referer"]
    for check_brd in check_brd_list:
        if _referer_matches_board(check_brd, referer):
            copy_move_vch(header_dict, path, vch_move, False)
            return
def _parse_vch_header(header_str):
    """Split decoded header text into an attribute -> value dictionary.

    Lines are separated by CRLF and each line is split at its first ``=``;
    lines without ``=`` are ignored.
    """
    header_dict = {}
    for line in header_str.split("\r\n"):
        attr, sep, val = line.partition("=")
        if sep:
            header_dict[attr] = val
    return header_dict


def read_vch_header(vch_move, vch_noref):
    """Read the header of every vch file in ``cache_dir`` and dispatch it.

    Files that already exist in ``dst_dir`` are skipped. For the others the
    first four bytes are read as a little-endian integer; that many bytes
    plus one are then read from the start of the file, and everything after
    the four-byte prefix is decoded and parsed into attributes, which are
    handed to ``check_vch_header``.

    Args:
        vch_move: Move matching files instead of copying them.
        vch_noref: Also copy/move files whose header has no referer.
    """
    if not check_brd_list:
        print("jane2ch.brd.jsonで目的の板のcheck項目をtrueに変更してください。")
        return

    for vch in glob.glob(cache_dir + "*.vch"):
        if os.path.exists(dst_dir + os.path.basename(vch)):
            continue

        with open(vch, mode="rb") as f:
            header_end = int.from_bytes(f.read(4), "little") + 1
            f.seek(0)
            header = f.read(header_end)

        # Only the text after the 4-byte length prefix is detected and
        # decoded; the binary prefix is not part of the attribute text.
        header_body = header[4:]
        header_encoding = chardet.detect(header_body)["encoding"]
        try:
            header_str = header_body.decode(header_encoding)
        except (UnicodeDecodeError, LookupError, TypeError):
            # Detection failed (encoding is None or unknown) or was wrong.
            # ISO-8859-1 maps every byte to a character, so this fallback
            # cannot fail.
            header_str = header_body.decode("iso-8859-1")

        # The file is closed at this point, so it can safely be moved.
        check_vch_header(_parse_vch_header(header_str), vch, vch_move, vch_noref)
def main(args):
    """Run the action selected on the command line.

    Args:
        args: Parsed arguments providing the ``json``, ``move`` and
            ``noref`` flags.

    With ``args.json`` set, ``jane2ch.brd.json`` is (re)created and the
    process exits. Otherwise the JSON file is loaded and the cache is
    scanned; when the JSON file does not exist a hint is printed instead.

    Raises:
        SystemExit: After handling ``args.json``.
    """
    if args.json:
        create_brd_json()
        # Same effect as the previous exit() call, but without relying on
        # the helper injected by the ``site`` module, which is meant for the
        # interactive interpreter and is not guaranteed to be available.
        raise SystemExit

    if not os.path.exists(brd_json_path):
        print("引数に-jsonを指定してjane2ch.brd.jsonを作成してください。")
        return

    read_brd_json()
    read_vch_header(args.move, args.noref)
if __name__ == "__main__":
    # Command-line entry point: three boolean switches, then main().
    parser = argparse.ArgumentParser()
    for flag, help_text in (
        ("-json", "jane2ch.brd.jsonを作成"),
        ("-move", "vchをコピーではなく移動"),
        ("-noref", "リファラーが登録されていないvchをコピー/移動"),
    ):
        parser.add_argument(flag, action="store_true", help=help_text)
    args = parser.parse_args()
    main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment