Skip to content

Instantly share code, notes, and snippets.

@kood-r
Last active June 9, 2021 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kood-r/5ec77cc6cf5446c7e5bab05901247158 to your computer and use it in GitHub Desktop.
Save kood-r/5ec77cc6cf5446c7e5bab05901247158 to your computer and use it in GitHub Desktop.
Jane系の専ブラの画像キャッシュ(vch)から特定の板の物を選別するスクリプト。
import argparse
import glob
import json
import os
import re
import shutil
import sys

import chardet
# Directory containing this script, always absolute and ending with a path
# separator so file names can be appended directly. abspath() guards against
# a __file__ without a directory part, which would otherwise leave only a
# bare separator (i.e. the drive/filesystem root).
file_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "")
# Image cache directory that is scanned for *.vch files.
cache_dir = os.path.join(file_dir, "VwCache", "")
# Destination directory for the selected cache files; created on import.
dst_dir = os.path.join(file_dir, "VwCache2", "")
os.makedirs(dst_dir, exist_ok=True)
# Board list file and the JSON file generated from it. file_dir already ends
# with a separator, so no extra one is inserted here.
brd_path = file_dir + "jane2ch.brd"
brd_json_path = file_dir + "jane2ch.brd.json"
# Board entries whose "check" flag is true; filled by read_brd_json().
check_brd_list = []
def get_board_info_dict(path=None):
    """Parse the tab-separated board list file into a dictionary.

    Only lines that start with a tab character are treated as board entries.
    After the leading tab such a line holds the board's domain, directory
    and name separated by tabs. All other lines are ignored.

    Args:
        path: Board list file to read. Defaults to the module-level
            ``brd_path``.

    Returns:
        ``{"boards": [...]}`` where every item has the keys ``"domain"``,
        ``"dir"``, ``"name"`` and ``"check"`` (always ``False``), or
        ``None`` when the file does not exist, cannot be read or decoded as
        Shift-JIS, or contains a board line with fewer than three fields.
    """
    if path is None:
        path = brd_path
    if not os.path.exists(path):
        return None

    boards = []
    with open(path, encoding="shift-jis") as f:
        try:
            for line in f:
                if not line.startswith("\t"):
                    continue
                # Drop the leading tab and the line terminator before
                # splitting the remaining fields.
                fields = line[1:].rstrip("\n").split("\t")
                boards.append({
                    "domain": fields[0],
                    "dir": fields[1],
                    "name": fields[2],
                    "check": False,
                })
        except (OSError, UnicodeDecodeError, IndexError):
            # Unreadable or malformed board list: report "no data" to the
            # caller instead of returning a partial result.
            return None
    return {"boards": boards}
def create_json(dict, path):
    """Write *dict* to *path* as indented UTF-8 JSON and print a notice.

    Non-ASCII characters are written as-is (not ``\\u`` escaped) and the
    output is indented by two spaces.

    Args:
        dict: JSON-serializable object to store.
        path: Destination file; it is created or overwritten.
    """
    with open(path, mode="w", encoding="utf-8") as out:
        out.write(json.dumps(dict, indent=2, ensure_ascii=False))
    print("jane2ch.brd.jsonを作成しました。")
def create_brd_json():
    """Create jane2ch.brd.json from the board list, asking before overwrite.

    The board list is loaded with ``get_board_info_dict()``. If the JSON
    file at ``brd_json_path`` does not exist yet it is written right away;
    otherwise the user is prompted until a yes/no answer is given and the
    file is rewritten only on "yes"/"y".
    """
    board_info_dict = get_board_info_dict()
    if board_info_dict is None:
        # Tell the user why nothing was generated instead of returning
        # without any output.
        print("jane2ch.brdを読み込めませんでした。")
        return

    if not os.path.exists(brd_json_path):
        create_json(board_info_dict, brd_json_path)
        return

    while True:
        # Normalize the answer so that e.g. "Y" or "yes " is accepted too.
        answer = input("jane2ch.brd.jsonを上書きしますか? (yes/no)").strip().lower()
        if answer in ("yes", "y"):
            create_json(board_info_dict, brd_json_path)
            return
        if answer in ("no", "n"):
            return
def read_brd_json():
    """Load the checked boards from jane2ch.brd.json into check_brd_list.

    Every board whose ``"check"`` value is truthy gets a ``"dir_slash"``
    key (its directory wrapped in slashes). Boards whose domain contains
    ``jbbs.shitaraba.net`` additionally get ``"cate_dir"``: the part of the
    domain after its last slash, a slash, and the directory. The augmented
    entries are appended to the module-level ``check_brd_list``.
    """
    with open(brd_json_path, "r", encoding="utf-8") as f:
        all_boards = json.load(f)["boards"]

    for entry in all_boards:
        if not entry["check"]:
            continue
        directory = entry["dir"]
        entry["dir_slash"] = "/" + directory + "/"
        host = entry["domain"]
        if "jbbs.shitaraba.net" in host:
            # Text after the last "/" of the domain is used as the category.
            category = host.rpartition("/")[2]
            entry["cate_dir"] = category + "/" + directory
        check_brd_list.append(entry)
def copy_move_vch(header_dict, path, vch_move, vch_noref):
    """Copy or move one cache file into dst_dir and log what was done.

    Args:
        header_dict: Parsed header of the file; ``"URL"`` is always printed
            and ``"Referer"`` is printed unless *vch_noref* is true.
        path: Path of the source cache file.
        vch_move: Move the file when true, otherwise copy it (with
            metadata, via ``shutil.copy2``).
        vch_noref: True when the file is handled without a referer, in
            which case the referer line is not printed.
    """
    base_name = path.rpartition(os.sep)[2]
    target = dst_dir + base_name

    print("File =", base_name)
    print("URL =", header_dict["URL"])
    if not vch_noref:
        print("Referer =", header_dict["Referer"])

    transfer = shutil.move if vch_move else shutil.copy2
    transfer(path, target)
    print("-------------------------------")
# Host-name patterns used to decide whether a board's domain and a cache
# file's referer belong to the same site family.
regex_5ch = r".*\.(2ch|5ch)\.net"      # 2ch.net / 5ch.net
regex_sc = r".*\.2ch\.sc"              # 2ch.sc
regex_open = r".*\.open2ch\.net"       # open2ch.net
regex_pink = r".*\.bbspink\.com"       # bbspink.com
# jbbs.shitaraba.net is matched separately (not part of regex_list) because
# it is compared by category/directory rather than by directory alone.
regex_shitaraba = r"jbbs\.shitaraba\.net"
regex_list = [regex_5ch, regex_sc, regex_open, regex_pink]
def check_vch_header(header_dict, path, vch_move, vch_noref):
    """Copy/move a cache file if its referer belongs to a checked board.

    Args:
        header_dict: Parsed header of the cache file.
        path: Path of the cache file.
        vch_move: Passed through to ``copy_move_vch`` (move instead of copy).
        vch_noref: When true, files that have no ``"Referer"`` entry are
            copied/moved as well; when false they are skipped.

    The file is transferred at most once: the function returns as soon as
    the first board in ``check_brd_list`` matches.
    """
    referer = header_dict.get("Referer")
    if referer is None:
        # No referer recorded: only handled when explicitly requested.
        if vch_noref:
            copy_move_vch(header_dict, path, vch_move, vch_noref)
        return

    for check_brd in check_brd_list:
        domain = check_brd["domain"]
        dir_slash = check_brd["dir_slash"]

        # 2ch.net / 5ch.net, 2ch.sc, open2ch, bbspink: board domain and
        # referer must match the same site pattern (the sub-domain may
        # differ) and the referer must contain "/<dir>/".
        same_site = any(
            re.search(regex, domain) and re.search(regex, referer)
            for regex in regex_list
        )
        matched = same_site and dir_slash in referer

        # Shitaraba: compared by "<category>/<dir>".
        if (not matched
                and re.search(regex_shitaraba, domain)
                and re.search(regex_shitaraba, referer)):
            matched = check_brd["cate_dir"] in referer

        # Any other site: the literal domain and "/<dir>/" must both occur
        # in the referer.
        if not matched:
            matched = domain in referer and dir_slash in referer

        if matched:
            copy_move_vch(header_dict, path, vch_move, False)
            return
def _decode_vch_header(header):
    """Decode raw header bytes (4-byte length prefix included) to text.

    The encoding guessed by chardet is tried first; when there is no guess
    or decoding with it fails, ISO-8859-1 is used, which accepts any byte
    sequence. In both cases the 4-byte prefix is stripped before decoding
    so it cannot end up inside the first key.
    """
    payload = header[4:]
    encoding = chardet.detect(header)["encoding"]
    if encoding:
        try:
            return payload.decode(encoding)
        except (LookupError, UnicodeDecodeError):
            pass
    return payload.decode("iso-8859-1")


def _parse_vch_header(header_str):
    """Turn CRLF-separated ``key=value`` lines into a dictionary.

    Each line is split at its first ``=``; lines without ``=`` are skipped.
    """
    header_dict = {}
    for line in header_str.split("\r\n"):
        attr, sep, val = line.partition("=")
        if not sep:
            continue
        header_dict[attr] = val
    return header_dict


def read_vch_header(vch_move, vch_noref):
    """Scan cache_dir for *.vch files and dispatch each to check_vch_header.

    Files whose name already exists in ``dst_dir`` are skipped. For every
    other file the first four bytes are read as a little-endian integer;
    that value plus one is the number of bytes read from the start of the
    file as the header, which is then decoded and parsed into a dictionary.

    Args:
        vch_move: Move matching files instead of copying them.
        vch_noref: Also handle files that have no referer entry.
    """
    if not check_brd_list:
        print("jane2ch.brd.jsonで目的の板のcheck項目をtrueに変更してください。")
        return

    for vch in glob.glob(cache_dir + "*.vch"):
        file_name = vch.rpartition(os.sep)[2]
        if os.path.exists(dst_dir + file_name):
            continue

        with open(vch, mode="rb") as f:
            header_end = int.from_bytes(f.read(4), "little") + 1
            f.seek(0)
            header = f.read(header_end)

        # The file is closed at this point, so a subsequent move cannot be
        # blocked by our own open handle.
        header_dict = _parse_vch_header(_decode_vch_header(header))
        check_vch_header(header_dict, vch, vch_move, vch_noref)
def main(args):
    """Run the action selected on the command line.

    Args:
        args: Parsed arguments with the boolean attributes ``json``,
            ``move`` and ``noref``.

    With ``args.json`` set, the board JSON is (re)generated and the process
    exits. Otherwise the checked boards are loaded from ``brd_json_path``
    and the cache is scanned; if that file does not exist a hint is
    printed instead.

    Raises:
        SystemExit: After the JSON file has been handled in ``-json`` mode.
    """
    if args.json:
        create_brd_json()
        # sys.exit() instead of the bare exit(): the latter is injected by
        # the site module for interactive use and is not always available.
        sys.exit()

    if not os.path.exists(brd_json_path):
        print("引数に-jsonを指定してjane2ch.brd.jsonを作成してください。")
        return

    read_brd_json()
    read_vch_header(args.move, args.noref)
if __name__ == "__main__":
    # Command-line entry point: three independent on/off switches.
    parser = argparse.ArgumentParser()
    for flag, description in (
        ("-json", "jane2ch.brd.jsonを作成"),
        ("-move", "vchをコピーではなく移動"),
        ("-noref", "リファラーが登録されていないvchをコピー/移動"),
    ):
        parser.add_argument(flag, action="store_true", help=description)
    args = parser.parse_args()
    main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment