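"""
E绅士翻译注射器💉Wiki 解析实现
(E-Hentai Translation Injector 💉 — wiki parsing implementation.)

直接用 EhTagBuilder 就能拿到 JSON 格式的数据库
(The same database can be obtained directly in JSON form via EhTagBuilder.)
参考了 EhTagTranslator 的实现过程
(Written with reference to how EhTagTranslator implements this.)
"""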
import re
import json
import requests
# Single HTTP session shared by get_index() and get_db().
session = requests.Session()
# Base URL that get_db() prefixes to each '<category>.md' page.
# NOTE(review): this is an empty string here, so every URL built from it has
# no scheme or host — presumably the real value was removed; confirm.
db_raw_base = ''
# URL requested by get_index(): the base followed by a trailing slash.
db_index = f'{db_raw_base}/'
# Parse Markdown text into a dict keyed by the stripped first capture group of
# each regex match; every value is a dict with 'zh_name' and 'desc' taken from
# the second and third capture groups.
# NOTE(review): as shown, this function is not valid Python — indentation is
# gone and several lines appear to be missing. The notes below mark each gap;
# restore the block from the upstream copy before relying on it.
def db_parse(md_text):
# NOTE(review): the list is opened but its pattern entries and the closing
# bracket are not present.
re_pattern = [
db_parsed = {}
parse_raw = []
# NOTE(review): the condition on the next line is truncated; presumably it
# tested md_text against re_pattern[0] — confirm.
if[0], md_text):
# Collect every match of the first pattern. Each match is indexed [0], [1]
# and [2] below, so the pattern presumably has at least three capture groups.
parse_raw = re.findall(re_pattern[0], md_text)
for item in parse_raw:
item_name = item[0].strip()
# NOTE(review): `'' in item_name` is True for every string, so this test never
# fails; the second literal looks like it lost its content — confirm what it
# was meant to match, and whether a statement after the `if` is missing.
if '-' in item_name or '' in item_name:
db_parsed[item_name] = {
'zh_name': item[1].strip(),
'desc': item[2].strip()
# NOTE(review): the dict literal above is never closed.
return db_parsed
def get_index():
    """Download the index page and parse it.

    Returns:
        The dict that ``db_parse`` builds from the text fetched at
        ``db_index``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    print('Get page index...')
    response = session.get(db_index)
    # Fail loudly on an error response rather than feeding an error page to
    # the parser, which would silently yield an empty or bogus index.
    response.raise_for_status()
    return db_parse(response.text)
def get_db(index_parsed):
    """Fetch every category page named in the index and merge the results.

    Args:
        index_parsed: Mapping whose keys are category names. Each key is
            fetched from ``{db_raw_base}/{key}.md``; the values are not used.

    Returns:
        One dict holding the parsed entries of all categories. If a key
        occurs in more than one category, the entry parsed last wins.
    """
    merged = {}
    for category, _ in index_parsed.items():
        print(f'Fetch & parse: {category}')
        response = session.get(f'{db_raw_base}/{category}.md')
        # Later categories overwrite earlier ones on key collisions.
        merged.update(db_parse(response.text))
    return merged
with open('EhTagWiki.json','wb') as wp:
