Skip to content

Instantly share code, notes, and snippets.

@Sg4Dylan
Created January 5, 2019 04:10
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save Sg4Dylan/9342e51bbef483f6f0d12dff88b1ddb3 to your computer and use it in GitHub Desktop.
E绅士翻译注射器💉Wiki 解析实现
'''
E绅士翻译注射器💉Wiki 解析实现
直接用 EhTagBuilder 就能拿到 JSON 格式的数据库
本脚本是为方便集成做的简易版解析实现
参考了 EhTagTranslator 的实现过程
'''
import re
import json
import requests
# One shared HTTP session, reused by get_index() and get_db() for every request.
session = requests.Session()
# Base URL of the raw Markdown pages in the EhTagTranslator wiki "database" folder.
db_raw_base = 'https://raw.githubusercontent.com/wiki/Mapaler/EhTagTranslator/database'
# Index page (rows.md); get_db() uses its parsed row names as the page names to fetch.
db_index = f'{db_raw_base}/rows.md'
def db_parse(md_text):
    """Parse the four-column Markdown table rows in *md_text* into a dict.

    Every ``| name | zh_name | desc | extra |`` row found in the text yields
    one entry keyed by the stripped first cell. The fourth cell is matched
    but not stored.

    A row is skipped when its first cell:

    * is empty (such a row has no usable key),
    * is a Markdown header separator, i.e. consists only of ``-`` and ``:``
      characters (``---``, ``:---:`` ...), or
    * contains ``英`` (presumably the table header row; verify against the
      wiki pages).

    Names that merely *contain* a hyphen (for example ``x-ray``) are kept.

    Args:
        md_text: Markdown source text of a wiki page.

    Returns:
        ``{name: {'zh_name': str, 'desc': str}}``. Empty when the text holds
        no table row. A later row with the same name replaces an earlier one.
    """
    # Raw string: '\|' inside a normal literal is an invalid escape sequence.
    row_pattern = re.compile(r'\|(.*?)\|(.*?)\|(.*?)\|(.*?)\|')
    separator_chars = {'-', ':'}

    db_parsed = {}
    # findall() already returns [] when nothing matches, so no pre-check needed.
    for name_cell, zh_cell, desc_cell, _extra_cell in row_pattern.findall(md_text):
        item_name = name_cell.strip()
        if not item_name:
            continue
        if set(item_name) <= separator_chars:
            # Header separator row such as "| --- | --- | --- | --- |".
            continue
        if '英' in item_name:
            continue
        db_parsed[item_name] = {
            'zh_name': zh_cell.strip(),
            'desc': desc_cell.strip(),
        }
    return db_parsed
def get_index():
    """Download the wiki index page and return its parsed table rows.

    Fetches ``db_index`` through the shared ``session`` and feeds the response
    text to ``db_parse``.

    Returns:
        The dict produced by ``db_parse`` for the index page.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
            Without this check an error page would be parsed as Markdown and
            silently yield an empty index.
        requests.RequestException: Connection failure or timeout.
    """
    print('Get page index...')
    # A timeout keeps a stalled connection from hanging the script forever.
    r = session.get(db_index, timeout=30)
    r.raise_for_status()
    return db_parse(r.text)
def get_db(index_parsed):
    """Fetch every page named in *index_parsed* and merge the parsed rows.

    For each key ``k`` the page ``{db_raw_base}/{k}.md`` is downloaded through
    the shared ``session`` and parsed with ``db_parse``. Only the keys of
    *index_parsed* are used; its values are ignored.

    A page that answers with an HTTP error status contributes nothing. This
    is reported on stdout and the remaining pages are still processed.

    Args:
        index_parsed: Mapping whose keys are page names, as returned by
            ``get_index``.

    Returns:
        One flat dict containing the entries of all fetched pages. When the
        same name appears on several pages, the page fetched last wins.

    Raises:
        requests.RequestException: Connection failure or timeout.
    """
    db_total = {}
    for k in index_parsed:
        print(f'Fetch & parse: {k}')
        cat_url = f'{db_raw_base}/{k}.md'
        # A timeout keeps a stalled connection from hanging the script forever.
        r = session.get(cat_url, timeout=30)
        if not r.ok:
            # Stay best-effort, but make the failure visible instead of
            # parsing the error page as if it were Markdown.
            print(f'Skip {k}: HTTP {r.status_code}')
            continue
        db_total.update(db_parse(r.text))
    return db_total
# Build the complete database before opening the output file, so a failed
# download cannot truncate an existing EhTagWiki.json or leave an empty one.
db_json = json.dumps(get_db(get_index()), ensure_ascii=False, indent=4)
with open('EhTagWiki.json', 'wb') as wp:
    # Binary mode plus explicit UTF-8 keeps the output bytes platform-independent.
    wp.write(db_json.encode('UTF-8'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment