-
-
Save s4028600/64d7e01ad156b64dd483f94e4d1b136e to your computer and use it in GitHub Desktop.
PTT 文章網址轉換成看板與aid
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AID alphabet: every character encodes 6 bits (index 0-63).
_AID_TABLE = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_'


def _to_aid_digits(value: int, width: int) -> str:
    """Encode the low ``6 * width`` bits of *value* as *width* AID characters."""
    digits = []
    for _ in range(width):
        # divmod keeps everything in integer arithmetic (no float division).
        value, index = divmod(value, 64)
        digits.append(_AID_TABLE[index])
    # Digits were produced least-significant first.
    return ''.join(reversed(digits))


def get_aid_from_url(url: str) -> 'tuple[str, str]':
    """Convert a PTT web article URL into its board name and article id (AID).

    Adapted from ``get_aid_from_url`` in PyPtt. Algorithm reference:
    https://www.ptt.cc/man/C_Chat/DE98/DFF5/DB61/M.1419434423.A.DF0.html

    The article file name has the form ``M.<decimal>.A[.<hex>].html``. The
    decimal field is encoded as six AID characters and the hex field as two
    more; when the hex field is absent the last two characters are ``'00'``.

    Args:
        url: Text containing an ``https://www.ptt.cc/bbs/<board>/M....html``
            article URL. Surrounding text (a prefix, a query string, a
            fragment) is tolerated; the first matching URL is used.

    Returns:
        A ``(board, aid)`` tuple, where ``aid`` is the 8-character AID
        without the leading ``#``.

    Raises:
        ValueError: If no PTT article URL is found in *url*.
    """
    # Imported locally so the function does not rely on a module-level import.
    import re

    # Capture the fields straight from the match instead of slicing the URL at
    # fixed offsets, so text around the URL cannot shift what gets parsed.
    match = re.search(
        r'https://www\.ptt\.cc/bbs/([-.\w]+)/M\.(\d+)\.A(?:\.([0-9A-Fa-f]+))?\.html',
        url,
    )
    if match is None:
        raise ValueError('url must be www.ptt.cc article url')

    board, decimal_part, hex_part = match.groups()

    aid_0 = _to_aid_digits(int(decimal_part), 6)
    if hex_part is None:
        aid_1 = '00'
    else:
        aid_1 = _to_aid_digits(int(hex_part, 16), 2)

    return board, f'{aid_0}{aid_1}'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment