Last active
August 24, 2020 05:38
-
-
Save ninjaahhh/69235300116472bf9358baadabcd3e23 to your computer and use it in GitHub Desktop.
Convert between token IDs and names
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Radix of the token alphabet: digits 0-9 followed by letters A-Z.
TOKEN_BASE = 36
# Largest valid token ID; this is the value token_id_encode yields for TOKEN_MAX.
TOKEN_ID_MAX = 4873763662273663091  # ZZZZZZZZZZZZ
# Longest (12 characters) and highest-valued token name.
TOKEN_MAX = "ZZZZZZZZZZZZ"
def token_char_encode(char: str) -> int:
    """Return the base-36 digit value of a single token character.

    "0"-"9" map to 0-9 and "A"-"Z" map to 10-35.

    Raises:
        AssertionError: if ``char`` is not an ASCII digit or an uppercase
            ASCII letter.
    """
    code = ord(char)
    if ord("A") <= code <= ord("Z"):
        return 10 + code - ord("A")
    if ord("0") <= code <= ord("9"):
        return code - ord("0")
    raise AssertionError("unknown character {}".format(char))
def token_char_decode(id: int) -> str:
    """Return the token character for a base-36 digit value.

    Values 0-9 map to "0"-"9" and values 10-35 map to "A"-"Z".

    Raises:
        AssertionError: if ``id`` is not in ``[0, TOKEN_BASE)``.
    """
    # Raise explicitly rather than using an ``assert`` statement so the range
    # check is still enforced when Python runs with -O.
    if not 0 <= id < TOKEN_BASE:
        raise AssertionError("invalid char")
    if id < 10:
        return chr(ord("0") + id)
    return chr(ord("A") + id - 10)
def token_id_encode(name: str) -> int:
    """Encode a native token name into its integer ID (fits in a uint64).

    The last character contributes its plain base-36 digit value. Every
    character before it contributes ``(digit + 1) * TOKEN_BASE ** position``,
    so names that differ only by leading "0" characters (for example "0" and
    "00") get distinct IDs.

    Raises:
        AssertionError: if ``name`` is longer than ``TOKEN_MAX``, is empty, or
            contains anything other than 0-9 and A-Z.
    """
    # Explicit raises keep the validation active under ``python -O``.
    if len(name) > len(TOKEN_MAX):
        raise AssertionError("name too long")
    # fullmatch, unlike match with "$", also rejects a trailing newline.
    if not re.fullmatch(r"[0-9A-Z]+", name):
        raise AssertionError("name can only contain 0-9, A-Z")

    token_id = token_char_encode(name[-1])
    base = TOKEN_BASE
    # Walk the remaining characters from right to left.
    for c in name[-2::-1]:
        token_id += base * (token_char_encode(c) + 1)
        base *= TOKEN_BASE
    return token_id
def token_id_decode(id: int) -> str:
    """Decode an integer token ID (uint64 range) back into its token name.

    This reverses the scheme used by ``token_id_encode``: the lowest base-36
    digit is the last character, and each higher position was stored with an
    offset of one, which is removed again before reading the next digit.

    Raises:
        AssertionError: if ``id`` is negative or greater than ``TOKEN_ID_MAX``.
    """
    # Explicit raise keeps the range check active under ``python -O``.
    if not 0 <= id <= TOKEN_ID_MAX:
        raise AssertionError("id too big or negative")

    # Characters are produced last-to-first and reversed at the end.
    chars = [token_char_decode(id % TOKEN_BASE)]
    remaining = id // TOKEN_BASE - 1
    while remaining >= 0:
        chars.append(token_char_decode(remaining % TOKEN_BASE))
        remaining = remaining // TOKEN_BASE - 1
    return "".join(reversed(chars))
# Demo when run as a script: encode a sample name and decode its ID back.
if __name__ == '__main__':
    print(token_id_encode("QFIRE"))  # 46121774
    print(token_id_decode(46121774))  # QFIRE
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment