Skip to content

Instantly share code, notes, and snippets.

@thevickypedia
Last active May 4, 2023 15:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thevickypedia/e6ef4de2735168f0134a1fbc50a0cfb8 to your computer and use it in GitHub Desktop.
Save thevickypedia/e6ef4de2735168f0134a1fbc50a0cfb8 to your computer and use it in GitHub Desktop.
Tokenize a string using Python
import base64
import binascii
import string
# First four characters of a "\uXXXX" escape whose two high hex digits are
# zero: a backslash, the letter "u", and "00".
UNICODE_PREFIX = "\\u00"

# Sample text used by the demo statements in this script.
input_text = "hello world"
def hex_encode(text):
    r"""Return *text* with every character written as a unicode escape.

    Characters up to U+FFFF become ``\uXXXX``; characters above that become
    ``\UXXXXXXXX``. Escaping code points (rather than the individual UTF-8
    bytes) keeps the result decodable with the ``unicode_escape`` codec for
    non-ASCII text as well. For pure-ASCII text the output is the familiar
    ``\u00XX`` sequence per character.

    Args:
        text: The string to encode.

    Returns:
        The escaped string; an empty string for empty input.
    """
    return "".join(
        # Four hex digits cover the BMP; anything larger needs the 8-digit form.
        f"\\u{code:04x}" if code <= 0xFFFF else f"\\U{code:08x}"
        for code in map(ord, text)
    )
def hex_decode(text):
    r"""Resolve the backslash escapes in *text* and return the plain string.

    The ``unicode_escape`` codec reads every non-escape byte as Latin-1, so
    the text is first turned into bytes with the ``latin-1`` codec instead of
    UTF-8. Characters that do not fit in Latin-1 are written out as
    ``\uXXXX`` / ``\UXXXXXXXX`` by the ``backslashreplace`` handler and are
    restored by the decode step. Raw non-ASCII characters in *text* therefore
    survive instead of being turned into mojibake.

    Args:
        text: A string containing escape sequences such as ``\u0068``.

    Returns:
        The decoded string.

    Raises:
        UnicodeDecodeError: If *text* contains a malformed escape sequence.
    """
    raw = text.encode("latin-1", errors="backslashreplace")
    return raw.decode("unicode_escape")
def base_encode(text, urlsafe: bool = False):
    """Return the Base64 encoding of *text* as a string.

    Args:
        text: The string to encode; it is converted to UTF-8 bytes first.
        urlsafe: When true, use the URL- and filesystem-safe alphabet
            (``-`` and ``_`` in place of ``+`` and ``/``).

    Returns:
        The Base64 text, including any ``=`` padding.
    """
    raw = text.encode()
    # The two substituted characters must be distinct, otherwise the output
    # cannot be decoded; urlsafe_b64encode uses the standard "-" / "_" pair.
    encoder = base64.urlsafe_b64encode if urlsafe else base64.b64encode
    return encoder(raw).decode()
def base_decode(text):
    """Decode Base64 *text* and return the result as a UTF-8 string.

    Both the standard alphabet (``+`` / ``/``) and the URL-safe alphabet
    (``-`` / ``_``) are accepted. Without the translation step below,
    ``base64.b64decode`` would discard ``-`` and ``_`` as non-alphabet
    characters and either fail or return the wrong bytes.

    Args:
        text: Base64 data as an ASCII string or a bytes-like object.

    Returns:
        The decoded text.

    Raises:
        ValueError: If *text* is a string containing non-ASCII characters
            (raised as ``UnicodeEncodeError``, a ``ValueError`` subclass).
        binascii.Error: If the data is incorrectly padded.
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    if isinstance(text, str):
        data = text.encode("ascii")
    else:
        # memoryview rejects non-buffer objects with TypeError.
        data = memoryview(text).tobytes()
    # Map the URL-safe characters back onto the standard alphabet.
    data = data.translate(bytes.maketrans(b"-_", b"+/"))
    return base64.b64decode(data).decode()
def int_encode(text):
    """Pack *text* into a single non-negative integer.

    The text is encoded as UTF-8 and the resulting bytes are read as one
    little-endian number, so the first byte is the least significant.
    Trailing NUL characters add nothing to the value.

    Args:
        text: The string to encode.

    Returns:
        The integer form of the text; ``0`` for an empty string.
    """
    payload = text.encode('utf-8')
    return int.from_bytes(payload, byteorder='little')
def int_decode(text):
    """Turn an integer built from little-endian UTF-8 bytes back into text.

    Args:
        text: Despite the name, an ``int``; the code calls ``bit_length()``
            and ``to_bytes()`` on it.

    Returns:
        The decoded string; an empty string for ``0``.
    """
    # Smallest whole number of bytes that can hold the value.
    byte_count = (text.bit_length() + 7) // 8
    raw = text.to_bytes(byte_count, byteorder='little')
    return raw.decode('utf-8')
# Demo: run input_text through each encoder/decoder pair, print the encoded
# form, and check that decoding gives back the original text.
# NOTE: the checks are plain assert statements, which Python skips under -O.
# Hex (unicode-escape) round trip.
encoded = hex_encode(input_text)
print(f"Hex encoded: {encoded}")
decoded = hex_decode(encoded)
assert decoded == input_text
# Base64 round trip, using the default (non-urlsafe) alphabet.
encoded = base_encode(input_text)
print(f"Base64 encoded: {encoded}")
decoded = base_decode(encoded)
assert decoded == input_text
# Integer round trip; here `encoded` is an int rather than a str.
encoded = int_encode(input_text)
print(f"Int encoded: {encoded}")
decoded = int_decode(encoded)
assert decoded == input_text
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment