Skip to content

Instantly share code, notes, and snippets.

@mnixry
Created August 24, 2023 15:27
Show Gist options
  • Save mnixry/d4a0b4d90ee9d2e9bd04bc9d7f2e9b44 to your computer and use it in GitHub Desktop.
Save mnixry/d4a0b4d90ee9d2e9bd04bc9d7f2e9b44 to your computer and use it in GitHub Desktop.
from functools import cached_property
from timeit import timeit
class BaseN:
    """Encode bytes as text in an arbitrary base defined by a character set.

    The input bytes are read as one big-endian unsigned integer, which is then
    written in base ``len(charset)`` with the least-significant digit first.
    Leading zero bytes cannot be represented by that integer, so each one is
    recorded as a trailing ``charset[0]`` character (a zero in the
    most-significant position); this makes ``decode(encode(s)) == s`` hold
    for every byte string.
    """

    def __init__(self, charset: str) -> None:
        """Build the digit lookup tables for *charset*.

        Args:
            charset: The digit alphabet; ``charset[i]`` is the digit whose
                value is ``i``. Needs at least two characters, all distinct.

        Raises:
            ValueError: If *charset* has fewer than two characters or
                contains a repeated character.
        """
        if len(charset) < 2:
            # Base 0 would divide by zero and base 1 would never shrink the
            # integer in encode(), looping forever.
            raise ValueError("charset must contain at least 2 characters")
        self.charset = charset
        self.reverse_charset = {c: i for i, c in enumerate(charset)}
        # Explicit raise instead of assert: asserts are stripped under -O.
        if len(self.charset) != len(self.reverse_charset):
            raise ValueError("charset must be unique")

    @cached_property
    def base(self) -> int:
        """Return the numeric base, i.e. the number of digits in the charset."""
        return len(self.charset)

    def encode(self, s: bytes) -> str:
        """Encode *s* into a string of charset digits.

        Args:
            s: The bytes to encode.

        Returns:
            The digits of the big-endian integer value of *s*,
            least-significant first, followed by one ``charset[0]`` per
            leading zero byte of *s*. Empty input yields an empty string.
        """
        value = int.from_bytes(s, "big")
        base = self.base
        charset = self.charset
        digits = []
        while value > 0:
            value, digit = divmod(value, base)
            digits.append(charset[digit])
        # The loop above never ends on a zero digit, so trailing charset[0]
        # characters are free to carry the count of leading zero bytes.
        raw = bytes(s)
        leading_zeros = len(raw) - len(raw.lstrip(b"\x00"))
        digits.append(charset[0] * leading_zeros)
        return "".join(digits)

    def decode(self, s: str) -> bytes:
        """Decode a string produced by :meth:`encode` back into bytes.

        Args:
            s: Text made only of characters from the charset.

        Returns:
            The original bytes, including any leading zero bytes.

        Raises:
            KeyError: If *s* contains a character that is not in the charset.
        """
        # Trailing zero digits stand for leading zero bytes (see encode).
        significant = s.rstrip(self.charset[0])
        leading_zeros = len(s) - len(significant)
        base = self.base
        lookup = self.reverse_charset
        value = 0
        # Digits are stored least-significant first, so fold from the end.
        for char in reversed(significant):
            value = value * base + lookup[char]
        body = value.to_bytes((value.bit_length() + 7) // 8, "big")
        return b"\x00" * leading_zeros + body
# Codec whose 3050 digits are the consecutive code points starting at U+4E00.
base3050 = BaseN(charset="".join(map(chr, range(0x4E00, 0x4E00 + 3050))))

# Sample payload: the 12-byte greeting repeated 10 times (120 bytes).
target_str = b"Hello World!" * 10
def test():
    """Round-trip ``target_str`` through ``base3050`` and check it is unchanged."""
    round_tripped = base3050.decode(base3050.encode(target_str))
    assert round_tripped == target_str
# Time 10000 calls of test() and report the total in milliseconds.
elapsed_ms = timeit(test, number=10000) * 1000
print(f"{elapsed_ms:.3f}ms")

# Show one encode/decode round trip of a short message.
encoded = base3050.encode(b"Hello World!")
print(encoded)  # 厯嘩劺刊僾呡句姑丂
decoded = base3050.decode(encoded)
print(decoded)  # b'Hello World!'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment