Skip to content

Instantly share code, notes, and snippets.

@numberoverzero
Last active July 26, 2019 17:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save numberoverzero/d2ae0af4dd0b5fdc7f0d2fec06590418 to your computer and use it in GitHub Desktop.
Save numberoverzero/d2ae0af4dd0b5fdc7f0d2fec06590418 to your computer and use it in GitHub Desktop.
support integral base string representations, especially for lexicographical sorting
def in_base(n: int, alphabet: str) -> str:
b = len(alphabet)
s = []
while n > 0:
d = n % b
s.append(alphabet[d])
n //= b
return "".join(reversed(s))
def from_base(s: str, alphabet: str):
alphabet = {c: i for (i, c) in enumerate(alphabet)}
b = len(alphabet)
n = 0
for i, c in enumerate(reversed(s)):
v = alphabet[c]
n += v * b**i
return n
b64lexi = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz{}"
# 'JI'
print(in_base(1234, lexicographical_b64, 4))
# '}}}}'
print(in_base(64**4-1, lexicographical_b64, 4))
from typing import overload
class IntPacking:
def __init__(self, alphabet):
self._i2c = alphabet
self._c2i = {c: i for (i, c) in enumerate(alphabet)}
self._b = len(alphabet)
@property
def base(self) -> int:
return self._b
@property
def alphabet(self) -> str:
return self._i2c
@overload
def __call__(self, x: str) -> int:
...
@overload
def __call__(self, x: int) -> str:
...
def __call__(self, x):
if isinstance(x, int):
func = self.to_str
elif isinstance(x, str):
func = self.to_int
return func(x)
def to_str(self, n: int) -> str:
s, b, alphabet = [], self._b, self._i2c
while n > 0:
n, d = divmod(n, b)
s.append(alphabet[d])
return "".join(reversed(s))
def to_int(self, s: str) -> int:
n, b, alphabet = 0, self._b, self._c2i
for i, c in enumerate(reversed(s)):
n += alphabet[c] * (b ** i)
return n
p = IntPacking("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz{}")
# 'JI'
p.to_str(1234)
# 1234
p.to_int("JI")
# True
p(p(12345678)) == 12345678
import math
def pack_fns(alphabet):
"""
Usage::
>>> b64lexi = (
... "0123456789"
... "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
... "abcdefghijklmnopqrstuvwxyz"
... "{}"
... )
>>> to_str, to_int = pack_fns(b64lexi)
>>> to_str(1234)
'JI'
>>> to_int('JI')
1234
"""
b = len(alphabet)
b_divisor = math.log(b, 2)
i2c = alphabet
c2i = {c: i for (i, c) in enumerate(alphabet)}
def to_str(n):
i = 0
# unset values in s will be dropped by "".join()
s = [""] * (1 + int(n.bit_length() / b_divisor))
while n > 0:
n, d = divmod(n, b)
s[i] = i2c[d]
i += 1
return "".join(reversed(s))
def to_int(s):
n = 0
for i, c in enumerate(reversed(s)):
n += c2i[c] * (b ** i)
return n
to_str.base, to_str.alphabet = b, i2c
to_int.base, to_int.alphaber = b, i2c
return to_str, to_int
alphabets = {
"hex": "0123456789abcdef",
"base64lexi": (
"0123456789"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"{}"
)
}
if __name__ == "__main__":
import re, sys
cmd, args = sys.argv[0], sys.argv[1:]
if len(args) != 3:
print("usage: {} ALIAS MODE VALUE".format(cmd))
sys.exit(1)
alias, mode, value = args
if alias not in alphabets:
print(f"error: alias must be one of {list(alphabets.keys())}")
sys.exit(1)
if mode not in {"e", "d"}:
print("error: mode must be 'e' or 'd')
sys.exit(1)
to_s, to_i = pack_fns(alphabets[alias])
fn = {"e": to_i, "d": to_s}[mode]
print(fn(value))
@numberoverzero
Copy link
Author

numberoverzero commented Jan 29, 2019

for example, useful for building composite sort keys in DynamoDb to use in begins_with conditions

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment