Skip to content

Instantly share code, notes, and snippets.

@imptype
Last active April 18, 2024 23:01
Show Gist options
  • Save imptype/c03da1cffc668b614c50557d441dd54c to your computer and use it in GitHub Desktop.
Save imptype/c03da1cffc668b614c50557d441dd54c to your computer and use it in GitHub Desktop.
Compress text to Unicode text for Discord Custom IDs.
# Output alphabet for the ASCII variant: an alternative that converts number
# strings to printable ASCII, for smaller deta base strings.  It holds every
# character from '!' to '~' in code-point order; ' ' (space) is reserved and
# therefore left out.
ascii_chars = [chr(code) for code in range(ord('!'), ord('~') + 1)]
def power_sum(values, base, offset=0):
    """Return the number whose little-endian digits in *base* are *values*.

    Args:
        values: Iterable of digits; the first item is the least significant.
        base: Radix each digit position is weighted by.
        offset: Added to every exponent, i.e. the result is multiplied by
            ``base ** offset``.

    Returns:
        ``sum(values[i] * base ** (i + offset))``; 0 for empty *values*.
    """
    return sum(
        value * base ** (index + offset)
        for index, value in enumerate(values)
    )
def convert_text(text, chars):
    """Pack *text* into a shorter string of printable ASCII characters.

    Each character of *text* becomes a digit equal to its 1-based position
    in *chars*, in base ``len(chars) + 1``.  Digit 0 is never produced, which
    is what lets ``revert_text`` detect the end of a group.  Digits are
    accumulated least-significant first into one number per output
    character, and that number is used as an index into ``ascii_chars``.

    Args:
        text: String to compress; every character must occur in *chars*.
        chars: Ordered alphabet.  The same alphabet, in the same order, must
            be passed to ``revert_text`` to decode.

    Returns:
        The encoded string; ``''`` when *text* is empty.

    Raises:
        KeyError: If *text* contains a character that is not in *chars*.
        IndexError: If a character's position in *chars* is too large to be
            stored in a single ``ascii_chars`` entry.
    """
    base = len(chars) + 1
    limit = len(ascii_chars)
    digit_of = {char: index + 1 for index, char in enumerate(chars)}
    pieces = []
    temp = []  # digits of the output character currently being filled
    for char in text:
        value = digit_of[char]  # KeyError = char missing from the char set
        if value >= limit:
            # A lone digit must itself be a valid ascii_chars index; fail
            # here with a clear message instead of deep inside the packing.
            raise IndexError(
                'symbol {!r} has digit {} but only {} output characters '
                'are available'.format(char, value, limit)
            )
        # NOTE(review): this test weights the already-packed digits by one
        # extra factor of base, so it is stricter than strictly necessary.
        # It is kept unchanged so encoded output stays the same as before.
        if value * base ** len(temp) + power_sum(temp, base, 1) > limit:
            pieces.append(ascii_chars[power_sum(temp, base)])
            temp = [value]
        else:
            temp.append(value)
    if temp:  # flush the last group; nothing to flush for empty text
        pieces.append(ascii_chars[power_sum(temp, base)])
    return ''.join(pieces)
def revert_text(text, chars):
    """Decode a string produced by ``convert_text`` back to the original text.

    Every character of *text* is looked up in ``ascii_chars``; its index is
    then split into base ``len(chars) + 1`` digits, least significant first,
    and each digit ``d`` is mapped back to ``chars[d - 1]``.

    Args:
        text: Encoded string.
        chars: The alphabet, in the same order, that was used for encoding.

    Returns:
        The decoded string.

    Raises:
        ValueError: If *text* contains a character that is not in
            ``ascii_chars``, or if a group contains the digit 0, which the
            encoder never emits (corrupt input or wrong alphabet).
    """
    base = len(chars) + 1
    symbols = list(chars)
    pieces = []
    for char in text:
        value = ascii_chars.index(char)  # ValueError = not an output char
        while value:
            value, digit = divmod(value, base)
            if not digit:
                # Without this check digit 0 would index symbols[-1] and
                # silently decode to the last alphabet character.
                raise ValueError(
                    'invalid encoded character {!r} for this '
                    'alphabet'.format(char)
                )
            pieces.append(symbols[digit - 1])
    return ''.join(pieces)
# Demo: round-trip a digit string through the printable-ASCII codec.
text = '10123456789012345679999999999999'
# The alphabet is sorted because the iteration order of a set of strings can
# differ between interpreter runs (string hashes are randomized), and the
# decoder needs exactly the same alphabet order as the encoder.
chars = ''.join(sorted(set(text)))
print('Base:', len(chars), '\n')
print('Start text ({}): {}'.format(len(text), text), '\n')
text = convert_text(text, chars)
print('Ascii text ({}): {}'.format(len(text), text), '\n')
text = revert_text(text, chars)
print('Revert text ({}): {}'.format(len(text), text), '\n')
Base: 11
Start text (500): 1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:
Unicode text (100): π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘π© ˜π‘—‘π© šπΉ·‘
Revert text (500): 1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:1012345678901234567:
# Largest Unicode code point (1_114_111).
MAX_UNICODE = 0x10FFFF
# First code point of the surrogate range 0xD800-0xDFFF (55_296); surrogates
# are unprintable/pointers.
START_SURROGATE = 0xD800
# Number of code points in the surrogate range (2_048).
MAX_SURROGATE = 0x800
def validate_unicode(value, reverse=False):
    """Move *value* across the surrogate gap so it maps to a usable code point.

    Args:
        value: Packed number (forward) or ``ord()`` of an encoded character
            (reverse).
        reverse: False (default) when encoding: values at or above
            ``START_SURROGATE`` are raised by ``MAX_SURROGATE`` so they land
            after the surrogate range.  True when decoding: the same amount
            is subtracted again.

    Returns:
        The shifted value; values below ``START_SURROGATE`` are returned
        unchanged in both directions.
    """
    if value >= START_SURROGATE:
        # NOTE(review): the reverse direction uses the same threshold as the
        # forward one, so it assumes the input is never a surrogate itself
        # (the forward direction never produces one).
        if reverse:
            value -= MAX_SURROGATE
        else:
            value += MAX_SURROGATE
    return value
def power_sum(values, base, offset=0):
    """Return the number whose little-endian digits in *base* are *values*.

    Args:
        values: Iterable of digits; the first item is the least significant.
        base: Radix each digit position is weighted by.
        offset: Added to every exponent, i.e. the result is multiplied by
            ``base ** offset``.

    Returns:
        ``sum(values[i] * base ** (i + offset))``; 0 for empty *values*.
    """
    return sum(
        value * base ** (index + offset)
        for index, value in enumerate(values)
    )
def convert_text(text, chars):
    """Pack *text* into a shorter string of Unicode characters.

    Each character of *text* becomes a digit equal to its 1-based position
    in *chars*, in base ``len(chars) + 1``.  Digit 0 is never produced, which
    is what lets ``revert_text`` detect the end of a group.  Digits are
    accumulated least-significant first into one number per output
    character; that number is passed through ``validate_unicode`` (to skip
    the surrogate range) and turned into a character with ``chr``.

    Args:
        text: String to compress; every character must occur in *chars*.
        chars: Ordered alphabet.  The same alphabet, in the same order, must
            be passed to ``revert_text`` to decode.

    Returns:
        The encoded string; ``''`` when *text* is empty.

    Raises:
        KeyError: If *text* contains a character that is not in *chars*.
        ValueError: If a single digit cannot be represented by any valid
            code point (alphabet far too large).
    """
    base = len(chars) + 1
    digit_of = {char: index + 1 for index, char in enumerate(chars)}
    pieces = []
    temp = []  # digits of the output character currently being filled
    for char in text:
        value = digit_of[char]  # KeyError = char missing from the char set
        weighted = value * base ** len(temp)
        # NOTE(review): this first test weights the already-packed digits by
        # one extra factor of base, so it is stricter than strictly
        # necessary.  It is kept unchanged so existing output is preserved.
        overflow = weighted + power_sum(temp, base, 1) > MAX_UNICODE
        if not overflow:
            # validate_unicode() raises values past the surrogate range, so
            # the shifted code point must fit as well or chr() would fail.
            candidate = power_sum(temp, base) + weighted
            overflow = validate_unicode(candidate) > MAX_UNICODE
        if overflow and not temp:
            raise ValueError(
                'symbol {!r} has digit {} which does not fit in a single '
                'code point'.format(char, value)
            )
        if overflow:
            pieces.append(chr(validate_unicode(power_sum(temp, base))))
            temp = [value]
        else:
            temp.append(value)
    if temp:  # flush the last group; avoids emitting chr(0) for empty text
        pieces.append(chr(validate_unicode(power_sum(temp, base))))
    return ''.join(pieces)
def revert_text(text, chars):
    """Decode a string produced by ``convert_text`` back to the original text.

    The code point of every character of *text* is passed through
    ``validate_unicode(..., True)`` to undo the surrogate shift, then split
    into base ``len(chars) + 1`` digits, least significant first; each digit
    ``d`` is mapped back to ``chars[d - 1]``.

    Args:
        text: Encoded string.
        chars: The alphabet, in the same order, that was used for encoding.

    Returns:
        The decoded string.

    Raises:
        ValueError: If a group contains the digit 0, which the encoder never
            emits (corrupt input or wrong alphabet).
    """
    base = len(chars) + 1
    symbols = list(chars)
    pieces = []
    for char in text:
        value = validate_unicode(ord(char), True)
        while value:
            value, digit = divmod(value, base)
            if not digit:
                # Without this check digit 0 would index symbols[-1] and
                # silently decode to the last alphabet character.
                raise ValueError(
                    'invalid encoded character {!r} for this '
                    'alphabet'.format(char)
                )
            pieces.append(symbols[digit - 1])
    return ''.join(pieces)
# Demo: round-trip 25 user ids through the Unicode codec.
text = '1012345678901234567:' * 25 # storing 25 user ids in a custom id
# The alphabet is sorted because the iteration order of a set of strings can
# differ between interpreter runs (string hashes are randomized), and the
# decoder needs exactly the same alphabet order as the encoder.
chars = ''.join(sorted(set(text)))
print('Base:', len(chars), '\n')
print('Start text ({}): {}'.format(len(text), text), '\n')
text = convert_text(text, chars)
print('Unicode text ({}): {}'.format(len(text), text), '\n')
text = revert_text(text, chars)
print('Revert text ({}): {}'.format(len(text), text), '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment