Created
August 26, 2022 05:37
-
-
Save Xevion/7ad79d501f08ffbe34f9e16382f1bd84 to your computer and use it in GitHub Desktop.
Slug + base64 identifier URL generation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
slug.py | |
Contains operations and functions integral to slug processing and identifier extraction | |
""" | |
from math import ceil | |
from slugify import slugify | |
from typing import Optional | |
import re | |
import base64 | |
use_little: bool = True  # Byte order of the encoded identifier: True for little endian, False for big endian.
use_sign: bool = False  # Whether to assume usage of signed values
use_128: bool = False  # If true, a 128-bit identifier will be assumed (otherwise 64-bit)
general_limit: int = 50  # The general character limit for an identifier loaded slug
# Each base64 character carries 6 bits, so the identifier occupies 11 characters (64-bit) or 22 (128-bit).
id_space: int = ceil((128 if use_128 else 64) / 6)
# The 64 characters produced by URL-safe base64. The leading '-' keeps the hyphen literal
# (rather than a range operator) inside the character class built below.
b64_alphabet = "-_0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
b64_pattern = rf"[{b64_alphabet}]+"


def _byte_order() -> str:
    """Return the byte order name selected by ``use_little`` ('little' or 'big')."""
    return 'little' if use_little else 'big'


def asciify_identifier(n: int) -> str:
    """Convert a 64-bit or 128-bit integer to its unpadded URL-safe base64 string.

    The width is chosen by ``use_128``, the byte order by ``use_little`` and the
    signedness by ``use_sign``; all three are read at call time.

    Raises:
        OverflowError: If ``n`` does not fit in the configured width/signedness.
    """
    width = 16 if use_128 else 8
    raw = int.to_bytes(n, width, _byte_order(), signed=use_sign)
    # Padding is stripped so the identifier has a fixed length of ``id_space`` characters.
    return base64.urlsafe_b64encode(raw).rstrip(b'=').decode('ascii')


def generate_slug(identifier: int, text: Optional[str]) -> str:
    """Generate a slug from an identifier and, optionally, some text.

    Returns the bare encoded identifier when ``text`` is None or slugifies to
    an empty string; otherwise returns ``'<slugified text>-<identifier>'``.
    """
    ascii_id: str = asciify_identifier(identifier)
    if text is None:
        return ascii_id
    # Apostrophes are replaced with nothing before slugifying; the text part is
    # requested with max_length=20 and word_boundary=True.
    slug_text: str = slugify(text, replacements=[['\'', '']], max_length=20, word_boundary=True)
    if not slug_text:
        # Nothing survived slugification: avoid emitting a slug with a leading hyphen.
        return ascii_id
    return f'{slug_text}-{ascii_id}'


def extract_identifier(slug: str) -> Optional[int]:
    """Extract the integer identifier from the tail of a slug.

    Trailing slashes are ignored. Returns None when the slug is shorter than
    ``id_space`` characters or its last ``id_space`` characters are not all in
    ``b64_alphabet``.
    """
    slug = slug.rstrip('/')
    if len(slug) < id_space:
        return None
    identifier = slug[-id_space:]
    if re.fullmatch(b64_pattern, identifier) is None:
        return None
    # The encoder stripped the padding; appending '==' always supplies enough
    # for the decoder, which tolerates any excess.
    decoded: bytes = base64.urlsafe_b64decode(identifier.encode('ascii') + b'==')
    # Must mirror the byte order used by asciify_identifier.
    return int.from_bytes(decoded, _byte_order(), signed=use_sign)
if __name__ == '__main__':
    # Manual smoke test: encode a random identifier, then check that it can be
    # recovered from the end of several strings.
    import random

    # random.seed('f')  # uncomment for reproducible output
    bit_width = 128 if use_128 else 64
    n = int.from_bytes(random.randbytes(bit_width // 8), 'little', signed=use_sign)
    n_encode = asciify_identifier(n)

    # Prefixes placed in front of the encoded identifier, including an empty
    # one and one containing characters outside the base64 alphabet.
    prefixes = ('22', '034y530habugaiusbgasigba', '------=+++===', '')
    for prefix in prefixes:
        candidate = prefix + n_encode
        recovered = extract_identifier(candidate)
        print(f'{recovered == n} | {candidate}')

    # Full round trip through slug generation with some sample text.
    s = generate_slug(random.getrandbits(64), 'Rowdy Bash 2017 Bloopers')
    print(s)
    print(extract_identifier(s))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment