Last active
March 26, 2023 14:56
-
-
Save lukestanley/a5353995a589297b53144a428dd940e4 to your computer and use it in GitHub Desktop.
xuuid 0.1.1 -- Extended Universally Unique Identifiers: extremely large UUIDs that are more certainly unique, descriptive, and supported by tooling for short local reference. There is a distinction made between "hard" and "soft" data for an identifier.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# xuuid.py | |
""" | |
xuuid 0.1.1 -- Extended Universally Unique Identifiers | |
Extremely large UUIDs that are: | |
more certainly unique, | |
descriptive, | |
supported by tooling for short local reference. | |
There is a distinction made between "hard" and "soft" data for an identifier.
"Hard" data is all of the data required for a specific identifier -- its unique signature:
the type of the identifier (a string, e.g: "person", "company", "computer motherboard") | |
identifying information (string, e.g: "Dell Inspiron model") | |
time of creation of the identifier (a Unix timestamp) | |
random data to ensure uniqueness (random bytes drawn from the `secrets` module; 5 bytes by default)
"Soft" data is supporting data such as: | |
sourcing information -- a URL where information associated with the identifier can be found | |
authentication information -- for verifying identity and correctness of some assertion | |
"AKA"s -- information about what other identifiers are asserted to be equivalent to this identifier | |
suggested short-naming | |
additional descriptive information -- additional information about the thing described | |
additional general information -- for example, suggestions on how to configure a system for use of the thing identified | |
AUTHORS: Spec by Lion Kimbro, implementation by Luke Stanley. | |
The returned "number" is a dash-separated base-62 string that encodes (optionally zlib-compressed) MsgPack data.
The MsgPack payload has two objects: the hard list and the soft dict.
Example usage: | |
>> import xuuid | |
>> xuuid.xuuid("Person","TimKindberg",soft_akas=["champignon.net"]) | |
'oZKZJRMJN-s1l0-FoPw-Wcdq-ippr-iEhc-jgwI-4OjX-1RJS-ubez-SbOS-VgPF-sh6Z-t6OW-dTKg-KlVP-bxk27dtfebE' | |
>> xuuid.parse("9F8oYJtH1-woJE-OBQ9-aRdA-gAfk-ZZJ5-pfH8-oXQi-AwUn-UhWl-vsdn-Cb3c-em1d-PvqW-TrLf-7A7Z-zA3N-Umox-Sgpx-v6aNZqYoN2dpEa") | |
{ | |
'hard_type': 'Person', | |
'hard_info': 'TimKindberg', | |
'creation_time': 1662135109456161279, | |
'randomness': '3a821d41b7', | |
'soft_akas': ['champignon.net'] | |
} | |
""" | |
import zlib | |
from sys import exit | |
from time import time_ns | |
from collections import deque | |
from secrets import token_bytes as random_bytes | |
try: | |
import msgpack | |
except ImportError: | |
print("Error: msgpack not installed. This is required. pip install msgpack") | |
exit(1) | |
# Reference instant, in nanoseconds since the Unix epoch (the unit returned by
# time_ns). Creation times are stored as an offset from this value so the
# packed integer is smaller; parse() adds it back.
BASE_TIME = 1_662_125_559_016_967_741

# Digit set for the base-62 encoding: value 0 maps to "0", value 61 to "z".
BASE_62_ALPHABET = (
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
)
def compress(data: bytes) -> bytes:
    """Return *data* zlib-compressed, but only when that makes it smaller.

    The input is deflated at the maximum level (9). If the deflated form is
    not strictly shorter than the input, the input is returned unchanged, so
    the result is not guaranteed to be a zlib stream.
    """
    deflated = zlib.compress(data, level=9)
    return deflated if len(deflated) < len(data) else data
def decompress(data: bytes) -> bytes:
    """Inflate a zlib stream and return the bytes it contains.

    Raises ``zlib.error`` when *data* is not a valid, complete zlib stream.
    """
    inflated = zlib.decompress(data)
    return inflated
def add_dashes(data: str) -> str:
    """Insert "-" separators into *data* to break it into readable groups.

    A dash is placed directly after the character at every index that is a
    multiple of 4, provided the index is at least 8 and no greater than
    ``len(data) - 12``. The result therefore starts with a 9-character group,
    continues with 4-character groups, and ends with a longer undashed tail.
    Strings too short to contain such an index are returned unchanged.
    """
    last_dash_index = len(data) - 12
    pieces = []
    for position, symbol in enumerate(data):
        pieces.append(symbol)
        if position % 4 == 0 and 8 <= position <= last_dash_index:
            pieces.append("-")
    return "".join(pieces)
def reversible_string_data_interleaving(data: str, reverse=False) -> str:
    """Rotate the characters of *data* by 10 positions.

    By default the string is rotated to the right (the last 10 characters
    wrap around to the front). With ``reverse=True`` it is rotated to the
    left by the same amount, which undoes the default rotation.
    """
    shift = -10 if reverse else 10
    ring = deque(data)
    ring.rotate(shift)
    return "".join(ring)
def bytes_to_base_62(data: bytes) -> str:
    """Encode *data* as a base-62 string (digits, then A-Z, then a-z).

    The bytes are read as one big-endian unsigned integer, which is then
    written out in base 62 using BASE_62_ALPHABET. Because the conversion
    goes through an integer, leading zero bytes are not represented, and
    empty or all-zero input yields an empty string.
    """
    remaining = int.from_bytes(data, byteorder="big")
    digits = []
    while remaining > 0:
        remaining, digit = divmod(remaining, 62)
        digits.append(BASE_62_ALPHABET[digit])
    # Digits were produced least-significant first.
    return "".join(reversed(digits))
def base_62_to_bytes(data: str) -> bytes:
    """Decode a base-62 string back into big-endian bytes.

    Each character is looked up in BASE_62_ALPHABET; a character that is not
    in the alphabet raises ``ValueError``. The result uses the minimum number
    of bytes needed for the decoded integer, so an empty string (or a string
    of only "0" digits) decodes to ``b""``.
    """
    value = 0
    for symbol in data:
        value *= 62
        value += BASE_62_ALPHABET.index(symbol)
    byte_count = (value.bit_length() + 7) // 8
    return value.to_bytes(byte_count, byteorder="big")
def xuuid(
    hard_type: str,
    hard_info: str,
    soft_sources: list = None,
    soft_auth: list = None,
    soft_akas: list = None,
    soft_short: str = None,
    soft_desc: str = None,
    soft_info: str = None,
    creation_time: int = None,
    randomness: bytes = None,
    random_byte_count: int = 5,
    use_compression: bool = True,
) -> str:
    """
    Create an xuuid and return it as a dash-separated base-62 string.

    hard_type -- a string describing the type of the thing being identified
    hard_info -- a string describing the thing being identified
    soft_sources -- a list of URLs where information about the thing being identified can be found
    soft_auth -- a list of authentication information for verifying identity and correctness of some assertion
    soft_akas -- a list of other identifiers that are asserted to be equivalent to this identifier
    soft_short -- a suggested short-name for the thing being identified
    soft_desc -- additional descriptive information about the thing being identified
    soft_info -- additional general information about the thing being identified
    creation_time -- an integer nanosecond timestamp of when the identifier was created (defaults to time_ns() now)
    randomness -- WARNING, ONLY USE FOR TESTS OR IF YOU KNOW WHAT YOU ARE DOING! random bytes to ensure uniqueness (defaults to freshly generated random bytes; an empty value is also replaced)
    random_byte_count -- the number of random bytes to generate when randomness is not supplied (defaults to 5)
    use_compression -- whether to try zlib-compressing the packed data (defaults to True)

    Soft arguments left as None are omitted from the identifier.
    """
    # Compare against None so that an explicit creation_time of 0 is honoured
    # instead of being silently replaced by the current time.
    if creation_time is None:
        creation_time = time_ns()
    if not randomness:
        randomness = random_bytes(random_byte_count)

    # Store the time as a nanosecond offset from BASE_TIME to keep it small.
    elapsed_ns = creation_time - BASE_TIME
    hard_list = [hard_type, hard_info, elapsed_ns, randomness]

    # Explicit mapping (in parameter order) rather than scanning locals():
    # it cannot pick up unrelated local variables and does not depend on how
    # the interpreter exposes the frame's locals.
    soft_candidates = (
        ("sources", soft_sources),
        ("auth", soft_auth),
        ("akas", soft_akas),
        ("short", soft_short),
        ("desc", soft_desc),
        ("info", soft_info),
    )
    soft_data = {
        key: value for key, value in soft_candidates if value is not None
    }

    packed = msgpack.dumps([hard_list, soft_data])
    # compress() itself falls back to the raw bytes when deflating does not help.
    payload = compress(packed) if use_compression else packed

    based = reversible_string_data_interleaving(bytes_to_base_62(payload))
    return add_dashes(based)
def parse(xuuid: str) -> dict:
    """
    Parse an xuuid string back into a dict of its fields.

    xuuid -- the base62 encoded xuuid to parse (dashes are ignored)

    Returns a dict with the keys "hard_type", "hard_info", "creation_time"
    (the stored offset with BASE_TIME added back), "randomness" (as a hex
    string), plus one "soft_<name>" key for every soft field present.
    """
    # Dashes are presentation only; drop them before decoding.
    undashed = xuuid.replace("-", "")
    uninterleaved = reversible_string_data_interleaving(undashed, True)
    xuuid_bytes = base_62_to_bytes(uninterleaved)

    # The payload may or may not be zlib-compressed. Only a zlib failure means
    # "not compressed"; any other exception is a real error and must propagate
    # (a bare except would also swallow KeyboardInterrupt and SystemExit).
    try:
        uncompressed = decompress(xuuid_bytes)
    except zlib.error:
        uncompressed = xuuid_bytes

    hard_list, soft_dict = msgpack.loads(uncompressed)

    # Restore the "soft_" prefix that is stripped when the xuuid is created.
    soft_fields = {"soft_" + key: value for key, value in soft_dict.items()}
    hard_fields = {
        "hard_type": hard_list[0],
        "hard_info": hard_list[1],
        "creation_time": hard_list[2] + BASE_TIME,
        "randomness": hard_list[3].hex(),
    }
    return {**hard_fields, **soft_fields}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment