Skip to content

Instantly share code, notes, and snippets.

@lukestanley
Last active March 26, 2023 14:56
Show Gist options
  • Save lukestanley/a5353995a589297b53144a428dd940e4 to your computer and use it in GitHub Desktop.
Save lukestanley/a5353995a589297b53144a428dd940e4 to your computer and use it in GitHub Desktop.
xuuid 0.1.1 -- Extended Universally Unique Identifiers Extremely large UUIDs that are more certainly unique, descriptive, supported by tooling for short local reference. There is a distinction made between "hard" and "soft" data for an identifier.
# xuuid.py
"""
xuuid 0.1.1 -- Extended Universally Unique Identifiers
Extremely large UUIDs that are:
more certainly unique,
descriptive,
supported by tooling for short local reference.
There is a distinction made between "hard" and "soft" data for an identifier.
"Hard" data is all of the data required for a specific identifier -- its unique signature:
the type of the identifier (a string, e.g: "person", "company", "computer motherboard")
identifying information (string, e.g: "Dell Inspiron model")
time of creation of the identifier (a Unix timestamp)
random data to ensure uniqueness (bytes from the secrets module; 5 random bytes by default)
"Soft" data is supporting data such as:
sourcing information -- a URL where information associated with the identifier can be found
authentication information -- for verifying identity and correctness of some assertion
"AKA"s -- information about what other identifiers are asserted to be equivalent to this identifier
suggested short-naming
additional descriptive information -- additional information about the thing described
additional general information -- for example, suggestions on how to configure a system for use of the thing identified
AUTHORS: Spec by Lion Kimbro, implementation by Luke Stanley.
The returned "number" is a dash-separated base-62 string encoding msgpack data (zlib-compressed when that makes it smaller).
The MsgPack has two objects, the hard dict and the soft dict.
Example usage:
>> import xuuid
>> xuuid.xuuid("Person","TimKindberg",soft_akas=["champignon.net"])
'oZKZJRMJN-s1l0-FoPw-Wcdq-ippr-iEhc-jgwI-4OjX-1RJS-ubez-SbOS-VgPF-sh6Z-t6OW-dTKg-KlVP-bxk27dtfebE'
>> xuuid.parse("9F8oYJtH1-woJE-OBQ9-aRdA-gAfk-ZZJ5-pfH8-oXQi-AwUn-UhWl-vsdn-Cb3c-em1d-PvqW-TrLf-7A7Z-zA3N-Umox-Sgpx-v6aNZqYoN2dpEa")
{
'hard_type': 'Person',
'hard_info': 'TimKindberg',
'creation_time': 1662135109456161279,
'randomness': '3a821d41b7',
'soft_akas': ['champignon.net']
}
"""
import zlib
from sys import exit
from time import time_ns
from collections import deque
from secrets import token_bytes as random_bytes
try:
import msgpack
except ImportError:
print("Error: msgpack not installed. This is required. pip install msgpack")
exit(1)
# Reference instant subtracted from every creation timestamp before it is
# stored, and added back by parse(). Timestamps default to time_ns(), so the
# unit is nanoseconds; read as nanoseconds since the Unix epoch this value
# falls on 2 September 2022 (UTC). Presumably chosen to keep the stored
# integer small -- changing it would shift the creation time of every
# previously issued identifier.
BASE_TIME = 1662125559016967741
# Digit set for the base-62 text encoding: decimal digits, then upper-case,
# then lower-case letters. A character's index in this string is its digit
# value, so the order must never change.
BASE_62_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
def compress(data: bytes) -> bytes:
    """Return *data* zlib-compressed, but only when that is actually shorter.

    Compression runs at the maximum level (9). If the compressed form is not
    strictly smaller than the input, the input is handed back unchanged.
    """
    packed = zlib.compress(data, level=9)
    return packed if len(packed) < len(data) else data
def decompress(data: bytes) -> bytes:
    """Inflate a zlib stream and return the original bytes.

    Raises:
        zlib.error: if *data* is not a valid zlib stream.
    """
    inflated = zlib.decompress(data)
    return inflated
def add_dashes(data: str) -> str:
    """Insert "-" separators into *data* to make it easier to read.

    A dash is placed after the character at every index that is a multiple
    of four, starting at index 8 and stopping at index ``len(data) - 12``.
    The result is a nine-character head, four-character groups, and a tail
    of at least eleven characters. Strings shorter than 20 characters are
    returned without any dashes.
    """
    last_dash_index = len(data) - 12
    pieces = []
    for position, symbol in enumerate(data):
        pieces.append(symbol)
        if position % 4 == 0 and 8 <= position <= last_dash_index:
            pieces.append("-")
    return "".join(pieces)
def reversible_string_data_interleaving(data: str, reverse=False) -> str:
    """Rotate the characters of *data* by ten positions.

    By default the string is rotated to the right (the last ten characters
    move to the front). With ``reverse`` set it is rotated ten positions to
    the left, which undoes the default rotation. Rotation wraps around, so
    strings shorter than ten characters are handled as well.
    """
    steps = -10 if reverse else 10
    ring = deque(data)
    ring.rotate(steps)
    return "".join(ring)
def bytes_to_base_62(data: bytes) -> str:
    """Convert bytes to base-62 text for easy copy-pasting (unlike base-64).

    The bytes are read as one big-endian unsigned integer and written out
    with the digits of BASE_62_ALPHABET, most significant digit first.

    Leading zero bytes carry no weight in that integer, so each one is
    written out as an explicit leading "0" digit instead of being dropped
    silently. Input without leading zero bytes never starts with "0", so
    its encoding is unaffected by this rule.

    Args:
        data: the bytes to encode; may be empty.

    Returns:
        The base-62 text; an empty string for empty input.
    """
    number = int.from_bytes(data, byteorder="big")

    # Collect digits least-significant first and reverse once at the end;
    # prepending to a string on every step would be quadratic.
    digits = []
    while number > 0:
        number, remainder = divmod(number, 62)
        digits.append(BASE_62_ALPHABET[remainder])

    leading_zero_bytes = 0
    for byte in data:
        if byte != 0:
            break
        leading_zero_bytes += 1

    zero_prefix = BASE_62_ALPHABET[0] * leading_zero_bytes
    return zero_prefix + "".join(reversed(digits))
def base_62_to_bytes(data: str) -> bytes:
    """Convert base-62 text to bytes.

    The text is read as a big-endian number over BASE_62_ALPHABET and
    returned as the shortest big-endian byte string holding that number.
    Each leading "0" digit adds nothing to the number and is instead
    turned into one leading zero byte, so byte strings that begin with
    zero bytes are not shortened.

    Args:
        data: base-62 text with any dashes already removed; may be empty.

    Returns:
        The decoded bytes; empty bytes for empty input.

    Raises:
        ValueError: if *data* contains a character outside BASE_62_ALPHABET.
    """
    # Map each digit character to its value once, rather than scanning the
    # alphabet with str.index() for every input character.
    digit_values = {symbol: value for value, symbol in enumerate(BASE_62_ALPHABET)}

    number = 0
    for position, symbol in enumerate(data):
        try:
            number = number * 62 + digit_values[symbol]
        except KeyError:
            raise ValueError(
                f"invalid base-62 character {symbol!r} at position {position}"
            ) from None

    leading_zero_digits = len(data) - len(data.lstrip(BASE_62_ALPHABET[0]))
    body = number.to_bytes((number.bit_length() + 7) // 8, byteorder="big")
    return b"\x00" * leading_zero_digits + body
def xuuid(
    hard_type: str,
    hard_info: str,
    soft_sources: list = None,
    soft_auth: list = None,
    soft_akas: list = None,
    soft_short: str = None,
    soft_desc: str = None,
    soft_info: str = None,
    creation_time: int = None,
    randomness: bytes = None,
    random_byte_count: int = 5,
    use_compression: bool = True,
) -> str:
    """
    Create an xuuid.

    The "hard" data (type, info, creation time, randomness) and whichever
    "soft" fields were supplied are packed with msgpack as
    ``[hard_list, soft_dict]``, optionally zlib-compressed, written as
    base-62 text, rotated by ten characters, and finally split with dashes.

    hard_type -- a string describing the type of the thing being identified
    hard_info -- a string describing the thing being identified
    soft_sources -- a list of URLs where information about the thing being identified can be found
    soft_auth -- a list of authentication information for verifying identity and correctness of some assertion
    soft_akas -- a list of other identifiers that are asserted to be equivalent to this identifier
    soft_short -- a suggested short-name for the thing being identified
    soft_desc -- additional descriptive information about the thing being identified
    soft_info -- additional general information about the thing being identified
    creation_time -- integer nanosecond timestamp of when the identifier was created (defaults to time_ns(), i.e. now)
    randomness -- WARNING, ONLY USE FOR TESTS OR IF YOU KNOW WHAT YOU ARE DOING! random bytes to ensure uniqueness (defaults to a random byte string)
    random_byte_count -- the number of random bytes to generate when randomness is not given (defaults to 5)
    use_compression -- whether to compress the data when that makes it smaller (defaults to True)

    Returns the dash-separated base-62 identifier string.
    """
    # Compare against None explicitly: 0 is a legitimate timestamp and must
    # not be silently replaced by the current time.
    if creation_time is None:
        creation_time = time_ns()
    # An empty byte string provides no uniqueness, so it is treated the same
    # as "not supplied" and replaced with fresh random bytes.
    if not randomness:
        randomness = random_bytes(random_byte_count)

    # Stored relative to BASE_TIME; the unit is nanoseconds, matching time_ns().
    nanoseconds_since_base = creation_time - BASE_TIME
    hard_list = [hard_type, hard_info, nanoseconds_since_base, randomness]

    # Soft fields are listed explicitly (in parameter order, which fixes the
    # serialized key order) and stored without their "soft_" prefix.
    # Fields left as None are omitted entirely.
    soft_candidates = (
        ("sources", soft_sources),
        ("auth", soft_auth),
        ("akas", soft_akas),
        ("short", soft_short),
        ("desc", soft_desc),
        ("info", soft_info),
    )
    soft_dict = {key: value for key, value in soft_candidates if value is not None}

    uncompressed = msgpack.dumps([hard_list, soft_dict])
    smallest_bytes = compress(uncompressed) if use_compression else uncompressed

    based = reversible_string_data_interleaving(bytes_to_base_62(smallest_bytes))
    return add_dashes(based)
def parse(xuuid: str) -> dict:
    """
    Parse an xuuid.

    xuuid -- the base62 encoded xuuid to parse (dashes are allowed and ignored)

    Returns a dict with the keys "hard_type", "hard_info", "creation_time"
    (with BASE_TIME added back), "randomness" (as a hex string), and one
    "soft_<name>" key for every soft field stored in the identifier.
    """
    # Undo the encoding steps in reverse order: dashes, rotation, base-62.
    undashed = xuuid.replace("-", "")
    uninterleaved = reversible_string_data_interleaving(undashed, True)
    xuuid_bytes = base_62_to_bytes(uninterleaved)

    # The payload is only compressed when that made it smaller, so a failed
    # decompression means the bytes are the raw msgpack data. Only zlib's own
    # error is caught so that unrelated failures are not hidden.
    try:
        uncompressed = decompress(xuuid_bytes)
    except zlib.error:
        uncompressed = xuuid_bytes

    hard_list, soft_dict = msgpack.loads(uncompressed)

    hard_dict = {
        "hard_type": hard_list[0],
        "hard_info": hard_list[1],
        "creation_time": hard_list[2] + BASE_TIME,
        "randomness": hard_list[3].hex(),
    }
    # Soft keys are stored without their prefix; restore it here.
    prefixed_soft = {"soft_" + key: value for key, value in soft_dict.items()}
    return {**hard_dict, **prefixed_soft}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment