Skip to content

Instantly share code, notes, and snippets.

@khellang
Last active April 18, 2023 07:37
Show Gist options
  • Save khellang/5061e4b4206fad50cf6d8a35d134fbdb to your computer and use it in GitHub Desktop.
Save khellang/5061e4b4206fad50cf6d8a35d134fbdb to your computer and use it in GitHub Desktop.
// MIT License
//
// Copyright (c) 2018 Kristian Hellang
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
using System;
using System.Buffers.Binary;
using System.Runtime.CompilerServices;
using System.Text;
using static System.BitConverter;
/// <summary>
/// Converts <see cref="Guid"/> values to and from a compact, 26-character,
/// Base32-encoded string representation.
/// </summary>
public static class CompactGuid
{
    /// <summary>Number of characters in a compact GUID string.</summary>
    private const int Length = 26;

    /// <summary>Number of characters used to encode each 64-bit half.</summary>
    private const int HalfLength = Length / 2;

    /// <summary>Number of bytes in the binary form of a <see cref="Guid"/>.</summary>
    private const int GuidByteCount = 16;

    /// <summary>Largest value the leading character of each half may carry (4 bits).</summary>
    private const int MaxLeadingDigit = 0xF;

    /// <summary>The 32 output symbols; the index of a symbol is its 5-bit value.</summary>
    private const string Alphabet = "0123456789abcdefghjkmnpqrstvwxyz";

    private static readonly string EmptyString = new string('0', Length);

    /// <summary>Maps an ASCII code point to its 5-bit value, or -1 when it is not a valid symbol.</summary>
    private static readonly int[] AsciiMapping = GenerateAsciiMapping();

    /// <summary>
    /// Produces a 26-character, Base32-encoded representation of a <see cref="Guid"/>.
    /// </summary>
    /// <remarks>
    /// This has a bunch of nice characteristics:
    /// - Safe without encoding (uses only characters from ASCII)
    /// - Never emits the easily confused letters i, l, o or u
    /// - Easy for humans to read and pronounce
    /// - Supports full UUID range (128 bits)
    /// - Safe for URLs and file names
    /// - Case-insensitive
    /// - About 28% shorter than the default 36-character format
    /// The output does not depend on the endianness of the host.
    /// </remarks>
    /// <param name="value">The <see cref="Guid"/> to encode.</param>
    /// <returns>A 26-character, Base32-encoded <see cref="string"/>.</returns>
    public static string ToCompactString(this Guid value)
    {
        if (value == Guid.Empty)
        {
            return EmptyString;
        }

        Span<byte> raw = stackalloc byte[GuidByteCount];
        value.TryWriteBytes(raw);

        // Guid.TryWriteBytes stores the first three fields (32, 16 and 16 bits)
        // little-endian and the remaining eight bytes in textual order. Reading
        // them with explicit endianness yields the same 64-bit halves as the
        // canonical hex representation, on every platform.
        var hi = ((ulong)BinaryPrimitives.ReadUInt32LittleEndian(raw) << 32)
            | ((ulong)BinaryPrimitives.ReadUInt16LittleEndian(raw.Slice(4)) << 16)
            | BinaryPrimitives.ReadUInt16LittleEndian(raw.Slice(6));
        var lo = BinaryPrimitives.ReadUInt64BigEndian(raw.Slice(8));

        Span<char> chars = stackalloc char[Length];
        EncodeUInt64(chars.Slice(0, HalfLength), hi);
        EncodeUInt64(chars.Slice(HalfLength), lo);
        return new string(chars);
    }

    /// <summary>
    /// Parses a 26-character, Base32-encoded representation of a <see cref="Guid"/>.
    /// Strings in one of the standard <see cref="Guid"/> formats are accepted as well.
    /// </summary>
    /// <param name="chars">The characters to parse.</param>
    /// <returns>The parsed <see cref="Guid"/>.</returns>
    /// <exception cref="FormatException">If the characters represent neither a valid compact nor a standard <see cref="Guid"/> string.</exception>
    public static Guid Parse(ReadOnlySpan<char> chars)
    {
        return TryParse(chars, out var result) ? result : throw new FormatException("Invalid compact GUID format.");
    }

    /// <summary>
    /// Tries to parse a 26-character, Base32-encoded representation of a <see cref="Guid"/>.
    /// When the input is not a valid compact string, parsing falls back to
    /// <see cref="Guid.TryParse(ReadOnlySpan{char}, out Guid)"/>.
    /// </summary>
    /// <remarks>
    /// Parsing is case-insensitive; <c>o</c>/<c>O</c> are read as <c>0</c> and
    /// <c>i</c>/<c>I</c>/<c>l</c>/<c>L</c> are read as <c>1</c>.
    /// </remarks>
    /// <param name="chars">The characters to parse.</param>
    /// <param name="result">The parsed <see cref="Guid"/>, or <see cref="Guid.Empty"/> on failure.</param>
    /// <returns>Returns <c>true</c> if the parsing succeeded, <c>false</c> otherwise.</returns>
    public static bool TryParse(ReadOnlySpan<char> chars, out Guid result)
    {
        if (chars.Length == Length
            && TryDecodeUInt64(chars.Slice(0, HalfLength), out var hi)
            && TryDecodeUInt64(chars.Slice(HalfLength), out var lo))
        {
            result = FromUInt64Pair(hi, lo);
            return true;
        }

        // No standard GUID format is 26 characters long, so the fallback can
        // never be confused with the compact form.
        return Guid.TryParse(chars, out result);
    }

    /// <summary>
    /// Rebuilds a <see cref="Guid"/> from the two 64-bit halves produced by the encoder.
    /// </summary>
    private static Guid FromUInt64Pair(ulong hi, ulong lo)
    {
        Span<byte> raw = stackalloc byte[GuidByteCount];

        // Exact inverse of the reads performed in ToCompactString. The casts
        // deliberately truncate, hence the explicit unchecked context.
        unchecked
        {
            BinaryPrimitives.WriteUInt32LittleEndian(raw, (uint)(hi >> 32));
            BinaryPrimitives.WriteUInt16LittleEndian(raw.Slice(4), (ushort)(hi >> 16));
            BinaryPrimitives.WriteUInt16LittleEndian(raw.Slice(6), (ushort)hi);
        }

        BinaryPrimitives.WriteUInt64BigEndian(raw.Slice(8), lo);
        return new Guid(raw);
    }

    /// <summary>
    /// Writes <paramref name="value"/> as 13 alphabet characters into <paramref name="chars"/>.
    /// </summary>
    private static void EncodeUInt64(Span<char> chars, ulong value)
    {
        // Because a GUID is 128 bits and 26 characters with 5 bits each is 130,
        // the leading character of each half (the 1st and 14th character of the
        // full string) is limited to 4 bits (hex).
        chars[0] = Alphabet[(int)(value >> 60)];
        value <<= 4;

        // Each following character carries 5 bits.
        for (var i = 1; i < chars.Length; i++)
        {
            chars[i] = Alphabet[(int)(value >> 59)];
            value <<= 5;
        }
    }

    /// <summary>
    /// Decodes one 13-character half into a 64-bit value.
    /// </summary>
    /// <returns>
    /// <c>false</c> if a character is not ASCII, is not part of the alphabet, or
    /// if the leading character carries more than 4 bits.
    /// </returns>
    private static bool TryDecodeUInt64(ReadOnlySpan<char> chars, out ulong result)
    {
        result = 0;

        for (var i = 0; i < chars.Length; i++)
        {
            var c = chars[i];
            if (c >= AsciiMapping.Length)
            {
                result = 0;
                return false; // Not ASCII.
            }

            var digit = AsciiMapping[c];
            if (digit < 0)
            {
                result = 0;
                return false; // Invalid ASCII character.
            }

            // The leading character only has room for 4 bits; a larger value
            // would overflow 64 bits and silently decode to a different GUID.
            if (i == 0 && digit > MaxLeadingDigit)
            {
                result = 0;
                return false;
            }

            result = (result << 5) | (uint)digit;
        }

        return true;
    }

    /// <summary>
    /// Builds the ASCII lookup table used for decoding.
    /// </summary>
    private static int[] GenerateAsciiMapping()
    {
        var mapping = new int[128];
        for (var i = 0; i < mapping.Length; i++)
        {
            mapping[i] = -1;
        }

        // Register each symbol in both cases, independent of the current culture.
        for (var digit = 0; digit < Alphabet.Length; digit++)
        {
            var lower = Alphabet[digit];
            mapping[lower] = digit;
            mapping[char.ToUpperInvariant(lower)] = digit;
        }

        // Letters that are easily mistaken for digits decode as those digits.
        mapping['o'] = mapping['O'] = 0;
        mapping['i'] = mapping['I'] = mapping['l'] = mapping['L'] = 1;
        return mapping;
    }
}
@aidapsibr
Copy link

Gorgeous.

@nadjibus
Copy link

Thank you, now I can finally get rid of ToString("N")

@ronaldhoek
Copy link

ronaldhoek commented Oct 4, 2018

You could also parse the GUID to a byte array and convert it to a base64 encoded string, resulting in only 24 characters...
This all comes right out of the box with .NET ;)

A simple example using an application:

            // Demo: round-trips a random GUID through its default string form and
            // through Base64, printing each text form, its length, and the parsed GUID.
            var original = Guid.NewGuid();

            // Regular round-trip via the default string format.
            Console.WriteLine("Normal GUID string:");
            var text = original.ToString();
            Console.WriteLine("As string: " + text + " (length = " + text.Length + ")");
            var parsed = new Guid(text);
            Console.WriteLine("From string: " + parsed);
            Console.WriteLine();

            // Shortened round-trip via the Base64 encoding of the 16 raw bytes.
            Console.WriteLine("B64 GUID string:");
            var rawBytes = original.ToByteArray();
            text = Convert.ToBase64String(rawBytes);
            Console.WriteLine("As string : " + text + " (length = " + text.Length + ")");
            parsed = new Guid(Convert.FromBase64String(text));
            Console.WriteLine("From string: " + parsed);
            Console.WriteLine();

            // Keep the console window open until a key is pressed.
            Console.ReadKey();

@khellang
Copy link
Author

khellang commented Nov 8, 2018

@ronaldhoek LOL, seriously? Of course you can, but that's not the point. I don't want Base64. The entire point of this is that I want a Base32-encoded string with the characteristics mentioned in the gist. Base64 doesn't have any of those characteristics, except being smaller.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment