Skip to content

Instantly share code, notes, and snippets.

@slaperche-zenly
Created February 13, 2022 05:04
Show Gist options
  • Save slaperche-zenly/204e9b8e305cfb8ce2eec49fbe7b9396 to your computer and use it in GitHub Desktop.
Save slaperche-zenly/204e9b8e305cfb8ce2eec49fbe7b9396 to your computer and use it in GitHub Desktop.
Fast implementation of h3IsValid
/// Maximum supported H3 resolution.
const MAX_RESOLUTION: usize = 15;
const H3_CELL_MODE: u64 = 1;
/// Maximum value for a base cell.
const MAX_BASE_CELL: u8 = 121;
/// Size, in bits, of a non-base cell (range [0; 6].
const CELL_BITSIZE: usize = 3;
/// The bit offset of the mode in an H3 cell index.
const MODE_OFFSET: usize = 59;
/// The bit offset of the reserved bits in an H3 cell index.
const RESERVED_BITS_OFFSET: usize = 56;
/// The bit offset of the base cell in an H3 cell index.
const BASE_CELL_OFFSET: usize = 45;
pub fn is_valid_h3(value: u64) -> bool {
// Check reserved bits.
if (value >> RESERVED_BITS_OFFSET) & 0b1000_0111 != 0 {
return false;
}
// Check index mode.
if (value >> MODE_OFFSET) & 0b1111 != H3_CELL_MODE {
return false;
}
// Check base cell.
let base = (value >> BASE_CELL_OFFSET) & 0b111_1111;
if base > MAX_BASE_CELL.into() {
return false;
}
// Resolution is always valid: coded on 4 bits, valid range is [0; 15].
let resolution = (value >> 52 & 0b1111) as usize;
// Check for the tail of unused cells after `resolution` cells
// We expect every bit to be 1 in the tail (because unused cells are
// represented by `0b111`), i.e. every bit set to 0 after a NOT.
let unused_count = MAX_RESOLUTION - resolution;
let unused_bitsize = unused_count * CELL_BITSIZE;
let unused_mask = (1 << unused_bitsize) - 1;
if (!value) & unused_mask != 0 {
return false;
}
// Check that we have `resolution` valid cells (no unused ones).
let cells_mask = (1 << (resolution * CELL_BITSIZE)) - 1;
let cells = (value >> unused_bitsize) & cells_mask;
if has_unused_cell(cells) {
return false;
}
// Check for pentagons with deleted subsequence.
if is_pentagon_base(base) && resolution != 0 {
// Move cells to the front, so that we can count leading zeroes.
let offset = 64 - (resolution * CELL_BITSIZE);
// Find the position of the first bit set, if it's a multiple of 3 that
// means we have a 001 as first non-zero cell, which is forbidden.
if ((cells << offset).leading_zeros() + 1) % 3 == 0 {
return false;
}
}
true
}
#[inline(always)]
fn is_pentagon_base(cell: u64) -> bool {
// Bitmap where only set bits are those whose offset match pentagons.
const BASE_PENTAGONS: u128 = 0x0020_0802_0008_0100_8402_0040_0100_4010;
BASE_PENTAGONS & (1 << cell) != 0
}
/// Checks if there is at least one unused cell in the given cells.
#[inline(always)]
fn has_unused_cell(cells: u64) -> bool {
// Unused cells are represented by `0b111`, so we actually want to check the
// absence of this pattern.
// This is akin to splitting the data into chunks of 3 bits and looking for
// the presence of a three-1 triplet.
//
// Now, looking for `0b111` is clearly not a common task, but we can twist
// the problem a bit to find back our footing ;)
// If we apply a NOT on our data we're now looking for `0b000` which is
// awfully similar to the research of a nul byte, a well-known task in
// C-land thanks to null-terminated strings.
//
// STOP, Archeology time!
//
// Let's dive into the annals of the Old Gods, a.k.a. comp.lang.c, and
// extract this golden nugget: Alan Mycroft's null-byte detection algorithm,
// posted in 1987
// See: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
//
// The spell is: (value - lo_magic) & (!value & hi_magic)
//
// Here's a quick rundown on how it works:
//
// - The first part, `value - lo_magic`, will make sure that the MSB (most
// significant bit) of each chunk is set if:
// * the chunk is null (`0b000 - 0b001` wraps around to `0b111`).
// * the MSB + another bit are already set, e.g. `0b101`. That's because
// the lowest bit absorb the subtraction and the highest one is left
// untouched (e.g. `0b101 - 0b001 = 0b100`)
// - The second part, `!value & hi_magic`, will set the MSB of each chunk
// only if the MSB was unset in the original value.
//
// By ANDing both parts, we get a non-zero value if there was at least one
// null chunk: the first part selects null chunks and the ones with the MSB
// already set whereas the second part filter out the latter, thus leaving
// only null chunk with a bit set.
//
// A little example:
//
// cells = 001 010 111 011 110 110 000
// !cells = 110 101 000 100 001 001 111 // negate to convert 111 to 000.
// part 1 = 101 011 111 011 000 000 110
// part 2 = 000 000 100 000 100 100 000
// result = 000 000 100 000 000 000 000
//
// By tweaking this a bit to works on 64-bit AND on triplet instead of
// bytes, the magic occurs :)
const LO_MAGIC: u64 = 0x492_4924_9249; // ...001 001 001...
const HI_MAGIC: u64 = 0x1249_2492_4924; // ...100 100 100...
((!cells - LO_MAGIC) & (cells & HI_MAGIC)) != 0
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn is_valid() {
// Bitmask to clear the base cell.
const CLEAR_MODE: u64 = !(0b1111 << MODE_OFFSET);
// Bitmask to clear the base cell.
const CLEAR_RESERVED_BITS: u64 = !(0b0111 << RESERVED_BITS_OFFSET);
// Bitmask to clear the base cell.
const CLEAR_BASE_CELL: u64 = !(0b0111_1111 << BASE_CELL_OFFSET);
assert!(!is_valid_h3(612630286829617151), "bug");
// Test valid cell at resolution 0 to 15.
assert!(is_valid_h3(0x0807_5fff_ffff_ffff), "valid res 0");
assert!(is_valid_h3(0x0817_57ff_ffff_ffff), "valid res 1");
assert!(is_valid_h3(0x0827_54ff_ffff_ffff), "valid res 2");
assert!(is_valid_h3(0x0837_54ef_ffff_ffff), "valid res 3");
assert!(is_valid_h3(0x0847_54a9_ffff_ffff), "valid res 4");
assert!(is_valid_h3(0x0857_54e6_7fff_ffff), "valid res 5");
assert!(is_valid_h3(0x0867_54e6_4fff_ffff), "valid res 6");
assert!(is_valid_h3(0x0877_54e6_4dff_ffff), "valid res 7");
assert!(is_valid_h3(0x0887_54e6_499f_ffff), "valid res 8");
assert!(is_valid_h3(0x0897_54e6_4993_ffff), "valid res 9");
assert!(is_valid_h3(0x08a7_54e6_4992_ffff), "valid res 10");
assert!(is_valid_h3(0x08b7_54e6_4992_9fff), "valid res 11");
assert!(is_valid_h3(0x08c7_54e6_4992_9dff), "valid res 12");
assert!(is_valid_h3(0x08d7_54e6_4992_d6ff), "valid res 13");
assert!(is_valid_h3(0x08e7_54e6_4992_d6df), "valid res 14");
assert!(is_valid_h3(0x08f7_54e6_4992_d6d8), "valid res 15");
// Test high bit.
assert!(!is_valid_h3(0x88c2_bae3_0533_6bff), "high bit");
// Test base cells.
for base in 0..123 {
let value = (0x0807_5fff_ffff_ffff & CLEAR_BASE_CELL) | (base << BASE_CELL_OFFSET);
let res = is_valid_h3(value);
// Base cell range is from 0 to 121, any other value is invalid.
assert_eq!(res, base <= MAX_BASE_CELL.into(), "base cell {}", base);
}
// Test mode bits.
for mode in 0..16 {
let value = (0x0807_5fff_ffff_ffff & CLEAR_MODE) | (mode << MODE_OFFSET);
let res = is_valid_h3(value);
// Cell mode is 1, any other value is invalid for a cell index.
assert_eq!(res, mode == H3_CELL_MODE, "mode {}", mode);
}
// Test reserved bits.
for unused in 0..8 {
let value =
(0x0807_5fff_ffff_ffff & CLEAR_RESERVED_BITS) | (unused << RESERVED_BITS_OFFSET);
let res = is_valid_h3(value);
// Reserved bits are expected to be 0, any other value is invalid..
assert_eq!(res, unused == 0, "reserved bits {}", unused);
}
// Test unexpected unused cell (e.g. resolution 12 but 3rd cell is
// unused).
assert!(!is_valid_h3(0x08c2_bee3_0533_6bff), "first");
assert!(!is_valid_h3(0x08c2_bae3_3d33_6bff), "middle");
assert!(!is_valid_h3(0x08c2_bae3_0533_6fff), "last");
// Test missing unused cell (e.g. resolution 12 less than 3 unused
// cells).
assert!(!is_valid_h3(0x08c0_fae3_0533_6aff), "first");
assert!(!is_valid_h3(0x08c0_fae3_0533_6fef), "middle");
assert!(!is_valid_h3(0x0817_57ff_ffff_fffe), "last");
// Test deleted subsequence (invalid for pentagons, valid for hexagons).
assert!(is_valid_h3(0x0818_87ff_ffff_ffff), "hexagon");
assert!(!is_valid_h3(0x0810_87ff_ffff_ffff), "pentagon");
assert!(is_valid_h3(0x0880_4000_011f_ffff), "hexagon");
assert!(!is_valid_h3(0x0880_8000_011f_ffff), "pentagon");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment