-
-
Save slaperche-zenly/204e9b8e305cfb8ce2eec49fbe7b9396 to your computer and use it in GitHub Desktop.
Fast implementation of h3IsValid
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Maximum supported H3 resolution. | |
const MAX_RESOLUTION: usize = 15; | |
const H3_CELL_MODE: u64 = 1; | |
/// Maximum value for a base cell. | |
const MAX_BASE_CELL: u8 = 121; | |
/// Size, in bits, of a non-base cell (range [0; 6]. | |
const CELL_BITSIZE: usize = 3; | |
/// The bit offset of the mode in an H3 cell index. | |
const MODE_OFFSET: usize = 59; | |
/// The bit offset of the reserved bits in an H3 cell index. | |
const RESERVED_BITS_OFFSET: usize = 56; | |
/// The bit offset of the base cell in an H3 cell index. | |
const BASE_CELL_OFFSET: usize = 45; | |
pub fn is_valid_h3(value: u64) -> bool { | |
// Check reserved bits. | |
if (value >> RESERVED_BITS_OFFSET) & 0b1000_0111 != 0 { | |
return false; | |
} | |
// Check index mode. | |
if (value >> MODE_OFFSET) & 0b1111 != H3_CELL_MODE { | |
return false; | |
} | |
// Check base cell. | |
let base = (value >> BASE_CELL_OFFSET) & 0b111_1111; | |
if base > MAX_BASE_CELL.into() { | |
return false; | |
} | |
// Resolution is always valid: coded on 4 bits, valid range is [0; 15]. | |
let resolution = (value >> 52 & 0b1111) as usize; | |
// Check for the tail of unused cells after `resolution` cells | |
// We expect every bit to be 1 in the tail (because unused cells are | |
// represented by `0b111`), i.e. every bit set to 0 after a NOT. | |
let unused_count = MAX_RESOLUTION - resolution; | |
let unused_bitsize = unused_count * CELL_BITSIZE; | |
let unused_mask = (1 << unused_bitsize) - 1; | |
if (!value) & unused_mask != 0 { | |
return false; | |
} | |
// Check that we have `resolution` valid cells (no unused ones). | |
let cells_mask = (1 << (resolution * CELL_BITSIZE)) - 1; | |
let cells = (value >> unused_bitsize) & cells_mask; | |
if has_unused_cell(cells) { | |
return false; | |
} | |
// Check for pentagons with deleted subsequence. | |
if is_pentagon_base(base) && resolution != 0 { | |
// Move cells to the front, so that we can count leading zeroes. | |
let offset = 64 - (resolution * CELL_BITSIZE); | |
// Find the position of the first bit set, if it's a multiple of 3 that | |
// means we have a 001 as first non-zero cell, which is forbidden. | |
if ((cells << offset).leading_zeros() + 1) % 3 == 0 { | |
return false; | |
} | |
} | |
true | |
} | |
#[inline(always)] | |
fn is_pentagon_base(cell: u64) -> bool { | |
// Bitmap where only set bits are those whose offset match pentagons. | |
const BASE_PENTAGONS: u128 = 0x0020_0802_0008_0100_8402_0040_0100_4010; | |
BASE_PENTAGONS & (1 << cell) != 0 | |
} | |
/// Checks if there is at least one unused cell in the given cells. | |
#[inline(always)] | |
fn has_unused_cell(cells: u64) -> bool { | |
// Unused cells are represented by `0b111`, so we actually want to check the | |
// absence of this pattern. | |
// This is akin to splitting the data into chunks of 3 bits and looking for | |
// the presence of a three-1 triplet. | |
// | |
// Now, looking for `0b111` is clearly not a common task, but we can twist | |
// the problem a bit to find back our footing ;) | |
// If we apply a NOT on our data we're now looking for `0b000` which is | |
// awfully similar to the research of a nul byte, a well-known task in | |
// C-land thanks to null-terminated strings. | |
// | |
// STOP, Archeology time! | |
// | |
// Let's dive into the annals of the Old Gods, a.k.a. comp.lang.c, and | |
// extract this golden nugget: Alan Mycroft's null-byte detection algorithm, | |
// posted in 1987 | |
// See: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ | |
// | |
// The spell is: (value - lo_magic) & (!value & hi_magic) | |
// | |
// Here's a quick rundown on how it works: | |
// | |
// - The first part, `value - lo_magic`, will make sure that the MSB (most | |
// significant bit) of each chunk is set if: | |
// * the chunk is null (`0b000 - 0b001` wraps around to `0b111`). | |
// * the MSB + another bit are already set, e.g. `0b101`. That's because | |
// the lowest bit absorb the subtraction and the highest one is left | |
// untouched (e.g. `0b101 - 0b001 = 0b100`) | |
// - The second part, `!value & hi_magic`, will set the MSB of each chunk | |
// only if the MSB was unset in the original value. | |
// | |
// By ANDing both parts, we get a non-zero value if there was at least one | |
// null chunk: the first part selects null chunks and the ones with the MSB | |
// already set whereas the second part filter out the latter, thus leaving | |
// only null chunk with a bit set. | |
// | |
// A little example: | |
// | |
// cells = 001 010 111 011 110 110 000 | |
// !cells = 110 101 000 100 001 001 111 // negate to convert 111 to 000. | |
// part 1 = 101 011 111 011 000 000 110 | |
// part 2 = 000 000 100 000 100 100 000 | |
// result = 000 000 100 000 000 000 000 | |
// | |
// By tweaking this a bit to works on 64-bit AND on triplet instead of | |
// bytes, the magic occurs :) | |
const LO_MAGIC: u64 = 0x492_4924_9249; // ...001 001 001... | |
const HI_MAGIC: u64 = 0x1249_2492_4924; // ...100 100 100... | |
((!cells - LO_MAGIC) & (cells & HI_MAGIC)) != 0 | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn is_valid() { | |
// Bitmask to clear the base cell. | |
const CLEAR_MODE: u64 = !(0b1111 << MODE_OFFSET); | |
// Bitmask to clear the base cell. | |
const CLEAR_RESERVED_BITS: u64 = !(0b0111 << RESERVED_BITS_OFFSET); | |
// Bitmask to clear the base cell. | |
const CLEAR_BASE_CELL: u64 = !(0b0111_1111 << BASE_CELL_OFFSET); | |
assert!(!is_valid_h3(612630286829617151), "bug"); | |
// Test valid cell at resolution 0 to 15. | |
assert!(is_valid_h3(0x0807_5fff_ffff_ffff), "valid res 0"); | |
assert!(is_valid_h3(0x0817_57ff_ffff_ffff), "valid res 1"); | |
assert!(is_valid_h3(0x0827_54ff_ffff_ffff), "valid res 2"); | |
assert!(is_valid_h3(0x0837_54ef_ffff_ffff), "valid res 3"); | |
assert!(is_valid_h3(0x0847_54a9_ffff_ffff), "valid res 4"); | |
assert!(is_valid_h3(0x0857_54e6_7fff_ffff), "valid res 5"); | |
assert!(is_valid_h3(0x0867_54e6_4fff_ffff), "valid res 6"); | |
assert!(is_valid_h3(0x0877_54e6_4dff_ffff), "valid res 7"); | |
assert!(is_valid_h3(0x0887_54e6_499f_ffff), "valid res 8"); | |
assert!(is_valid_h3(0x0897_54e6_4993_ffff), "valid res 9"); | |
assert!(is_valid_h3(0x08a7_54e6_4992_ffff), "valid res 10"); | |
assert!(is_valid_h3(0x08b7_54e6_4992_9fff), "valid res 11"); | |
assert!(is_valid_h3(0x08c7_54e6_4992_9dff), "valid res 12"); | |
assert!(is_valid_h3(0x08d7_54e6_4992_d6ff), "valid res 13"); | |
assert!(is_valid_h3(0x08e7_54e6_4992_d6df), "valid res 14"); | |
assert!(is_valid_h3(0x08f7_54e6_4992_d6d8), "valid res 15"); | |
// Test high bit. | |
assert!(!is_valid_h3(0x88c2_bae3_0533_6bff), "high bit"); | |
// Test base cells. | |
for base in 0..123 { | |
let value = (0x0807_5fff_ffff_ffff & CLEAR_BASE_CELL) | (base << BASE_CELL_OFFSET); | |
let res = is_valid_h3(value); | |
// Base cell range is from 0 to 121, any other value is invalid. | |
assert_eq!(res, base <= MAX_BASE_CELL.into(), "base cell {}", base); | |
} | |
// Test mode bits. | |
for mode in 0..16 { | |
let value = (0x0807_5fff_ffff_ffff & CLEAR_MODE) | (mode << MODE_OFFSET); | |
let res = is_valid_h3(value); | |
// Cell mode is 1, any other value is invalid for a cell index. | |
assert_eq!(res, mode == H3_CELL_MODE, "mode {}", mode); | |
} | |
// Test reserved bits. | |
for unused in 0..8 { | |
let value = | |
(0x0807_5fff_ffff_ffff & CLEAR_RESERVED_BITS) | (unused << RESERVED_BITS_OFFSET); | |
let res = is_valid_h3(value); | |
// Reserved bits are expected to be 0, any other value is invalid.. | |
assert_eq!(res, unused == 0, "reserved bits {}", unused); | |
} | |
// Test unexpected unused cell (e.g. resolution 12 but 3rd cell is | |
// unused). | |
assert!(!is_valid_h3(0x08c2_bee3_0533_6bff), "first"); | |
assert!(!is_valid_h3(0x08c2_bae3_3d33_6bff), "middle"); | |
assert!(!is_valid_h3(0x08c2_bae3_0533_6fff), "last"); | |
// Test missing unused cell (e.g. resolution 12 less than 3 unused | |
// cells). | |
assert!(!is_valid_h3(0x08c0_fae3_0533_6aff), "first"); | |
assert!(!is_valid_h3(0x08c0_fae3_0533_6fef), "middle"); | |
assert!(!is_valid_h3(0x0817_57ff_ffff_fffe), "last"); | |
// Test deleted subsequence (invalid for pentagons, valid for hexagons). | |
assert!(is_valid_h3(0x0818_87ff_ffff_ffff), "hexagon"); | |
assert!(!is_valid_h3(0x0810_87ff_ffff_ffff), "pentagon"); | |
assert!(is_valid_h3(0x0880_4000_011f_ffff), "hexagon"); | |
assert!(!is_valid_h3(0x0880_8000_011f_ffff), "pentagon"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment