Skip to content

Instantly share code, notes, and snippets.

/foo.rs Secret

Created September 12, 2016 17:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/3b0b489137af9006d5c498f10d42514a to your computer and use it in GitHub Desktop.
Save anonymous/3b0b489137af9006d5c498f10d42514a to your computer and use it in GitHub Desktop.
use std::collections::HashMap;
use std::hash::{Hasher, SipHasher};
use std::io::{self, stdout, Write};
use std::{mem, ptr, fs};
use std::time::Instant;
const DICT: &'static [u8] = include_bytes!("<dictionary here>");
/// A DJB2 hasher.
pub struct Djb2 {
state: u64,
}
impl Default for Djb2 {
fn default() -> Djb2 {
Djb2 {
state: 5381,
}
}
}
impl Hasher for Djb2 {
fn finish(&self) -> u64 {
self.state
}
fn write(&mut self, bytes: &[u8]) {
for &b in bytes {
// s * 35 + b = s' (mod 2^64)
self.state = (self.state << 5).wrapping_add(self.state).wrapping_add(b as u64);
}
}
}
/// A modified (for better performance) DJB2 hasher.
///
/// It has slightly more collisions than DJB2, but has a much better performance, due to reading
/// from the buffer in 8-byte (u64) chunks.
pub struct LongHasher {
state: u64,
k0: u64,
k1: u64,
}
impl Default for LongHasher {
fn default() -> LongHasher {
LongHasher {
state: 0x1ED00D7E1E707EB2,
k0: 0x953779B185EBCA84,
k1: 0xABF72BEEA3A1BDE3,
}
}
}
impl Hasher for LongHasher {
fn finish(&self) -> u64 {
self.state ^ self.k1
}
#[inline]
fn write_u64(&mut self, n: u64) {
self.state = self.state.wrapping_mul(0x1FFFFFFFFFFFFFFF).wrapping_add(n).wrapping_mul(0x9E3779B185EBCA87).rotate_right(33) ^ self.k0;
}
fn write(&mut self, bytes: &[u8]) {
for b in 0..bytes.len() / 8 {
self.write_u64(*unsafe { mem::transmute::<_, &u64>(&bytes[b * 8]) });
}
let mut extra = 0xABF72BEEA3B1BDEF;
unsafe {
let copy = bytes.len() % 8;
let last = bytes.len().saturating_sub(copy);
ptr::copy_nonoverlapping(
bytes[last..].as_ptr(),
&mut extra as *mut u64 as *mut u8,
copy,
);
}
self.write_u64(extra);
}
}
#[derive(Default)]
struct SimpleSum {
state: u64
}
impl Hasher for SimpleSum {
fn finish(&self) -> u64 {
self.state
}
fn write(&mut self, bytes: &[u8]) {
for b in 0..bytes.len() {
self.state = self.state.wrapping_add(bytes[b] as u64);
}
}
}
#[derive(Default)]
struct SimpleSumXor {
state: u64
}
impl Hasher for SimpleSumXor {
fn finish(&self) -> u64 {
self.state
}
fn write(&mut self, bytes: &[u8]) {
for b in 0..bytes.len() {
self.state = 0xDEADFAAA ^ self.state.wrapping_add(bytes[b] as u64);
}
}
}
#[derive(Default)]
struct SlidingSum {
state: u64
}
impl Hasher for SlidingSum {
fn finish(&self) -> u64 {
self.state
}
fn write(&mut self, bytes: &[u8]) {
for b in bytes {
self.state = (self.state << 6).wrapping_add(self.state << 16).wrapping_sub(self.state).wrapping_add(*b as u64);
}
}
}
#[derive(Default)]
struct Sdbm {
state: u32
}
impl Hasher for Sdbm {
fn finish(&self) -> u64 {
self.state as u64
}
fn write(&mut self, bytes: &[u8]) {
for b in bytes {
self.state = (*b as u32).wrapping_add(self.state << 6).wrapping_add(self.state << 16).wrapping_sub(self.state);
}
}
}
// TODO remove
/*
fn mb(s: u64, c: u64) -> u64 {
let words = DICT.split(|&x| x == b'\n');
let mut buckets = HashMap::new();
for i in words.take(2048 * 4) {
let mut hasher: LongHasher = LongHasher {
state: s,
c: c,
};
hasher.write(i);
let hash = hasher.finish() % 2048;
if let Some(x) = buckets.get_mut(&hash) {
*x += 1;
continue;
}
buckets.insert(hash, 1);
}
let mut collisions = 0;
let mut max_bucket = 0;
for (_, &i) in buckets.iter() {
if i > 0 {
collisions += 1;
if max_bucket < i {
max_bucket = i;
}
}
}
max_bucket
}
*/
fn number_of_collisions<'a, T: Hasher + Default, I: Iterator<Item = &'a [u8]>>(iter: I) -> (u64, u64) {
let mut buckets = HashMap::new();
for i in iter {
let mut hasher: T = Default::default();
hasher.write(i);
let hash = hasher.finish() % 2048;
if let Some(x) = buckets.get_mut(&(hash ^ 0xABABABAB)) {
*x += 1;
continue;
}
buckets.insert(hash ^ 0xABABABAB, 1);
}
let mut collisions = 0;
let mut max_bucket = 0;
for (_, &i) in buckets.iter() {
if i > 0 {
collisions += 1;
if max_bucket < i {
max_bucket = i;
}
}
}
(collisions, max_bucket)
}
fn bench_hasher<T: Hasher + Default>(name: &str) {
let time = Instant::now();
let mut stdout = stdout();
let words = DICT.split(|&x| x == b'\n');
let (collisions_w, max_bucket_w) = number_of_collisions::<T, _>(words);
let collisions = collisions_w; //+ collisions_u;
let max_bucket = max_bucket_w; //+ max_bucket_u;
let ela = time.elapsed();
stdout.write(b"~ ").unwrap();
stdout.write(name.as_bytes()).unwrap();
stdout.write(b"\n").unwrap();
stdout.write(b" Filled buckets: ").unwrap();
stdout.write(collisions.to_string().as_bytes()).unwrap();
stdout.write(b"\n").unwrap();
stdout.write(b" Max bucket: ").unwrap();
stdout.write(max_bucket.to_string().as_bytes()).unwrap();
stdout.write(b"\n").unwrap();
stdout.write(b" Time: ").unwrap();
stdout.write(ela.as_secs().to_string().as_bytes()).unwrap();
stdout.write(b" s. ").unwrap();
stdout.write((ela.subsec_nanos() / 1000).to_string().as_bytes()).unwrap();
stdout.write(" ms.\n".as_bytes()).unwrap();
let mut hasher = T::default();
let time = Instant::now();
for i in 0..1000000000 / 10 {
hasher.write_u64(i);
}
let mut null = fs::File::open("/dev/null").unwrap();
null.write(&[hasher.finish() as u8]);
let ela = time.elapsed();
let count = 10.0 / 8.0 / (ela.subsec_nanos() as f64 / 1_000_000_000.0 + ela.as_secs() as f64);
stdout.write(b" GB/s: ").unwrap();
stdout.write(count.to_string().as_bytes()).unwrap();
stdout.write(b"\n").unwrap();
}
fn main() {
bench_hasher::<Djb2>("DJB2");
bench_hasher::<SipHasher>("SipHasher");
bench_hasher::<LongHasher>("LongHasher");
bench_hasher::<SimpleSum>("SimpleSum");
bench_hasher::<SimpleSumXor>("SimpleSumXor");
bench_hasher::<SlidingSum>("SlidingSum+");
bench_hasher::<Sdbm>("Sdbm");
/*
let mut stdout = stdout();
let mut record = mb(37, 35);
for i in 500000..5000000 {
for j in 10..50 {
let mbi = mb(i, j);
if mbi < record {
record = mbi;
stdout.write(b"i: ");
stdout.write(i.to_string().as_bytes());
stdout.write(b" j: ");
stdout.write(j.to_string().as_bytes());
stdout.write(b" res: ");
stdout.write(mbi.to_string().as_bytes());
stdout.write(b"\n");
}
}
}
*/
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment