Skip to content

Instantly share code, notes, and snippets.

@teknoman117
Last active June 14, 2018 21:45
Show Gist options
  • Star 13 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save teknoman117/d4d952942b4314781432 to your computer and use it in GitHub Desktop.
Save teknoman117/d4d952942b4314781432 to your computer and use it in GitHub Desktop.
MurmurHash2 as a C++11 Constant Expression for compiler generated hashes of constant strings
/*
* Copyright (c) 2018 Nathan Lewis <linux.robotdude@gmail.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of mosquitto nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
// compile with "g++ -std=c++11 constexpr_string_hash.cpp"
#include <iostream>
#include <cinttypes>
namespace compiletime
{
namespace internal
{
// Murmur hash constants
constexpr const uint64_t m = 0xc6a4a7935bd1e995;
constexpr const int r = 47;
constexpr uint64_t crotate(uint64_t a)
{
return a ^ (a >> r);
}
constexpr uint64_t cfinalize_h(const char *data, size_t key, uint64_t h )
{
return (key != 0) ? cfinalize_h(data, key-1, (h ^ (uint64_t(data[key - 1]) << (8 * (key-1))))) : h * m;
}
constexpr uint64_t cfinalize(const char *data, size_t len, uint64_t h)
{
return (len & 7) ? crotate(crotate(cfinalize_h(data, len & 7, h)) * m)
: crotate(crotate(h) * m);
}
// reinterpret cast is illegal (static is fine) so we have to manually load 64 bit chuncks of string instead
// of casting char* to uint64_t*
//
// TODO - this only works on little endian machines .... fuuuu
constexpr uint64_t cblock(const char *data, size_t offset = 0)
{
return (offset == 7) ? uint64_t(data[offset]) << (8 * offset)
: (uint64_t(data[offset]) << (8 * offset)) | cblock(data, offset+1);
}
// Mixing function for the hash function
constexpr uint64_t cmix_h(const char *data, uint64_t h, size_t offset)
{
return (h ^ (crotate( cblock(data + offset) * m ) * m)) * m;
}
// Control function for the mixing
constexpr uint64_t cmix(const char *data, size_t len, uint64_t h, size_t offset = 0)
{
return (offset == (len & ~size_t(7))) ? cfinalize(data + offset, len, h)
: cmix(data, len, cmix_h(data, h, offset), offset+8);
}
}
// Compile time version of strlen
constexpr size_t length_cstring(const char *data, const int idx = 0)
{
return (data[idx] == '\0') ? idx : length_cstring(data, idx+1);
}
// Compile time version of MurmurHash2 for
constexpr uint64_t hash_cstring(const char *data, size_t len, uint64_t seed = 0xc70f6907UL)
{
return internal::cmix(data, len, seed ^ (len * internal::m));
}
}
constexpr const char* aString = "Does a set of all sets contain itself?";
constexpr const uint64_t aStringHash = compiletime::hash_cstring(aString, compiletime::length_cstring(aString));
int main (int argc, char **argv)
{
// Print the hash we computed for the string (in the compiler)
std::cout << "Hash of \"" << aString << "\" = " << (int64_t) aStringHash << std::endl;
return 0;
}
.file "constexpr_string_hash.cpp"
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "Hash of \""
.section .rodata.str1.8,"aMS",@progbits,1
.align 8
.LC1:
.string "Does a set of all sets contain itself?"
.section .rodata.str1.1
.LC2:
.string "\" = "
.section .text.unlikely,"ax",@progbits
.LCOLDB3:
.section .text.startup,"ax",@progbits
.LHOTB3:
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB1415:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
movl $9, %edx
movl $.LC0, %esi
movl $_ZSt4cout, %edi
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l
movl $38, %edx
movl $.LC1, %esi
movl $_ZSt4cout, %edi
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l
movl $4, %edx
movl $.LC2, %esi
movl $_ZSt4cout, %edi
call _ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l
movabsq $-2163680713539448014, %rsi
movl $_ZSt4cout, %edi
call _ZNSo9_M_insertIlEERSoT_
movq %rax, %rdi
call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
xorl %eax, %eax
addq $8, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE1415:
.size main, .-main
.section .text.unlikely
.LCOLDE3:
.section .text.startup
.LHOTE3:
.section .text.unlikely
.LCOLDB4:
.section .text.startup
.LHOTB4:
.p2align 4,,15
.type _GLOBAL__sub_I_main, @function
_GLOBAL__sub_I_main:
.LFB1587:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
movl $_ZStL8__ioinit, %edi
call _ZNSt8ios_base4InitC1Ev
movl $__dso_handle, %edx
movl $_ZStL8__ioinit, %esi
movl $_ZNSt8ios_base4InitD1Ev, %edi
addq $8, %rsp
.cfi_def_cfa_offset 8
jmp __cxa_atexit
.cfi_endproc
.LFE1587:
.size _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
.section .text.unlikely
.LCOLDE4:
.section .text.startup
.LHOTE4:
.section .init_array,"aw"
.align 8
.quad _GLOBAL__sub_I_main
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
.ident "GCC: (GNU) 5.1.0"
.section .note.GNU-stack,"",@progbits
% g++ -O3 -S -std=c++11 constexpr_string_hash.cpp ~/Downloads Sedenion
% ./a.out ~/Downloads Sedenion
Hash of "Does a set of all sets contain itself?" = -2163680713539448014
%
@gunrot
Copy link

gunrot commented Oct 17, 2017

Hi Teknoman117, this looks pretty cool. Did you intentionally not add a license to this? It would be good , if you add a license, so everybody knows under which terms it can be used by others. Without any license it is your copyright and any use is a copyright breach.

@teknoman117
Copy link
Author

@gunrot - No, i did not intentionally leave out a license and sorry for not seeing your comment earlier. I don't know why I wouldn't get a notification for a gist comment. I've added the 3-clause BSD license to the header so feel free to use this code under those terms!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment