Skip to content

Instantly share code, notes, and snippets.

@bradley219
Created April 8, 2013 17:30
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bradley219/5338662 to your computer and use it in GitHub Desktop.
Save bradley219/5338662 to your computer and use it in GitHub Desktop.
MySQL extension for user-defined function HAMMING_DISTANCE, which quickly calculates hamming distance between two 64-bit integers passed as arguments.
#ifdef STANDARD
/* STANDARD is defined, don't use any mysql functions */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifdef __WIN__
typedef unsigned __int64 ulonglong; /* Microsofts 64 bit types */
typedef __int64 longlong;
#else
typedef unsigned long long ulonglong;
typedef long long longlong;
#endif /*__WIN__*/
#else
#include <my_global.h>
#include <my_sys.h>
#if defined(MYSQL_SERVER)
#include <m_string.h> /* To get strmov() */
#else
/* when compiled as standalone */
#include <string.h>
#define strmov(a,b) stpcpy(a,b)
#define bzero(a,b) memset(a,0,b)
#endif
#endif
#include <mysql.h>
#include <ctype.h>
#ifdef _WIN32
/* inet_aton needs winsock library */
#pragma comment(lib, "ws2_32")
#endif
#ifdef HAVE_DLOPEN
#if !defined(HAVE_GETHOSTBYADDR_R) || !defined(HAVE_SOLARIS_STYLE_GETHOST)
static pthread_mutex_t LOCK_hostname;
#endif
/* These must be right or mysqld will not find the symbol! */
my_bool hamming_distance_init( UDF_INIT *initid, UDF_ARGS *args, char *message );
longlong hamming_distance( UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error );
void hamming_distance_deinit( UDF_INIT *initid );
my_bool hamming_distance_init( UDF_INIT *initid, UDF_ARGS *args, char *message )
{
if( args->arg_count != 2 )
{
strcpy( message, "Wrong number of arguments to HAMMING_DISTANCE; accepts exactly two arguments." );
return 1;
}
if( ((args->arg_type[0] != INT_RESULT) && (args->args[0] != NULL)) || ((args->arg_type[1] != INT_RESULT) && (args->args[0] != NULL)) )
{
strcpy( message, "One or both arguments to HAMMING_DISTANCE is not an integer." );
return 1;
}
initid->maybe_null = 1;
return 0;
}
void hamming_distance_deinit( UDF_INIT *initid __attribute__((unused)) )
{
return;
}
longlong hamming_distance( UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error )
{
if( (args->args[0] == NULL) || (args->args[1] == NULL) )
{
*is_null = 1;
return 0;
}
longlong hash1 = *((longlong*) args->args[0]);
longlong hash2 = *((longlong*) args->args[1]);
longlong x = hash1 ^ hash2;
const longlong m1 = 0x5555555555555555ULL;
const longlong m2 = 0x3333333333333333ULL;
const longlong h01 = 0x0101010101010101ULL;
const longlong m4 = 0x0f0f0f0f0f0f0f0fULL;
x -= (x >> 1) & m1;
x = (x & m2) + ((x >> 2) & m2);
x = (x + (x >> 4)) & m4;
return (x * h01)>>56;
}
#endif /* HAVE_DLOPEN */
@valbok
Copy link

valbok commented Dec 5, 2013

Is it faster than BIT_COUNT( hash1 ^ hash2 ) ?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment