Skip to content

Instantly share code, notes, and snippets.

@simondeeley
Last active May 16, 2020 17:59
Show Gist options
  • Save simondeeley/abbcbd3a2585bc89add2fa2f47844a9f to your computer and use it in GitHub Desktop.
Save simondeeley/abbcbd3a2585bc89add2fa2f47844a9f to your computer and use it in GitHub Desktop.
Object Orientated Programming in C
/*-
* Copyright © 2018, 2019, 2020 Simon Deeley and respective contributors.
* All rights reserved.
*
* This source file contains source code derived from source code and topics
* written by Axel-Tobias Schreiner Copyright © 1993. Other contributors
* are attributed where necessary in the source code comments.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed in Up! Copyright Simon Deeley
* and contributors.
* 4. Neither the authors name nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $@ hash.c - (c) 2019 Simon Deeley $
*/
#include "hash.h"
/*
* By Paul Hsieh (C) 2004, 2005. Covered under the Paul Hsieh derivative
* license. See:
* http://www.azillionmonkeys.com/qed/weblicense.html for license details.
*
* http://www.azillionmonkeys.com/qed/hash.html
*/
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif
#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)+(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
const uint32_t
strhash ( const char *str,
size_t len,
uintptr_t hash )
{
register uintptr_t tmp;
register uint_least32_t rem;
if ( len <= 0 || str == NULL ) return 0;
rem = len & 3;
len >>= 2;
/* Main loop */
for ( ; len--; ) {
hash += get16bits( str );
tmp = ( get16bits( str + 2 ) << 11 ) ^ hash;
hash = ( hash << 16 ) ^ tmp;
str += 2 * sizeof( uint16_t );
hash += hash >> 11;
}
/* Handle end cases */
switch ( rem ) {
case 3:
hash += get16bits( str );
hash ^= hash << 16;
hash ^= str[ sizeof( uint16_t ) ] << 18;
hash += hash >> 11;
break;
case 2:
hash += get16bits( str );
hash ^= hash << 11;
hash += hash >> 17;
break;
case 1:
hash += *str;
hash ^= hash << 10;
hash += hash >> 1;
}
/* Force "avalanching" of final 127 bits */
hash ^= hash << 3;
hash += hash >> 5;
hash ^= hash << 4;
hash += hash >> 17;
hash ^= hash << 25;
hash += hash >> 6;
return (uint_least32_t) hash;
}
/* Undefine this macro so it doesn't effect other file scopes */
#undef get16bits
/*-
* Copyright © 2018, 2019, 2020 Simon Deeley and respective contributors.
* All rights reserved.
*
* This source file contains source code derived from source code and topics
* written by Axel-Tobias Schreiner Copyright © 1993. Other contributors
* are attributed where necessary in the source code comments.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed in Up! Copyright Simon Deeley
* and contributors.
* 4. Neither the authors name nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $@ hash.h - (c) 2019 Simon Deeley $
*/
#ifndef _HASH_H
#define _HASH_H
#include <sys/types.h>
const uint32_t strhash ( const char *str, const size_t len, uintptr_t scope );
#endif /* _HASH_H */
/*-
* Copyright © 2018, 2019, 2020 Simon Deeley and respective contributors.
* All rights reserved.
*
* This source file contains source code derived from source code and topics
* written by Axel-Tobias Schreiner Copyright © 1993. Other contributors
* are attributed where necessary in the source code comments.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed in Up! Copyright Simon Deeley
* and contributors.
* 4. Neither the authors name nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $@ object.c - (c) 2019 Simon Deeley $
*/
#include "object.h"
/* Include SIGSEGV, SIGBUS */
#include <signal.h>
/* Include malloc(), free() */
#include <stdlib.h>
/* Include va_copy(), va_start(), va_end() */
#include <stdarg.h>
#define MAGIC 0x0effaced
#if !defined( is_object )
#include <assert.h>
#define is_object(p) ( assert(p), assert(((struct Object *) p) -> magic == MAGIC), p )
#endif
/* Forward declarations used solely in this file. */
static const struct Class _Object;
static const struct Class _Class;
static void catch ( int sig );
/*
* One-time initializer to return static Object description
*/
const void * const
Object ( void )
{
return &_Object;
}
/*
* One-time initializer to return static Class description
*/
const void * const
Class ( void )
{
return &_Class;
}
/*
* Ensure an object is of a specific class description.
*
* This function is mostly used for damage control, if '_self' is of type
* 'class' then this function returns its argument _self, otherwise cast()
* terminates the calling process.
*
* This function is also responsible for checking any objects supplied as
* parameters to a method with static linkage:- we can wrap cast() around a
* dubious pointer to limit the damage which an unexpected value could do. In a
* similar fashion, is also used for safely dereferencing pointers upon import
* to a method or selector.
*
* Although cast() accepts _self with a const qualifier, it returns the value
* without const to avoid error messages on assignment
*/
void
*cast ( const void * restrict _class,
const void * restrict _self )
{
/* Catch any signal interupts in custom wrappers. */
void ( *sigsegv )( int ) = signal( SIGSEGV, catch );
#ifdef SIGBUS
void ( *sigbus )( int ) = signal( SIGBUS, catch );
#endif
const struct Object *self = is_object( _self );
const struct Class *class = is_object( self -> class );
/* Iterate up through class hiearchy to find the correct class */
if ( _class != Object() )
{
is_object( _class );
while ( class != _class )
{
assert( class != Object() );
class = class -> super;
}
}
/* Revert signal interupts back to the system. We execute these
* functions in the inverse order from those above to ensure they
* are correctly reinstated.
*/
#ifdef SIGBUS
signal( SIGBUS, sigbus );
#endif
signal( SIGSEGV, sigsegv );
return (void *) self;
}
const struct Class *
super ( const void * _self )
{
const struct Class * self = cast(Class(), _self);
return self -> super;
}
const char *
type_of (const void * _self)
{
const struct Class * self = cast(Class(), _self);
return self -> name;
}
void *
ctor ( void * _self,
va_list * app )
{
void * result;
const struct Class * class = class_of(_self);
assert(class -> ctor.method);
result = ((void * (*) ()) class -> ctor.method)(_self, app);
return result;
}
void *
super_ctor ( const void * restrict _class,
void * restrict _self,
va_list * app )
{
const struct Class * superclass = super(_class);
assert(superclass -> ctor.method);
return ((void * (*) ()) superclass -> ctor.method)(_self, app);
}
void *
dtor ( void * _self )
{
const struct Class * class = class_of(_self);
assert(class -> dtor.method);
return ((void * (*) ()) class -> dtor.method)(_self);
}
void *
super_dtor ( const void * restrict _class,
const void * restrict _self )
{
const struct Class * superclass = super(_class);
assert(superclass -> dtor.method);
return ((void * (*) ()) superclass -> dtor.method)(_self);
}
void
delete ( const void * _self )
{
const struct Class * class = class_of(_self);
assert(class -> delete.method);
((void (*) ()) class -> delete.method)(_self);
}
void
super_delete ( const void * restrict _class,
void * restrict _self )
{
const struct Class * superclass = super(_class);
assert(superclass -> delete.method);
((void (*) ()) superclass -> delete.method)(_self);
}
struct Object *
new ( const void * _self, ... )
{
struct Object * result;
va_list ap;
const struct Class * class = cast(Class(), _self);
va_start(ap, _self);
assert(class -> new.method);
result = ((struct Object * (*) ()) class -> new.method)(_self, & ap);
va_end(ap);
return result;
}
struct Object *
super_new ( const void * restrict _class,
const void * restrict _self,
va_list * app )
{
const struct Class * superclass = super(_class);
assert(superclass -> new.method);
return ((struct Object * (*) ()) superclass -> new.method)(_self, app);
}
const struct Class *
class_of ( const void * _self )
{
const struct Object * self = cast(Object(), _self);
return self -> class;
}
size_t
size_of ( const void * _self )
{
const struct Class * class = class_of(_self);
return class -> size;
}
int
is_a ( const void * restrict _self,
const void * restrict class )
{
if (_self)
{
const struct Object * self = cast(Object(), _self);
cast(Class(), class);
return class_of(self) == class;
}
return 0;
}
int
is_of ( const void * restrict _self,
const void * restrict class )
{
if (_self)
{
const struct Class * myClass;
const struct Object * self = cast(Object(), _self);
cast(Class(), class);
myClass = class_of(self);
if (class != Object()) {
while (myClass != class)
if (myClass != Object())
myClass = super(myClass);
else
return 0;
}
return 1;
}
return 0;
}
Method
responds_to ( const void * _self, const char * tag )
{
if (tag && * tag)
{
const struct Class * class = class_of(_self);
const struct Method * p = & class -> ctor;
unsigned long nmeth =
(size_of(class) - offsetof(struct Class, ctor))
/ sizeof(struct Method);
do {
if (p -> tag && strcmp(p -> tag, tag) == 0) {
return p -> method ? p -> selector : 0;
}
} while ((void)(++ p), -- nmeth);
}
return 0;
}
static
struct Object *
Object_new ( const void * _self,
va_list * app )
{
const struct Class * self = cast(Class(), _self);
struct Object * object;
assert(self -> size);
object = calloc(1, self -> size);
assert(object);
object -> magic = MAGIC;
object -> class = self;
return ctor( object, app );
}
static
void *
Object_ctor ( const void * _self,
va_list * app )
{
struct Object * self = cast(Object(), _self);
return self;
}
static void *
Object_dtor ( const void * _self )
{
struct Object * self = cast(Object(), _self);
return self;
}
static void
Object_delete ( const void * _self )
{
struct Object * self = cast( Object(), _self );
free( dtor( self ) );
self = NULL;
}
static void
Class_delete ( const void * _self )
{
struct Class * self = cast(Class(), _self);
debugf("%s: cannot delete class\n", self -> name );
}
static void *
Class_dtor ( void * _self )
{
assert(0);
return 0;
}
static void *
Class_ctor ( void * _self,
va_list * app)
{
struct Class * self = _self;
const size_t offset = offsetof(struct Class, ctor);
Method selector;
va_list ap;
va_copy(ap, *app);
self -> name = va_arg(ap, char *);
self -> super = cast(Class(), va_arg(ap, void *));
self -> size = va_arg(ap, size_t);
memcpy((char *) self + offset, (char *) self -> super + offset,
size_of(self -> super) - offset);
while ((selector = va_arg(ap, Method))) {
const char * tag = va_arg(ap, const char *);
Method method = va_arg(ap, Method);
if (selector == (Method) ctor) {
if (tag) {
self -> ctor.tag = tag;
self -> ctor.selector = selector;
}
self -> ctor.method = method;
continue;
}
if (selector == (Method) dtor) {
if (tag) {
self -> dtor.tag = tag;
self -> dtor.selector = selector;
}
self -> dtor.method = method;
continue;
}
if (selector == (Method) delete) {
if (tag) {
self -> delete.tag = tag;
self -> delete.selector = selector;
}
self -> delete.method = method;
continue;
}
if (selector == (Method) new) {
if (tag) {
self -> new.tag = tag;
self -> new.selector = selector;
}
self -> new.method = method;
continue;
}
#ifndef NDEBUG
if ( selector == (Method) debug )
{
if (tag)
{
self -> debug.tag = tag;
self -> debug.selector = selector;
}
self -> debug.method = method;
continue;
}
#endif
}
return self;
}
static void
catch (int sig)
{
assert( sig == 0 );
}
static const struct Class _Object = {
{ MAGIC, & _Class },
"Object", & _Object, sizeof(struct Object),
{ "", (Method) 0, (Method) Object_ctor },
{ "", (Method) 0, (Method) Object_dtor },
{ "delete", (Method) delete, (Method) Object_delete },
{ "", (Method) 0, (Method) Object_new },
};
static const struct Class _Class = {
{ MAGIC, & _Class },
"Class", & _Object, sizeof(struct Class),
{ "", (Method) 0, (Method) Class_ctor },
{ "", (Method) 0, (Method) Class_dtor },
{ "delete", (Method) delete, (Method) Class_delete },
{ "", (Method) 0, (Method) Object_new },
};
/* This constant should not be visible outside of this file scope */
#undef MAGIC
/*-
* Copyright © 2018, 2019, 2020 Simon Deeley and respective contributors.
* All rights reserved.
*
* This source file contains source code derived from source code and topics
* written by Axel-Tobias Schreiner Copyright © 1993. Other contributors
* are attributed where necessary in the source code comments.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed in Up! Copyright Simon Deeley
* and contributors.
* 4. Neither the authors name nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $@ object.h - (c) 2019 Simon Deeley $
*/
#ifndef _OBJECT_H
#define _OBJECT_H
#include <sys/types.h>
extern const void * const Object ( void );
extern const void * const Class ( void );
typedef void (* Method) ( void );
struct Method
{
const char *tag;
Method selector;
Method method;
};
struct Object
{
unsigned long magic;
const void *class;
};
struct Class
{
struct Object _;
const char * name;
const void * super;
size_t size;
struct Method ctor;
struct Method dtor;
struct Method delete;
struct Method new;
};
void *cast ( const void * restrict class, const void * restrict _self );
/* Standard object methods */
const char * type_of ( const void * _self );
const struct Class *class_of ( const void * _self );
size_t size_of ( const void * _self );
int is_a ( const void * restrict _self, const void * restrict class );
int is_of ( const void * restrict _self, const void * restrict class );
Method responds_to ( const void * _self, const char * tag );
/* Constructors and destructors */
void *ctor ( void * _self, va_list * app );
void *dtor ( void * _self );
struct Object *new ( const void * _self, ... );
void delete ( const void * _self );
/* Dynamically-linked methods to call parent methods */
const struct Class *super ( const void * _self );
void *super_ctor ( const void * restrict _class, void * restrict _self, va_list * app );
void *super_dtor ( const void * restrict _class, const void * restrict _self );
struct Object *super_new ( const void * restrict _class, const void * restrict _self, va_list * app );
void super_delete ( const void * restrict _class, void * restrict _self );
#endif /* _OBJECT_H */
/*-
* Copyright © 2018, 2019, 2020 Simon Deeley and respective contributors.
* All rights reserved.
*
* This source file contains source code derived from source code and topics
* written by Axel-Tobias Schreiner Copyright © 1993. Other contributors
* are attributed where necessary in the source code comments.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed in Up! Copyright Simon Deeley
* and contributors.
* 4. Neither the authors name nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $@ symtabl.c - (c) 2019 Simon Deeley $
*/
#include "symtabl.h"
#if !defined( LOAD_FACTOR_BITS )
#define LOAD_FACTOR_BITS 2
#endif
#include "hash.h"
#include "object.h"
#include "symbol.h"
/* assert() */
#include <assert.h>
/* errno & ENOMEM */
#include <errno.h>
/* va_arg() */
#include <stdarg.h>
/* calloc(), realloc(), free() */
#include <stdlib.h>
/* strlen() */
#include <string.h>
/*
* Symbol Table.
*
* Source code derrived from topics and source code published by Jason Lue
* Copyright © 2019.
*
* Basic Properties:
* 1. Items are arranged in clusters. Items of the same bucket belong to the
* same cluster.
* 2. A cluster is located at or after its bucket.
* 3. Clusters are arranged in the order of its bucket. For example, Cluster 1
* is before cluster 2.
* 4. Each cluster is located at its closest possible position to its bucket
* while maintaining cluster properties 1, 2 and 3.
*
* Derived Properties:
* 1. No gaps between bucket of cluster and its head. If it had, we could have
* shifted the cluster up to reduce the distance of the cluster. The shifted
* arrangement has closer distance from its bucket.
* 2. No gaps inside the cluster. If it had, we could have shifted the item of
* the cluster right after that gap up to improve its distance.
* 3. Distances maintained in the table are non-negative. Each item in the
* cluster maintains the distance from its actual position to its perfect
* position (bucket). Since clusters reside at or after its perfect position,
* the distance is always non-negative.
* 4. Distances of items in the same cluster are in continuous ascending order.
* Since items of the same cluster are contiguous, the distances are
* continuous.
*/
typedef struct SymbolTable
{
/* Always aligned as first property */
const struct Object _;
const void **symbols;
long *distances;
long size; /* Table capacity is 2^size */
long used; /* Number of entries */
long remaps; /* Diff between current size and previous sizes */
long remap_end; /* Last remapped symbol position */
} symboltable;
/* Forward declarations of implementation-specific functions */
static const void *_SymbolTable;
static struct SymbolTable *SymbolTable_new ( void *_self, va_list *args );
static void SymbolTable_delete ( const void *_self );
static void SymbolTable_debug ( const void *_self );
static void resize ( struct SymbolTable *self );
static void remap ( struct SymbolTable *self );
static const long needs_remapping ( struct SymbolTable *self,
const uint32_t hash,
const long position,
long *new_position);
static const long lookup_index ( const struct SymbolTable *self,
const char *key,
const uint32_t key_hash,
const uintptr_t key_scope,
const unsigned long bucket,
const long end,
long * restrict insert_position,
long * restrict insert_distance);
static void insert_and_adjust ( struct SymbolTable *self,
const void *symbol,
long insert_position,
long *last_affected_position );
static const void *remove_and_adjust ( const struct SymbolTable *self,
long position,
long *last_affected_position);
static const long tail_of_cluster_by_position ( const struct SymbolTable *self, long position );
static const long end_of_cluster_by_bucket ( const struct SymbolTable *self, const long bucket );
static const unsigned long bucket_by_hash ( const uint32_t hash, const long size );
static inline const long bucket_by_position ( const struct SymbolTable *self, const long position );
static inline const long capacity ( const struct SymbolTable *self );
static ulang_always_inline const long threshold ( const struct SymbolTable *self );
/* Return static instance of a symbol table object. */
const void * const
SymbolTable ( void )
{
return _SymbolTable ? _SymbolTable :
( _SymbolTable = new( Class(),
"Symbol Table", Object(), sizeof( struct SymbolTable ),
new, "", SymbolTable_new,
delete, "", SymbolTable_delete,
debug, "", SymbolTable_debug,
(void *) 0 ) );
}
/*
* Lookup a symbol in the table.
*
* On success returns a pointer to the symbol in the table, otherwise returns
* null.
*/
const void
*lookup ( const void *_self,
const char *key,
const uintptr_t scope )
{
struct SymbolTable * self = cast( SymbolTable(), _self );
uint32_t hash = strhash( key, strlen( key ), scope );
long bucket = bucket_by_hash( hash, self -> size );
long end = capacity( self );
long position = lookup_index( self, key, hash, scope, bucket, end, NULL, NULL );
/* If position is a positive non-zero number then we've found an entry
* matching the key, so return it now.
*/
if ( position >= 0 )
return self -> symbols[ position ];
/* The other possibility is that the entry exists but is mapped to a
* previous table size. If this is the case the we need to iterate
* over previous table sizes (held in remaps).
*/
for ( register long i = 1; i <= self -> remaps; i++ )
{
/* The previous table size is always log2_buckets minus remap size,
* which is held in 'i' per iteration.
*/
long prev_bucket = bucket_by_hash( hash, self -> size - i );
if ( prev_bucket <= self -> remap_end )
{
/* Lookup the index based on the previous table size */
position = lookup_index( self, key, hash, scope, prev_bucket, self -> remap_end + 1, NULL, NULL );
/* If position is a positive non-zero number then we've found
* the symbol. However, because we've found it then we need
* to remap it to the current table size first.
*/
if ( position >= 0 )
{
needs_remapping( self, hash, position, &position );
return self -> symbols[ position ];
}
}
}
/* If we've reached here then we haven't found a match so we can return
* a null value to indicate such.
*/
return NULL;
}
void
install ( const void * restrict _self,
const void * restrict symbol )
{
struct SymbolTable *self = cast( SymbolTable(), _self );
const char *n = name( symbol );
uint32_t h = hash( symbol );
uintptr_t s = scope( symbol );
unsigned long bucket = bucket_by_hash( h, self -> size );
long end = capacity( self );
long insert_position = 0;
long insert_distance = 0;
/* lookup index position in the table for the key */
long position = lookup_index( self, n, h, s, bucket, end, &insert_position, &insert_distance);
/* If a match is found, e.g. non-zero positive integer then the symbol in
* theory already exists, so we can overwrite it. However, we must first
* check that it is not a constant or builtin function as these cannot be
* overwritten.
*/
if ( position >= 0 )
{
self -> symbols[ position ] = symbol;
return;
}
insert_and_adjust( self, symbol, insert_position, NULL );
/* Check we have not exceeded load factor. If we have then we should try to
* resize the table.
*/
if ( ++self -> used > threshold( self ) )
resize( self );
/* Check that, following any resize, we need to re-map our symbols position
* in the table.
*/
if ( needs_remapping( self, h, insert_position, NULL ) )
remap( self );
}
void
uninstall ( const void *_self,
const char *key,
const uintptr_t scope )
{
struct SymbolTable *self = cast( SymbolTable(), _self );
uint32_t hash = strhash( key, strlen( key ), scope );
long bucket = bucket_by_hash( hash, self -> size );
long end = capacity( self );
long pos = lookup_index( self, key, hash, scope, bucket, end, NULL, NULL);
/* Symbol wasn't found, so return */
if( pos < 0 )
return;
const void * symbol = remove_and_adjust( self, pos, NULL );
delete( symbol );
self -> used--;
}
/* Allocate memory and resources for new symbol table */
static struct SymbolTable
*SymbolTable_new ( void *_self, va_list *args )
{
struct SymbolTable *self = cast( SymbolTable(),
super_new( SymbolTable(), _self, args ) );
self -> size = va_arg( *args, const long );
if ( NULL == ( self -> symbols = calloc( capacity( self ), sizeof( void * ) ) ) )
{
errno = ENOMEM;
free( self -> symbols );
return NULL;
}
if ( NULL == ( self -> distances = calloc( capacity( self ), sizeof( long ) ) ) )
{
errno = ENOMEM;
free( self -> distances );
return NULL;
}
return self;
}
/* Free resources from a symbol table */
static void
SymbolTable_delete ( const void *_self )
{
struct SymbolTable *self = cast( SymbolTable(), _self );
/* Loop through all allocations. Add 1 to account for immediate
* subtraction assignment.
*/
for ( register long i = capacity( self ) + 1; i--; )
{
/* Delete anything we find */
if ( self -> symbols[ i ] ) delete( self -> symbols[ i ] );
}
free( self -> distances );
/* Pass pointer to parent object for clean-up */
super_delete( SymbolTable(), self );
}
/*
* Lookup an index position in the table.
*
* Based on the fact that a cluster is located at or after its bucket, we know
* the head of cluster is at or after bucket 'b'. So the search range starts
* from b. From b onwards, the first position with bucket of 'b' is the head
* of the cluster b if it exists. From b on, the last position with bucket of
* b is the tail of the cluster b if it exists. When we reach next cluster, or
* an empty position, or the end of the table, we know cluster b doesn’t exist.
*/
static const long
lookup_index ( const struct SymbolTable *self,
const char *key,
const uint32_t key_hash,
const uintptr_t key_scope,
const unsigned long bucket,
const long end,
long * restrict insert_position,
long * restrict insert_distance)
{
if ( NULL == self -> symbols )
return -1;
long i = bucket;
/* If we don’t find positions of the bucket before we reach the end of table,
* or find empty space or a cluster greater than the bucket, we know that
* the neither the cluster or the bucket exist.
*/
for (; i < end && self -> symbols[ i ] != NULL && ( i - self -> distances[ i ] ) <= bucket; i++ )
{
/* Starting from the bucket we loop through until we find a gap (empty
* position), a larger cluster, or end of table. When cluster is right
* we compare hash and key and scope to find the match.
*/
if ( ( i - self -> distances[ i ] ) == bucket
&& self -> symbols[ i ]
&& hash( self -> symbols[ i ] ) == key_hash
&& scope( self -> symbols[ i ] ) == key_scope )
{
return i;
}
}
if ( insert_position )
*insert_position = i;
if ( insert_distance )
*insert_distance = i - bucket;
/* No match found, return an impossible index */
return -1;
}
static void
insert_and_adjust ( struct SymbolTable *self,
const void *symbol,
long insert_position,
long *last_affected_position )
{
long distance = 0;
while ( 1 )
{
if ( insert_position >= capacity( self ) )
{
resize( self );
self -> symbols[ insert_position ] = symbol;
self -> distances[ insert_position ] = distance;
if ( last_affected_position )
*last_affected_position = insert_position;
return;
}
if ( self -> symbols[ insert_position ] == NULL )
{
/* Table position is empty */
self -> symbols[ insert_position ] = symbol;
self -> distances[ insert_position ] = distance;
if (last_affected_position )
*last_affected_position = insert_position;
return;
}
const void * t = self -> symbols[ insert_position ];
long next = tail_of_cluster_by_position( self, insert_position ) + 1;
distance += next - insert_position;
self -> symbols[ insert_position ] = symbol;
self -> distances[ insert_position ] = distance;
symbol = t;
insert_position = next;
}
}
static const void *
remove_and_adjust ( const struct SymbolTable *self,
long position,
long *last_affected_position)
{
const void * entry = self -> symbols[ position ];
//self -> symbols[ position ] = NULL;
self -> distances[ position ] = 0;
long cap = capacity( self ) -1;
while ( 1 )
{
if ( position == cap || self->symbols[position+1] == NULL || self->distances[position+1] == 0 )
{
self -> symbols[position] = NULL;
if ( last_affected_position )
*last_affected_position = position;
return entry;
}
long next = tail_of_cluster_by_position( self, position + 1 );
self -> symbols[ position ] = self -> symbols[ next ];
self -> distances[ position ] -= next - position;
position = next;
}
return entry;
}
static void
resize ( struct SymbolTable *self )
{
long prev_capacity = capacity( self );
self -> size++;
long new_capacity = capacity( self );
self -> symbols = realloc( self -> symbols, new_capacity * sizeof( void * ) );
for ( long i = prev_capacity; i <= new_capacity; i++ )
{
self -> symbols[ i ] = NULL;
self -> distances[ i ] = 0;
}
self -> remap_end = prev_capacity;
self -> remaps++;
}
static void
remap ( struct SymbolTable *self )
{
/* max entries to remap at a time */
long register rem = 16;
while ( self -> remap_end >= 0 && rem > 0 )
{
if ( self -> symbols[ self -> remap_end ] != NULL
&& needs_remapping( self, hash( self -> symbols[ self -> remap_end ]), self -> remap_end, NULL ) )
self -> remap_end--;
else
rem--;
}
if ( self -> remap_end <= 0 )
self -> remaps = 0;
}
static const long
needs_remapping ( struct SymbolTable *self,
const uint32_t hash,
const long position,
long *new_position)
{
/* Current bucket */
unsigned long current = bucket_by_position( self, position );
//current += self -> distances[ position ];
/* Expected bucket */
unsigned long expected = bucket_by_hash( hash, self -> size );
if ( current == expected )
return 0;
const void * sym = remove_and_adjust( self, position, new_position );
assert( sym );
long insert_position = end_of_cluster_by_bucket( self, expected );
if ( new_position )
*new_position = insert_position;
insert_and_adjust( self, sym, insert_position, new_position );
self -> distances[ position ] = 0;
return 1;
}
static const long
tail_of_cluster_by_position ( const struct SymbolTable *self,
long position )
{
long bucket = position - self -> distances[ position ];
long end = capacity( self );
while ( position < end
&& self -> symbols[ position ] != NULL
&& (position - self -> distances[ position ]) == bucket )
{
position++;
}
return position - 1;
}
static const long
end_of_cluster_by_bucket ( const struct SymbolTable *self,
const long bucket )
{
long i = bucket;
long end = capacity( self );
while ( i < end
&& self -> symbols[ i ] != NULL
&& ( bucket - self -> distances[ i ] ) <= bucket )
{
i++;
}
return i;
}
/* Get symbol location by position */
static ulang_always_inline const long
bucket_by_position ( const struct SymbolTable *self,
const long position )
{
return position - self -> distances[ position ];
}
/* Get symbol location by hash */
static const unsigned long
bucket_by_hash ( const uint32_t hash, const long size )
{
/* Give up now if there isn't any size... */
if ( size <= 0 ) return 0;
/* Expand the hash into a wider type */
unsigned long hash__ = hash;
/* The first thing we do is we map the hash value into the full 64 bit
* range of numbers. So we multiply the incoming hash value with 2^64/phi
* which is approximately 11400714819323198485. (the number
* 11400714819323198486 is closer but we don’t want multiples of two
* because that would throw away one bit. Multiplying with that number
* will overflow and will wrap around the whole 64 bit range in a nice
* pattern, giving an even distribution across the whole range from 0 to
* 2^64.
*/
hash__ *= 11400714819323198485llu;
/* Return the n-th bits from the hash by right shifting 64-n bits from
* the hash. We start at 64 because we've mapped the hash into
* a 64-bit wide range.
*/
hash__ >>= ( 64 - size );
/* Ensure returned number is bound within the table range */
assert( hash__ >= 0 && hash__ <= 1 << size );
return hash__;
}
static ulang_always_inline const long
capacity ( const struct SymbolTable *self )
{
/* the table has an advertised size of table_size, represented by
* log2_table_size, and an internal table size.
*
* 2^log2_buckets is the table_size
*/
return ( 1 << self -> size ) + self -> size;
}
/*
* Returns a threshold number of calculated from the table capacity and
* a pre-defined constant load factor.
*/
static ulang_always_inline const long
threshold ( const struct SymbolTable *self )
{
/* One advantage of clustered hashing is its load factor. It can have
* really high load factors with little sacrifice of speed. 75% is tested
* with normally single digit max distances from the cluster’s bucket to
* its tail.
*/
return capacity( self ) - ( capacity( self ) >> ULANG_LOAD_FACTOR_BITS ) ;
}
/*-
* Copyright © 2018, 2019, 2020 Simon Deeley and respective contributors.
* All rights reserved.
*
* This source file contains source code derived from source code and topics
* written by Axel-Tobias Schreiner Copyright © 1993. Other contributors
* are attributed where necessary in the source code comments.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed in Up! Copyright Simon Deeley
* and contributors.
* 4. Neither the authors name nor the names of the contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $@ symtabl.h - (c) 2019 Simon Deeley $
*/
#ifndef _SYMTABL_H
#define _SYMTABL_H
#include <sys/types.h>
extern const void * const SymbolTable ( void );
const void *lookup ( const void *_self, const char *key, const uintptr_t scope );
void install ( const void * restrict _self, const void * restrict symbol );
void uninstall ( const void *_self, const char *key, const uintptr_t scope );
#endif /* _SYMTABL_H */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment