Skip to content

Instantly share code, notes, and snippets.

@alexhawkins
Last active August 7, 2021 23:33
Show Gist options
  • Star 81 You must be signed in to star a gist
  • Fork 25 You must be signed in to fork a gist
  • Save alexhawkins/f6329420f40e5cafa0a4 to your computer and use it in GitHub Desktop.
Save alexhawkins/f6329420f40e5cafa0a4 to your computer and use it in GitHub Desktop.
Correct Implementation of a Hash Table in JavaScript
var HashTable = function() {
this._storage = [];
this._count = 0;
this._limit = 8;
}
HashTable.prototype.insert = function(key, value) {
//create an index for our storage location by passing it through our hashing function
var index = this.hashFunc(key, this._limit);
//retrieve the bucket at this particular index in our storage, if one exists
//[[ [k,v], [k,v], [k,v] ] , [ [k,v], [k,v] ] [ [k,v] ] ]
var bucket = this._storage[index]
//does a bucket exist or do we get undefined when trying to retrieve said index?
if (!bucket) {
//create the bucket
var bucket = [];
//insert the bucket into our hashTable
this._storage[index] = bucket;
}
var override = false;
//now iterate through our bucket to see if there are any conflicting
//key value pairs within our bucket. If there are any, override them.
for (var i = 0; i < bucket.length; i++) {
var tuple = bucket[i];
if (tuple[0] === key) {
//overide value stored at this key
tuple[1] = value;
override = true;
}
}
if (!override) {
//create a new tuple in our bucket
//note that this could either be the new empty bucket we created above
//or a bucket with other tupules with keys that are different than
//the key of the tuple we are inserting. These tupules are in the same
//bucket because their keys all equate to the same numeric index when
//passing through our hash function.
bucket.push([key, value]);
this._count++
//now that we've added our new key/val pair to our storage
//let's check to see if we need to resize our storage
if (this._count > this._limit * 0.75) {
this.resize(this._limit * 2);
}
}
return this;
};
HashTable.prototype.remove = function(key) {
var index = this.hashFunc(key, this._limit);
var bucket = this._storage[index];
if (!bucket) {
return null;
}
//iterate over the bucket
for (var i = 0; i < bucket.length; i++) {
var tuple = bucket[i];
//check to see if key is inside bucket
if (tuple[0] === key) {
//if it is, get rid of this tuple
bucket.splice(i, 1);
this._count--;
if (this._count < this._limit * 0.25) {
this._resize(this._limit / 2);
}
return tuple[1];
}
}
};
HashTable.prototype.retrieve = function(key) {
var index = this.hashFunc(key, this._limit);
var bucket = this._storage[index];
if (!bucket) {
return null;
}
for (var i = 0; i < bucket.length; i++) {
var tuple = bucket[i];
if (tuple[0] === key) {
return tuple[1];
}
}
return null;
};
HashTable.prototype.hashFunc = function(str, max) {
var hash = 0;
for (var i = 0; i < str.length; i++) {
var letter = str[i];
hash = (hash << 5) + letter.charCodeAt(0);
hash = (hash & hash) % max;
}
return hash;
};
HashTable.prototype.resize = function(newLimit) {
var oldStorage = this._storage;
this._limit = newLimit;
this._count = 0;
this._storage = [];
oldStorage.forEach(function(bucket) {
if (!bucket) {
return;
}
for (var i = 0; i < bucket.length; i++) {
var tuple = bucket[i];
this.insert(tuple[0], tuple[1]);
}
}.bind(this));
};
HashTable.prototype.retrieveAll = function() {
console.log(this._storage);
//console.log(this._limit);
};
/******************************TESTS*******************************/
var hashT = new HashTable();
hashT.insert('Alex Hawkins', '510-599-1930');
//hashT.retrieve();
//[ , , , [ [ 'Alex Hawkins', '510-599-1930' ] ] ]
hashT.insert('Boo Radley', '520-589-1970');
//hashT.retrieve();
//[ , [ [ 'Boo Radley', '520-589-1970' ] ], , [ [ 'Alex Hawkins', '510-599-1930' ] ] ]
hashT.insert('Vance Carter', '120-589-1970').insert('Rick Mires', '520-589-1970').insert('Tom Bradey', '520-589-1970').insert('Biff Tanin', '520-589-1970');
//hashT.retrieveAll();
/*
[ ,
[ [ 'Boo Radley', '520-589-1970' ],
[ 'Tom Bradey', '520-589-1970' ] ],
,
[ [ 'Alex Hawkins', '510-599-1930' ],
[ 'Rick Mires', '520-589-1970' ] ],
,
,
[ [ 'Biff Tanin', '520-589-1970' ] ] ]
*/
//overide example (Phone Number Change)
//
hashT.insert('Rick Mires', '650-589-1970').insert('Tom Bradey', '818-589-1970').insert('Biff Tanin', '987-589-1970');
//hashT.retrieveAll();
/*
[ ,
[ [ 'Boo Radley', '520-589-1970' ],
[ 'Tom Bradey', '818-589-1970' ] ],
,
[ [ 'Alex Hawkins', '510-599-1930' ],
[ 'Rick Mires', '650-589-1970' ] ],
,
,
[ [ 'Biff Tanin', '987-589-1970' ] ] ]
*/
hashT.remove('Rick Mires');
hashT.remove('Tom Bradey');
//hashT.retrieveAll();
/*
[ ,
[ [ 'Boo Radley', '520-589-1970' ] ],
,
[ [ 'Alex Hawkins', '510-599-1930' ] ],
,
,
[ [ 'Biff Tanin', '987-589-1970' ] ] ]
*/
hashT.insert('Dick Mires', '650-589-1970').insert('Lam James', '818-589-1970').insert('Ricky Ticky Tavi', '987-589-1970');
hashT.retrieveAll();
/* NOTICE HOW HASH TABLE HAS NOW DOUBLED IN SIZE UPON REACHING 75% CAPACITY ie 6/8. It is now size 16.
[,
,
[ [ 'Vance Carter', '120-589-1970' ] ],
[ [ 'Alex Hawkins', '510-599-1930' ],
[ 'Dick Mires', '650-589-1970' ],
[ 'Lam James', '818-589-1970' ] ],
,
,
,
,
,
[ [ 'Boo Radley', '520-589-1970' ],
[ 'Ricky Ticky Tavi', '987-589-1970' ] ],
,
,
,
,
[ [ 'Biff Tanin', '987-589-1970' ] ] ]
*/
console.log(hashT.retrieve('Lam James')); //818-589-1970
console.log(hashT.retrieve('Dick Mires')); //650-589-1970
console.log(hashT.retrieve('Ricky Ticky Tavi')); //987-589-1970
console.log(hashT.retrieve('Alex Hawkins')); //510-599-1930
console.log(hashT.retrieve('Lebron James')); //null
@thebrenthaines
Copy link

Very nice, Alex. You have captured the exact gist of what makes a hash map powerful -- look up of associated values in constant time (usually, depending upon the entropy in your hash algorithm, and how full the storage table gets.).

Speaking of hash algorithms, yours seems like it would work ok, but hashing is a tricking topic. I am wondering if you need to calculate the modulo on every iteration and I am not sure what this statement does - hash = (hash & hash) % max;

Performing an 'and' on two references to the same value seems like a no op to me.

I am also thinking that a left shift of 5 for the hash is probably not going to yield a nice distribution. You lose data pretty quickly (after 6 characters for 32 bit machines and after 12 for 64). You probably want to do some sort of rotation (where the high order bits are rolled around to the low order. Take a look at the MurmurHash3 algorithm -- https://github.com/karanlyons/murmurHash3.js/tree/master. It might be nice to learn about that.

One thing I was thinking about is that your has map works great for associating strings to values. How would you go about associating a javascript object to a value? What would the user of that library need to provide to make it work for any type / object and not just strings?

Great work!

P.S. You have implemented the bucketing hash map. That is the most common one, but there are other ways of handing collisions that are interesting.

@hellolin324
Copy link

"and I am not sure what this statement does - hash = (hash & hash) % max;"

I think hash & hash should work fine, since it is just a way to make sure the result remain an integer, not sure what the % does here.

@andreasanta
Copy link

Why instead of using an array of tuples for the bucket you don't use a faster javascript object (which is in its own way a hashmap)?

@khaledosman
Copy link

@andreasanta I think a hashmap in itself is a similar to a hashtable so you wouldn't need the entire implementation since you can achieve the same with the simple javascript object anyway, I think this is more about how it works internally.. Also javascript objects don't maintain sort order across browsers as opposed to arrays.

@chrisrocco
Copy link

Awesome. Very clean.

@bdange
Copy link

bdange commented Apr 16, 2019

This is really great work.
One question, I have been trying to reverse your retrieve function by entering phone number, but this hasn't been working.
I've tried by entering this code:

HashTable.prototype.retrieve = function(key) {
var index = this.hashFunc(key, this._limit);
var bucket = this._storage[index];

if (!bucket) {
return null;
}

for (var i = 0; i < bucket.length; i++) {
var tuple = bucket[i];
if (tuple[1] === key) {
return tuple[0];
}
};

return null;

};

Any idea of what goes wrong?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment