karngyan/HashTableSeparateChaining.cpp

## HashTableSeparateChaining.cpp
/*

	An implementation of a hash-table
	using separate chaining with a linked list.

	@author: Gyan Prakash Karn, karngyan@gmail.com

*/

#include<iostream>
#include<limits>
#include<vector>
#include<algorithm>
#include<string>
#include<cfloat>

template<class K, class V>
class Entry {

	template<class A, class B>
	friend std::ostream& operator<<(std::ostream &strm, const Entry<A, B> & entry) {
		return strm << entry.key << " => " << entry.value;
	}

public:

	std::hash<K> H;
	long long int hash;
	K key;
	V value;

	template<class A, class B>
	Entry(A k, B v) {
		key = k;
		value = v;
		hash = H(key);
	}

	bool operator== (const Entry & other) {
		if (hash != other.hash) return false;
		return key == other.key;
	}
};


template<class K, class V>
class HashTableSeparateChaining {

	constexpr static int DEFAULT_CAPACITY = 3;
	constexpr static double DEFAULT_LOAD_FACTOR = 0.75;
	double maxLoadFactor;
	int capacity, threshold, sze = 0;
	std::vector<std::vector <Entry<K, V>>> table;
	std::hash<K> H;


public:
	//designated constructor
	HashTableSeparateChaining(int cap, double mLF) {
		if (cap < 0)
			throw std::invalid_argument("Illegal capacity");
		if (mLF <= 0 or mLF == DBL_MAX)
			throw std::invalid_argument("Illegal maxLoadFactor");
		maxLoadFactor = mLF;
		capacity = cap;
		threshold = (int) (capacity * maxLoadFactor);
		table.clear();
		table.resize(capacity);
	}

	HashTableSeparateChaining(int cap) : HashTableSeparateChaining(cap, DEFAULT_LOAD_FACTOR){}

	HashTableSeparateChaining() : HashTableSeparateChaining(DEFAULT_CAPACITY, DEFAULT_LOAD_FACTOR){}

	// return number of elements currently inside the hash table
	int size() {
		return sze;
	}

	// returns true/false depending on whether teh hash-table is empty
	bool empty() {
		return sze == 0;
	}

	// clear all content
	void clear() {
		for (int i = 0 ; i < capacity ; ++i) table[i].clear();
		sze = 0;
	}

	// returns true/false depending on whether a key is in the hash table
	bool hasKey(K key) {
		int bucketIndex = normalizeIndex(H(key));
		return bucketSeekEntry(bucketIndex, key) != NULL;
	}

	bool containsKey(K key) {
		return hasKey(key);
	}

	// insert, put, add all place a value in the hash-table
	void put(K key, V value) {
		insert(key, value);
	}

	void add(K key, V value) {
		insert(key, value);
	}

	void insert(K key, V value) {
		Entry<K, V> newEntry(key, value);
		int bucketIndex = normalizeIndex(newEntry.hash);
		bucketInsertEntry(bucketIndex, newEntry);
	}

	// Gets a key's values from the HT and returns the value.
	V* get(K key) {
		int bucketIndex = normalizeIndex(H(key));
		Entry<K, V> *entry = bucketSeekEntry(bucketIndex, key);
		if (entry != NULL) return entry->value;
		return NULL;
	}

	// Removes a key from the HT and returns the value.
	V* remove(K key) {
		int bucketIndex = normalizeIndex(H(key));
		return bucketRemoveEntry(bucketIndex, key);
	}

	// Returns the list of keys found within the hash table
	std::vector<K> keys() {

		std::vector<K> keys(size());
		for (auto bucket : table) {
			if (bucket != NULL) {
				for (auto entry : bucket) {
					keys.push_back(entry.key);
				}
			}
		}

		return keys;
	}

	// Returns the list of values found within the hash table
	std::vector<V> values() {

		std::vector<V> values(size());
		for (auto bucket : table) {
			if (bucket != NULL) {
				for (auto entry : bucket) {
					values.push_back(entry.value);
				}
			}
		}

		return values;
	}

	void showAll() {
		std::cout << "{\n";
		for (int i = 0 ; i < capacity ; ++i) {
			if (table[i].size()) {
				for (auto entry : table[i]) {
					std::cout << "\t" << entry << "\n";
				}
			}
		}
		std::cout << "}\n";
	}

private:
	// converts a hash value to an index
	// basically removing negative sign (i.e. hghest bit)
	// placing it in domain [0, capacity)
	int normalizeIndex(int keyHash) {
		return (keyHash & 0x7FFFFFFF) % capacity;
	}

	// Removes an entry from a given bucket if exists
	V* bucketRemoveEntry(int bucketIndex, K key) {
		Entry<K, V> *entry = bucketSeekEntry(bucketIndex, key);

		if (entry != NULL) {
			std::vector <Entry<K, V>> &links = table[bucketIndex];
			auto nodeToErase = std::find(links.begin(), links.end(), *entry);
			links.erase(nodeToErase);
			--sze;
			return &(entry->value);
		} else return NULL;
	}

	// Inserts an entry in a given bucket only if the entry does not
	// alreasy exist in the given bucket, but if it does -> update it
	void bucketInsertEntry(int bucketIndex, Entry<K, V> entry) {

		std::vector <Entry<K, V>> &bucket = table[bucketIndex];

		Entry<K, V> * existentEntry = bucketSeekEntry(bucketIndex, entry.key);
		if (existentEntry != NULL) {
			existentEntry->value = entry.value;
		} else {
			bucket.push_back(entry);
			if (++sze > threshold) resizeTable();
		}
	}

	// Finds and returns a particular entry in a given bucket if it exists,
	// return NULL otherwise
	Entry<K, V> * bucketSeekEntry(int bucketIndex, K key) {

		if (table[bucketIndex].empty()) return NULL;

		for (size_t i = 0 ; i < table[bucketIndex].size() ; ++i) {
			Entry<K, V> * entry = &table[bucketIndex][i];
			if (entry->key == key) return entry;
		}

		return NULL;
	}

	// Resizes the internal table holding buckets of entries
	void resizeTable() {

		int oldCapacity = capacity;
		capacity *= 2;
		threshold = (int) (capacity * maxLoadFactor);

		std::vector< std::vector <Entry<K, V>> > newTable(capacity);
		int tableLength = oldCapacity;
		for (int i = 0 ; i < tableLength ; ++i) {
			if (table[i].size()) {
				for (Entry<K, V> entry : table[i]) {
					int bucketIndex = normalizeIndex(entry.hash);
					newTable[bucketIndex].push_back(entry);
				}
			}

			// Help the GC
			table[i].clear();
		}

		table = newTable;
	}
};

int main() {

	HashTableSeparateChaining<std::string, std::string> map;
	map.insert("tourist", "red");
	map.insert("rpuneet", "purple");

	map.showAll();

	map.remove("rpuneet");
	map.showAll();


	return 0;

}
	/*

	An implementation of a hash-table
	using separate chaining with a linked list.

	@author: Gyan Prakash Karn, karngyan@gmail.com

	*/

	#include<iostream>
	#include<limits>
	#include<vector>
	#include<algorithm>
	#include<string>
	#include<cfloat>

	template<class K, class V>
	class Entry {

	template<class A, class B>
	friend std::ostream& operator<<(std::ostream &strm, const Entry<A, B> & entry) {
	return strm << entry.key << " => " << entry.value;
	}

	public:

	std::hash<K> H;
	long long int hash;
	K key;
	V value;

	template<class A, class B>
	Entry(A k, B v) {
	key = k;
	value = v;
	hash = H(key);
	}

	bool operator== (const Entry & other) {
	if (hash != other.hash) return false;
	return key == other.key;
	}
	};



	template<class K, class V>
	class HashTableSeparateChaining {

	constexpr static int DEFAULT_CAPACITY = 3;
	constexpr static double DEFAULT_LOAD_FACTOR = 0.75;
	double maxLoadFactor;
	int capacity, threshold, sze = 0;
	std::vector<std::vector <Entry<K, V>>> table;
	std::hash<K> H;


	public:
	//designated constructor
	HashTableSeparateChaining(int cap, double mLF) {
	if (cap < 0)
	throw std::invalid_argument("Illegal capacity");
	if (mLF <= 0 or mLF == DBL_MAX)
	throw std::invalid_argument("Illegal maxLoadFactor");
	maxLoadFactor = mLF;
	capacity = cap;
	threshold = (int) (capacity * maxLoadFactor);
	table.clear();
	table.resize(capacity);
	}

	HashTableSeparateChaining(int cap) : HashTableSeparateChaining(cap, DEFAULT_LOAD_FACTOR){}

	HashTableSeparateChaining() : HashTableSeparateChaining(DEFAULT_CAPACITY, DEFAULT_LOAD_FACTOR){}

	// return number of elements currently inside the hash table
	int size() {
	return sze;
	}

	// returns true/false depending on whether teh hash-table is empty
	bool empty() {
	return sze == 0;
	}

	// clear all content
	void clear() {
	for (int i = 0 ; i < capacity ; ++i) table[i].clear();
	sze = 0;
	}

	// returns true/false depending on whether a key is in the hash table
	bool hasKey(K key) {
	int bucketIndex = normalizeIndex(H(key));
	return bucketSeekEntry(bucketIndex, key) != NULL;
	}

	bool containsKey(K key) {
	return hasKey(key);
	}

	// insert, put, add all place a value in the hash-table
	void put(K key, V value) {
	insert(key, value);
	}

	void add(K key, V value) {
	insert(key, value);
	}

	void insert(K key, V value) {
	Entry<K, V> newEntry(key, value);
	int bucketIndex = normalizeIndex(newEntry.hash);
	bucketInsertEntry(bucketIndex, newEntry);
	}

	// Gets a key's values from the HT and returns the value.
	V* get(K key) {
	int bucketIndex = normalizeIndex(H(key));
	Entry<K, V> *entry = bucketSeekEntry(bucketIndex, key);
	if (entry != NULL) return entry->value;
	return NULL;
	}

	// Removes a key from the HT and returns the value.
	V* remove(K key) {
	int bucketIndex = normalizeIndex(H(key));
	return bucketRemoveEntry(bucketIndex, key);
	}

	// Returns the list of keys found within the hash table
	std::vector<K> keys() {

	std::vector<K> keys(size());
	for (auto bucket : table) {
	if (bucket != NULL) {
	for (auto entry : bucket) {
	keys.push_back(entry.key);
	}
	}
	}

	return keys;
	}

	// Returns the list of values found within the hash table
	std::vector<V> values() {

	std::vector<V> values(size());
	for (auto bucket : table) {
	if (bucket != NULL) {
	for (auto entry : bucket) {
	values.push_back(entry.value);
	}
	}
	}

	return values;
	}

	void showAll() {
	std::cout << "{\n";
	for (int i = 0 ; i < capacity ; ++i) {
	if (table[i].size()) {
	for (auto entry : table[i]) {
	std::cout << "\t" << entry << "\n";
	}
	}
	}
	std::cout << "}\n";
	}

	private:
	// converts a hash value to an index
	// basically removing negative sign (i.e. hghest bit)
	// placing it in domain [0, capacity)
	int normalizeIndex(int keyHash) {
	return (keyHash & 0x7FFFFFFF) % capacity;
	}

	// Removes an entry from a given bucket if exists
	V* bucketRemoveEntry(int bucketIndex, K key) {
	Entry<K, V> *entry = bucketSeekEntry(bucketIndex, key);

	if (entry != NULL) {
	std::vector <Entry<K, V>> &links = table[bucketIndex];
	auto nodeToErase = std::find(links.begin(), links.end(), *entry);
	links.erase(nodeToErase);
	--sze;
	return &(entry->value);
	} else return NULL;
	}

	// Inserts an entry in a given bucket only if the entry does not
	// alreasy exist in the given bucket, but if it does -> update it
	void bucketInsertEntry(int bucketIndex, Entry<K, V> entry) {

	std::vector <Entry<K, V>> &bucket = table[bucketIndex];

	Entry<K, V> * existentEntry = bucketSeekEntry(bucketIndex, entry.key);
	if (existentEntry != NULL) {
	existentEntry->value = entry.value;
	} else {
	bucket.push_back(entry);
	if (++sze > threshold) resizeTable();
	}
	}

	// Finds and returns a particular entry in a given bucket if it exists,
	// return NULL otherwise
	Entry<K, V> * bucketSeekEntry(int bucketIndex, K key) {

	if (table[bucketIndex].empty()) return NULL;

	for (size_t i = 0 ; i < table[bucketIndex].size() ; ++i) {
	Entry<K, V> * entry = &table[bucketIndex][i];
	if (entry->key == key) return entry;
	}

	return NULL;
	}

	// Resizes the internal table holding buckets of entries
	void resizeTable() {

	int oldCapacity = capacity;
	capacity *= 2;
	threshold = (int) (capacity * maxLoadFactor);

	std::vector< std::vector <Entry<K, V>> > newTable(capacity);
	int tableLength = oldCapacity;
	for (int i = 0 ; i < tableLength ; ++i) {
	if (table[i].size()) {
	for (Entry<K, V> entry : table[i]) {
	int bucketIndex = normalizeIndex(entry.hash);
	newTable[bucketIndex].push_back(entry);
	}
	}

	// Help the GC
	table[i].clear();
	}

	table = newTable;
	}
	};

	int main() {

	HashTableSeparateChaining<std::string, std::string> map;
	map.insert("tourist", "red");
	map.insert("rpuneet", "purple");

	map.showAll();

	map.remove("rpuneet");
	map.showAll();


	return 0;

	}