Skip to content

Instantly share code, notes, and snippets.

@joshourisman
Created February 20, 2021 13:42
Show Gist options
  • Save joshourisman/32bfe54aeb6e8c5e4768cf075c6df892 to your computer and use it in GitHub Desktop.
Save joshourisman/32bfe54aeb6e8c5e4768cf075c6df892 to your computer and use it in GitHub Desktop.
class hashtable:
"""It's a hashtable..."""
__bucketuse = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
__table = {} # The actual hash table
__used = 0.0 # The number of entries in the table
def __init__(self, keys=None):
if keys:
for key in keys:
self.__table[key] = None
self.__bucketuse = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
def clear(self):
self.__bucketuse = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
def buckets(self):
return self.__bucketuse
def bucketfill(self):
if self.__used == 0:
return self.__bucketuse
else:
fill = []
for i in range(8):
fill.append((self.__bucketuse[i] / 2197) * 100)
return fill
def __len__(self):
return len(self.__table)
def chiSquared(self):
if table.__used == 0:
return 0.0
expected = self.__used / 8
terms = []
for item in self.__bucketuse:
terms.append(item)
terms = [elem-expected for elem in terms]
terms = [elem*elem for elem in terms]
terms = [elem/expected for elem in terms]
squaredChi = 0
for term in terms:
squaredChi += term
return squaredChi
# method to put put a key/data pair into the table
def put(self, item):
key = item[0]
if key < 2197:
self.__bucketuse[0] += 1
elif key < 4394:
self.__bucketuse[1] += 1
elif key < 6591:
self.__bucketuse[2] += 1
elif key < 8788:
self.__bucketuse[3] += 1
elif key < 10985:
self.__bucketuse[4] += 1
elif key < 13182:
self.__bucketuse[5] += 1
elif key < 15379:
self.__bucketuse[6] += 1
else:
self.__bucketuse[7] += 1
self.__table[item[0]] = item[1]
self.__used += 1
def display(self):
print self.__table
def getKey(self, key):
return self.__table[key]
def fill(self):
if self.__used == 0:
return 0.0
else:
return (self.__used / (len(self.__table) + 0.0))*100
def stats(self):
print "Table Size: %d" % len(self.__table)
print "Used Keys: %d" % self.__used
print "Fill Factor: %f percent" % self.fill()
def clear(self):
self.__table.clear()
# Get the name of the data file from the command line
import sys
dataFile = sys.argv[1]
# Read in the file and store the data in a useful way.
file = open(dataFile, "r")
data = file.read()
file.close()
data = data.split()
# Generate all potential keys.
keys = []
for i in range(17576): # 17576 is the max number of airport codes
keys.append(i)
# Generate actual keys for data.
actualkeys = []
for code in data:
A = (ord(code[0])-65)*26*26
B = (ord(code[1])-65)*26
C = (ord(code[2])-65)
key = (A^B^C) % 17576
actualkeys.append(key)
# Put random subsets of varying length from 200-299 into hashtable and
# check fill factor
import random
# The largest size must not be bigger than the number of unique keys or
# an infinite loop will result
#sizes = [200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 299]
#sizes = [0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 299]
sizes = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 299]
for i in range(len(sizes)):
subset = []
while len(subset) < sizes[i]:
index = random.randrange(0,len(data),1)
if data[index] in subset:
pass
else:
subset.append(data[index])
# Set up the table for the data.
table = hashtable(keys)
# Populate the table with actual data
maxKey = 0
for code in subset:
A = (ord(code[0])-65)*26*26
B = (ord(code[1])-65)*26
C = (ord(code[2])-65)
key = (A^B^C) % 17576
if key > maxKey:
maxKey = key
maxCode = code
table.put([key,code])
print "{%f, %f}," % (table.fill(), table.chiSquared())
for i in range(len(subset)):
subset.pop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment