Skip to content

Instantly share code, notes, and snippets.

@hotchkiss
Last active March 10, 2021 09:59
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save hotchkiss/4652940 to your computer and use it in GitHub Desktop.
Save hotchkiss/4652940 to your computer and use it in GitHub Desktop.
"""
file: hashtable.py
language: python3
author: sps@cs.rit.edu Sean Strout
author: jeh@cs.rit.edu James Heliotis
author: anh@cs.rit.edu Arthur Nunes-Harwitt
author: hotchkiss@rit.edu Collin Hotchkiss
description: open addressing Hash Table for CS 242 Lecture
"""
import copy
class HashTable( object ):
"""
The HashTable data structure contains a collection of values
where each value is located by a hashable key.
No two values may have the same key, but more than one
key may have the same value.
"""
__slots__ = ( "table", "size" )
def __init__( self, capacity=89 ):
"""
Create a hash table.
The capacity parameter determines its initial size.
"""
self.table = [ None ] * capacity
for i in range( capacity ):
self.table[ i ] = []
self.size = 0
def __str__( self ):
"""
Return the entire contents of this hash table,
one chain of entries per line.
"""
result = ""
for i in range( len( self.table ) ):
result += str( i ) + ": "
result += str( self.table[i] ) + "\n"
return result
class _Entry( object ):
"""
A nested class used to hold key/value pairs.
"""
__slots__ = ( "key", "value" )
def __init__( self, entryKey, entryValue ):
self.key = entryKey
self.value = entryValue
def __str__( self ):
return "(" + str( self.key ) + ", " + str( self.value ) + ")"
def hash_function( val, n ):
"""
Compute a hash of the val string that is in [0 ... n).
"""
#hashcode = ( len( val ) % n )
hashcode = 0
for i in range( len( val ) ):
hashcode = (71 * hashcode + ord( val[ i ] ) ) % n
return hashcode
def keys( hTable ):
"""
Return a list of keys in the given hashTable.
"""
result = []
for item in hTable.table:
if item != []:
for entry in item:
result.append( entry.key )
return result
def contains( hTable, key ):
"""
Return True iff hTable has an entry with the given key.
"""
index = hash_function( key, len( hTable.table ) )
lst = hTable.table[ index ]
for i in lst:
if i.key == key:
return True
return False
def put( hTable, key, value ):
"""
Using the given hash table, set the given key to the
given value. If the key already exists, the given value
will replace the previous one already in the table.
If the table is full, an Exception is raised.
"""
ratio = load( hTable )
print( ratio )
if ratio >= .75:
hTable = rehash( hTable )
index = hash_function( key, len( hTable.table ) )
if hTable.table[ index ] == []:
hTable.table[ index ] = [ HashTable._Entry( key, value ) ]
hTable.size += 1
else:
for i in range( len( hTable.table[ index ] ) ):
if hTable.table[ index ][ i ].key == key:
hTable.table[ index ][ i ].value = value
return True
hTable.table[ index ].append( HashTable._Entry( key, value ) )
return True
def get( hTable, key ):
"""
Return the value associated with the given key in
the given hash table.
Precondition: contains(hTable, key)
"""
index = hash_function( key, len( hTable.table ) )
if hTable.table[ index ] == []:
raise Exception( "Hash table does not contain key." )
else:
lst = hTable.table[ index ]
for i in lst:
if i.key == key:
return i.value
raise Exception( "Hash table does not contain key." )
def imbalance( hTable ):
"""
Compute average length of all non-empty chains
preconditions: none
"""
numOfChains = 0
sum = 0
for i in range( len( hTable.table ) ):
if hTable.table[ i ] != []:
sum += len( hTable.table[ i ] )
numOfChains += 1
print( str( sum ) + ", " + str( numOfChains ) )
avg = sum / numOfChains
return ( avg - 1 )
def load( hTable ):
"""
Checks ratio of items in table to table size
"""
sum = 0
size = len( hTable.table )
for i in range( size ):
sum += len( hTable.table[ i ] )
return sum / size
def rehash( hTable ):
"""
Performs a rehash every time the table starts to fill up.
"""
newN = ( 2 * len( hTable.table ) ) + 1
print( "New capacity: " + str( newN ) )
newTable = HashTable( newN )
for i in range( len( hTable.table ) ):
for item in hTable.table[ i ]:
index = hash_function( item.key, newN )
newTable.table[ index ] = [ HashTable._Entry( item.key, item.value ) ]
hTable.table = copy.deepcopy( newTable.table )
"""
description: Word Count Program for CS 242
file: word_count.py
language: python3
author: sps@cs.rit.edu Sean Strout
author: jeh@cs.rit.edu James Heliotis
author: bks@cs.rit.edu ben k steele
author: hotchkiss@rit.edu Collin Hotchkiss
"""
# import the functions needed from the hashtable.py module.
from hashtable import HashTable, get, keys, put, contains, imbalance
def word_count( hTable, filename ):
"""
Record the frequency of all words in the named file in the hashtable.
word_count : HashTable String -> HashTable
"""
# Read the words of the text file into the word count table.
fd = open( filename )
for line in fd:
for word in line.split():
# using a regular expression argument to strip(),
# strip out punctuation and convert token to lower-case.
word = word.strip(",.\"\';:-!?").lower()
if contains( hTable, word ):
count = get( hTable, word )
put( hTable, word, count + 1 )
else:
put( hTable, word, 1 )
fd.close() # closing the file is a 'good practice'.
return hTable
def printSummary( theTable ):
"""
printSummary prints a summary of information about the hash table contents.
printSummary : HashTable -> NoneType
"""
# Display the entire table!
print( "Unique words:", theTable.size )
# Find the most common word in the text file.
total = 0
maxWord = ""
maxCount = 0
for key in keys( theTable ):
thisCount = get( theTable, key )
total += thisCount
if thisCount > maxCount:
maxCount = thisCount
maxWord = key
print( "There are " + str( len( keys( theTable ) ) ) + " words." )
print( "Total words:", total )
print( '"' + maxWord + "\" appeared ", str( maxCount ),
" times, more than any other word." )
def printTable( hTable ):
"""
Print the contents of the given hash table.
Each key/value pair is displayed in parentheses, tuple-style.
All pairs appear on a single line.
printTable : HashTable -> NoneType
"""
print( "Word Count Data ---------------" )
lcount = 0
ltext = 0
for key in keys( hTable ):
# print( "(" + key + "," + str( get( hTable, key ) ) + ")", end=" " )
txt = "(" + key + "," + str( get( hTable, key ) ) + ")"
ltext += len( txt )
if ltext > 51:
print( txt )
ltext = 0
else:
print( txt, end=" " )
print()
def main():
capacity = int( input( "Enter capacity (-1 for default): " ) )
if capacity < 0:
hTable = HashTable()
else:
hTable = HashTable( capacity )
filename = input( "Enter filename: " )
wordTable = word_count( hTable, filename )
printSummary( wordTable )
print( "Average chain length: " + str( imbalance( wordTable ) ) )
while True:
print( "Commands: k[ey] <word> f[ind] <word> q[uit] ? ", end=" " )
response = input( ":- " ) # the displayed prompt
query = response.split()
if len( response ) == 0 or not response[0] in "fkq":
print( response + " invalid. Please enter a command and a word." )
response = ""
continue
if query[0] == "k":
print( "( " + query[1] + " in text ) is " \
+ str( contains( wordTable, query[1] ) ) + "." )
if query[0] == "f":
if contains( wordTable, query[1] ):
print( query[1] + " appears " \
+ str( get( wordTable, query[1] ) ) + " times." )
else:
print( query[1] + " in not in the text." )
if query[0] == "q":
break
#
answer = input( "Do you want to see the entire table?(y/n) " )
if answer != "y":
return
printTable( wordTable )
# run the main program
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment