Last active
March 10, 2021 09:59
-
-
Save hotchkiss/4652940 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
file: hashtable.py | |
language: python3 | |
author: sps@cs.rit.edu Sean Strout | |
author: jeh@cs.rit.edu James Heliotis | |
author: anh@cs.rit.edu Arthur Nunes-Harwitt | |
author: hotchkiss@rit.edu Collin Hotchkiss | |
description: open addressing Hash Table for CS 242 Lecture | |
""" | |
import copy | |
class HashTable( object ): | |
""" | |
The HashTable data structure contains a collection of values | |
where each value is located by a hashable key. | |
No two values may have the same key, but more than one | |
key may have the same value. | |
""" | |
__slots__ = ( "table", "size" ) | |
def __init__( self, capacity=89 ): | |
""" | |
Create a hash table. | |
The capacity parameter determines its initial size. | |
""" | |
self.table = [ None ] * capacity | |
for i in range( capacity ): | |
self.table[ i ] = [] | |
self.size = 0 | |
def __str__( self ): | |
""" | |
Return the entire contents of this hash table, | |
one chain of entries per line. | |
""" | |
result = "" | |
for i in range( len( self.table ) ): | |
result += str( i ) + ": " | |
result += str( self.table[i] ) + "\n" | |
return result | |
class _Entry( object ): | |
""" | |
A nested class used to hold key/value pairs. | |
""" | |
__slots__ = ( "key", "value" ) | |
def __init__( self, entryKey, entryValue ): | |
self.key = entryKey | |
self.value = entryValue | |
def __str__( self ): | |
return "(" + str( self.key ) + ", " + str( self.value ) + ")" | |
def hash_function( val, n ): | |
""" | |
Compute a hash of the val string that is in [0 ... n). | |
""" | |
#hashcode = ( len( val ) % n ) | |
hashcode = 0 | |
for i in range( len( val ) ): | |
hashcode = (71 * hashcode + ord( val[ i ] ) ) % n | |
return hashcode | |
def keys( hTable ): | |
""" | |
Return a list of keys in the given hashTable. | |
""" | |
result = [] | |
for item in hTable.table: | |
if item != []: | |
for entry in item: | |
result.append( entry.key ) | |
return result | |
def contains( hTable, key ): | |
""" | |
Return True iff hTable has an entry with the given key. | |
""" | |
index = hash_function( key, len( hTable.table ) ) | |
lst = hTable.table[ index ] | |
for i in lst: | |
if i.key == key: | |
return True | |
return False | |
def put( hTable, key, value ): | |
""" | |
Using the given hash table, set the given key to the | |
given value. If the key already exists, the given value | |
will replace the previous one already in the table. | |
If the table is full, an Exception is raised. | |
""" | |
ratio = load( hTable ) | |
print( ratio ) | |
if ratio >= .75: | |
hTable = rehash( hTable ) | |
index = hash_function( key, len( hTable.table ) ) | |
if hTable.table[ index ] == []: | |
hTable.table[ index ] = [ HashTable._Entry( key, value ) ] | |
hTable.size += 1 | |
else: | |
for i in range( len( hTable.table[ index ] ) ): | |
if hTable.table[ index ][ i ].key == key: | |
hTable.table[ index ][ i ].value = value | |
return True | |
hTable.table[ index ].append( HashTable._Entry( key, value ) ) | |
return True | |
def get( hTable, key ): | |
""" | |
Return the value associated with the given key in | |
the given hash table. | |
Precondition: contains(hTable, key) | |
""" | |
index = hash_function( key, len( hTable.table ) ) | |
if hTable.table[ index ] == []: | |
raise Exception( "Hash table does not contain key." ) | |
else: | |
lst = hTable.table[ index ] | |
for i in lst: | |
if i.key == key: | |
return i.value | |
raise Exception( "Hash table does not contain key." ) | |
def imbalance( hTable ): | |
""" | |
Compute average length of all non-empty chains | |
preconditions: none | |
""" | |
numOfChains = 0 | |
sum = 0 | |
for i in range( len( hTable.table ) ): | |
if hTable.table[ i ] != []: | |
sum += len( hTable.table[ i ] ) | |
numOfChains += 1 | |
print( str( sum ) + ", " + str( numOfChains ) ) | |
avg = sum / numOfChains | |
return ( avg - 1 ) | |
def load( hTable ): | |
""" | |
Checks ratio of items in table to table size | |
""" | |
sum = 0 | |
size = len( hTable.table ) | |
for i in range( size ): | |
sum += len( hTable.table[ i ] ) | |
return sum / size | |
def rehash( hTable ): | |
""" | |
Performs a rehash every time the table starts to fill up. | |
""" | |
newN = ( 2 * len( hTable.table ) ) + 1 | |
print( "New capacity: " + str( newN ) ) | |
newTable = HashTable( newN ) | |
for i in range( len( hTable.table ) ): | |
for item in hTable.table[ i ]: | |
index = hash_function( item.key, newN ) | |
newTable.table[ index ] = [ HashTable._Entry( item.key, item.value ) ] | |
hTable.table = copy.deepcopy( newTable.table ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
description: Word Count Program for CS 242 | |
file: word_count.py | |
language: python3 | |
author: sps@cs.rit.edu Sean Strout | |
author: jeh@cs.rit.edu James Heliotis | |
author: bks@cs.rit.edu ben k steele | |
author: hotchkiss@rit.edu Collin Hotchkiss | |
""" | |
# import the functions needed from the hashtable.py module. | |
from hashtable import HashTable, get, keys, put, contains, imbalance | |
def word_count( hTable, filename ): | |
""" | |
Record the frequency of all words in the named file in the hashtable. | |
word_count : HashTable String -> HashTable | |
""" | |
# Read the words of the text file into the word count table. | |
fd = open( filename ) | |
for line in fd: | |
for word in line.split(): | |
# using a regular expression argument to strip(), | |
# strip out punctuation and convert token to lower-case. | |
word = word.strip(",.\"\';:-!?").lower() | |
if contains( hTable, word ): | |
count = get( hTable, word ) | |
put( hTable, word, count + 1 ) | |
else: | |
put( hTable, word, 1 ) | |
fd.close() # closing the file is a 'good practice'. | |
return hTable | |
def printSummary( theTable ): | |
""" | |
printSummary prints a summary of information about the hash table contents. | |
printSummary : HashTable -> NoneType | |
""" | |
# Display the entire table! | |
print( "Unique words:", theTable.size ) | |
# Find the most common word in the text file. | |
total = 0 | |
maxWord = "" | |
maxCount = 0 | |
for key in keys( theTable ): | |
thisCount = get( theTable, key ) | |
total += thisCount | |
if thisCount > maxCount: | |
maxCount = thisCount | |
maxWord = key | |
print( "There are " + str( len( keys( theTable ) ) ) + " words." ) | |
print( "Total words:", total ) | |
print( '"' + maxWord + "\" appeared ", str( maxCount ), | |
" times, more than any other word." ) | |
def printTable( hTable ): | |
""" | |
Print the contents of the given hash table. | |
Each key/value pair is displayed in parentheses, tuple-style. | |
All pairs appear on a single line. | |
printTable : HashTable -> NoneType | |
""" | |
print( "Word Count Data ---------------" ) | |
lcount = 0 | |
ltext = 0 | |
for key in keys( hTable ): | |
# print( "(" + key + "," + str( get( hTable, key ) ) + ")", end=" " ) | |
txt = "(" + key + "," + str( get( hTable, key ) ) + ")" | |
ltext += len( txt ) | |
if ltext > 51: | |
print( txt ) | |
ltext = 0 | |
else: | |
print( txt, end=" " ) | |
print() | |
def main(): | |
capacity = int( input( "Enter capacity (-1 for default): " ) ) | |
if capacity < 0: | |
hTable = HashTable() | |
else: | |
hTable = HashTable( capacity ) | |
filename = input( "Enter filename: " ) | |
wordTable = word_count( hTable, filename ) | |
printSummary( wordTable ) | |
print( "Average chain length: " + str( imbalance( wordTable ) ) ) | |
while True: | |
print( "Commands: k[ey] <word> f[ind] <word> q[uit] ? ", end=" " ) | |
response = input( ":- " ) # the displayed prompt | |
query = response.split() | |
if len( response ) == 0 or not response[0] in "fkq": | |
print( response + " invalid. Please enter a command and a word." ) | |
response = "" | |
continue | |
if query[0] == "k": | |
print( "( " + query[1] + " in text ) is " \ | |
+ str( contains( wordTable, query[1] ) ) + "." ) | |
if query[0] == "f": | |
if contains( wordTable, query[1] ): | |
print( query[1] + " appears " \ | |
+ str( get( wordTable, query[1] ) ) + " times." ) | |
else: | |
print( query[1] + " in not in the text." ) | |
if query[0] == "q": | |
break | |
# | |
answer = input( "Do you want to see the entire table?(y/n) " ) | |
if answer != "y": | |
return | |
printTable( wordTable ) | |
# run the main program | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment