# MarksCode/thing.py

## thing.py
import string

######## Begin code which needs to be modified ##########

# Implementation 1
class MyChainDict(object):
	"""Hash table that resolves collisions by separate chaining.

	``self.mydict`` is a list of ``self.dictSize`` buckets; each bucket is a
	list of ``[key, value]`` pairs whose keys hash to that bucket.
	"""

	def __init__(self, size=10000):
		"""Create an empty table.

		Args:
			size: Number of buckets (defaults to 10000).

		Raises:
			ValueError: If ``size`` is smaller than 1.
		"""
		if size < 1:
			raise ValueError("size must be at least 1")
		self.dictSize = size
		self.mydict = [[] for _ in range(size)]

	def _find(self, key):
		"""Return ``(bucket, index)`` for ``key``; ``index`` is -1 if absent."""
		bucket = self.mydict[self.hashFunc(key)]
		for index, pair in enumerate(bucket):
			# Compare against the stored key only. Testing ``key in pair``
			# would also match a stored *value* that equals ``key``.
			if pair[0] == key:
				return bucket, index
		return bucket, -1

	def insert(self, key, value=None):
		"""Store ``value`` under ``key``, replacing any existing value."""
		bucket, index = self._find(key)
		if index < 0:
			bucket.append([key, value])
		else:
			bucket[index][1] = value

	def contains(self, key):
		"""Return True if ``key`` is stored in the table."""
		return self._find(key)[1] >= 0

	def value(self, key):
		"""Return the value stored under ``key``, or False if it is absent."""
		bucket, index = self._find(key)
		if index < 0:
			return False
		return bucket[index][1]

	def delete(self, key):
		"""Remove ``key`` if present; do nothing otherwise."""
		bucket, index = self._find(key)
		if index >= 0:
			# The position is located first, so the bucket is never mutated
			# while it is being iterated.
			bucket.pop(index)

	def get_key_values(self):
		"""Return a list of ``(key, value)`` tuples for every stored pair."""
		return [(pair[0], pair[1]) for bucket in self.mydict for pair in bucket]

	def dump(self):
		"""Print every stored ``[key, value]`` pair, one per line."""
		for bucket in self.mydict:
			for pair in bucket:
				print (pair)

	def hashFunc(self, key):
		"""Return the bucket index for ``key``."""
		return hash(key) % self.dictSize


# Implementation 2
class MyOpenLinearDict(object):
	"""Open-addressing hash table that resolves collisions by linear probing.

	``self.mydict`` holds ``self.dictSize`` slots, each a ``[key, value]``
	list. Unused and deleted slots carry private marker objects in the key
	position.
	"""

	# Unique marker objects: unlike marker strings, they can never be equal to
	# a key supplied by a caller.
	_EMPTY = object()
	_DELETED = object()

	def __init__(self, size=500000):
		"""Create an empty table.

		Args:
			size: Number of slots (defaults to 500000).

		Raises:
			ValueError: If ``size`` is smaller than 1.
		"""
		if size < 1:
			raise ValueError("size must be at least 1")
		self.dictSize = size
		self.mydict = [[self._EMPTY, None] for _ in range(size)]

	def _probe(self, key):
		"""Walk the probe sequence for ``key``.

		Returns:
			``(found, free)``: ``found`` is the slot index holding ``key`` or
			-1; ``free`` is the first reusable slot seen on the way or -1.
		"""
		free = -1
		bucket = self.hashFunc(key)
		# At most dictSize probes: after that every slot has been visited.
		for _ in range(self.dictSize):
			stored = self.mydict[bucket][0]
			if stored is self._EMPTY:
				# A never-used slot ends the chain: the key cannot be further on.
				return -1, (bucket if free < 0 else free)
			if stored is self._DELETED:
				# Remember the first tombstone for reuse, but keep looking:
				# the key may sit beyond it.
				if free < 0:
					free = bucket
			elif stored == key:
				return bucket, free
			bucket += 1
			if bucket >= self.dictSize:
				bucket = 0
		return -1, free

	def insert(self, key, value=None):
		"""Store ``value`` under ``key``, replacing any existing value.

		Raises:
			RuntimeError: If the table has no free slot left.
		"""
		found, free = self._probe(key)
		if found >= 0:
			self.mydict[found][1] = value
		elif free >= 0:
			self.mydict[free] = [key, value]
		else:
			raise RuntimeError("hash table is full")

	def contains(self, key):
		"""Return True if ``key`` is stored in the table."""
		return self._probe(key)[0] >= 0

	def value(self, key):
		"""Return the value stored under ``key``, or None if it is absent."""
		found = self._probe(key)[0]
		if found >= 0:
			return self.mydict[found][1]
		return None

	def delete(self, key):
		"""Remove ``key`` if present; do nothing otherwise."""
		found = self._probe(key)[0]
		if found >= 0:
			# Leave a tombstone so keys probed past this slot stay reachable.
			self.mydict[found] = [self._DELETED, None]

	def _is_live(self, slot):
		"""Return True if ``slot`` holds a real key rather than a marker."""
		return slot[0] is not self._EMPTY and slot[0] is not self._DELETED

	def get_key_values(self):
		"""Return a list of ``(key, value)`` tuples for every stored pair."""
		return [(slot[0], slot[1]) for slot in self.mydict if self._is_live(slot)]

	def dump(self):
		"""Print each stored key and value, one pair per line."""
		for slot in self.mydict:
			if self._is_live(slot):
				print (slot[0], slot[1])

	def hashFunc(self, key):
		"""Return the home slot index for ``key``."""
		return hash(key) % self.dictSize

# Implementation 3
class MyOpenQuadDict(object):
	"""Open-addressing hash table that resolves collisions by quadratic probing.

	``self.mydict`` holds ``self.dictSize`` slots, each a ``[key, value]``
	list. Unused and deleted slots carry private marker objects in the key
	position.
	"""

	# Unique marker objects: unlike marker strings, they can never be equal to
	# a key supplied by a caller.
	_EMPTY = object()
	_DELETED = object()

	def __init__(self, size=500000):
		"""Create an empty table.

		Args:
			size: Number of slots (defaults to 500000).

		Raises:
			ValueError: If ``size`` is smaller than 1.
		"""
		if size < 1:
			raise ValueError("size must be at least 1")
		self.dictSize = size
		self.mydict = [[self._EMPTY, None] for _ in range(size)]

	def _probe(self, key):
		"""Walk the probe sequence for ``key``.

		Returns:
			``(found, free)``: ``found`` is the slot index holding ``key`` or
			-1; ``free`` is the first reusable slot seen on the way or -1.
		"""
		free = -1
		# The probe offsets are a polynomial in the iteration number taken
		# modulo dictSize, so the sequence repeats after dictSize steps;
		# probing further can never reach a new slot.
		for iteration in range(self.dictSize):
			bucket = self.hashFunc(key, iteration)
			stored = self.mydict[bucket][0]
			if stored is self._EMPTY:
				# A never-used slot ends the chain: the key cannot be further on.
				return -1, (bucket if free < 0 else free)
			if stored is self._DELETED:
				# Remember the first tombstone for reuse, but keep looking:
				# the key may sit beyond it.
				if free < 0:
					free = bucket
			elif stored == key:
				return bucket, free
		return -1, free

	def insert(self, key, value=None):
		"""Store ``value`` under ``key``, replacing any existing value.

		Raises:
			RuntimeError: If no free slot is reachable on the key's probe
				sequence.
		"""
		found, free = self._probe(key)
		if found >= 0:
			self.mydict[found][1] = value
		elif free >= 0:
			self.mydict[free] = [key, value]
		else:
			raise RuntimeError("no free slot reachable for key")

	def contains(self, key):
		"""Return True if ``key`` is stored in the table."""
		return self._probe(key)[0] >= 0

	def value(self, key):
		"""Return the value stored under ``key``, or None if it is absent."""
		found = self._probe(key)[0]
		if found >= 0:
			return self.mydict[found][1]
		return None

	def delete(self, key):
		"""Remove ``key`` if present; do nothing otherwise."""
		found = self._probe(key)[0]
		if found >= 0:
			# Leave a tombstone so keys probed past this slot stay reachable.
			self.mydict[found] = [self._DELETED, None]

	def _is_live(self, slot):
		"""Return True if ``slot`` holds a real key rather than a marker."""
		return slot[0] is not self._EMPTY and slot[0] is not self._DELETED

	def get_key_values(self):
		"""Return a list of ``(key, value)`` tuples for every stored pair."""
		return [(slot[0], slot[1]) for slot in self.mydict if self._is_live(slot)]

	def dump(self):
		"""Print each stored ``(key, value)`` tuple, one per line."""
		for slot in self.mydict:
			if self._is_live(slot):
				print ((slot[0], slot[1]))

	def hashFunc(self, key, iteration):
		"""Return the slot index for ``key`` on probe number ``iteration``."""
		# ``**`` is exponentiation. ``^`` is bitwise XOR and binds more
		# loosely than ``+``, so it would XOR the whole sum with 2.
		return (hash(key) + 10 * iteration + 10 * iteration ** 2) % self.dictSize


class MySet(object):
	"""Set of keys kept in a ``MyOpenQuadDict``; the stored values are unused."""

	def __init__(self):
		"""Start with an empty backing table."""
		self.myset = MyOpenQuadDict()

	def insert(self, key):
		"""Add ``key`` to the set."""
		backing = self.myset
		backing.insert(key)

	def contains(self, key):
		"""Return whether the backing table reports ``key`` as present."""
		present = self.myset.contains(key)
		return present

	def dump(self):
		"""Print the contents of the backing table."""
		backing = self.myset
		backing.dump()

######## End code which needs to be modified ##########


# Store the set of stop words in a set object
stop_words_file = "stopwords.txt"
stop_words = MySet()

with open(stop_words_file) as f:
	for l in f:
		stop_words.insert(l.strip())


# Download file from https://snap.stanford.edu/data/finefoods.txt.gz
# Remember to gunzip before using
review_file = "foods.txt"

# One word-count table per star rating: review_words[r - 1] counts rating r.
review_words = [MyOpenQuadDict() for _ in range(5)]

# Translation table that deletes every punctuation character; built once
# instead of once per review.
strip_punctuation = str.maketrans("", "", string.punctuation)

with open(review_file, encoding='LATIN-1') as f:
	# Stream the file line by line rather than loading it all with
	# readlines(); this also removes the index arithmetic that could run past
	# the end of the list on a truncated file.
	rating = None
	for line in f:
		if rating is None and line.startswith("review/score"):
			# Jump through hoops to satisfy the Unicode encodings
			field = line.encode('ascii', 'ignore').decode('ascii')
			# Extract rating
			rating = int(float(field.partition(":")[2].strip()))
		elif rating is not None and line.startswith("review/text"):
			field = line.encode('ascii', 'ignore').decode('ascii').strip().lower()
			# partition() splits on the FIRST colon only, so colons inside
			# the review body no longer truncate the text.
			text = field.partition(":")[2].translate(strip_punctuation)

			# Ratings outside 1..5 are skipped: index -1 would silently
			# count them under the five-star table.
			if 1 <= rating <= 5:
				counts = review_words[rating - 1]
				# Extract words in the text
				for w in text.split():
					if not stop_words.contains(w):
						if counts.contains(w):
							count = counts.value(w) + 1
						else:
							count = 1
						counts.insert(w, count)
			# Wait for the next review/score line before counting again.
			rating = None

# Print out the top words for each of the five ratings

for i in range(5):
	freq_words = sorted(review_words[i].get_key_values(), key=lambda tup: tup[1], reverse=True)
	print(i+1, freq_words[0:20])
	import string

	######## Begin code which needs to be modified ##########

	# Implementation 1
	# NOTE(review): duplicate of the MyChainDict class defined earlier in this
	# file -- consider removing one copy.
	class MyChainDict(object):
		"""Hash table that resolves collisions by separate chaining.

		``self.mydict`` is a list of ``self.dictSize`` buckets; each bucket is
		a list of ``[key, value]`` pairs whose keys hash to that bucket.
		"""

		def __init__(self, size=10000):
			"""Create an empty table.

			Args:
				size: Number of buckets (defaults to 10000).

			Raises:
				ValueError: If ``size`` is smaller than 1.
			"""
			if size < 1:
				raise ValueError("size must be at least 1")
			self.dictSize = size
			self.mydict = [[] for _ in range(size)]

		def _find(self, key):
			"""Return ``(bucket, index)`` for ``key``; ``index`` is -1 if absent."""
			bucket = self.mydict[self.hashFunc(key)]
			for index, pair in enumerate(bucket):
				# Compare against the stored key only. Testing ``key in pair``
				# would also match a stored *value* that equals ``key``.
				if pair[0] == key:
					return bucket, index
			return bucket, -1

		def insert(self, key, value=None):
			"""Store ``value`` under ``key``, replacing any existing value."""
			bucket, index = self._find(key)
			if index < 0:
				bucket.append([key, value])
			else:
				bucket[index][1] = value

		def contains(self, key):
			"""Return True if ``key`` is stored in the table."""
			return self._find(key)[1] >= 0

		def value(self, key):
			"""Return the value stored under ``key``, or False if it is absent."""
			bucket, index = self._find(key)
			if index < 0:
				return False
			return bucket[index][1]

		def delete(self, key):
			"""Remove ``key`` if present; do nothing otherwise."""
			bucket, index = self._find(key)
			if index >= 0:
				# The position is located first, so the bucket is never
				# mutated while it is being iterated.
				bucket.pop(index)

		def get_key_values(self):
			"""Return a list of ``(key, value)`` tuples for every stored pair."""
			return [(pair[0], pair[1]) for bucket in self.mydict for pair in bucket]

		def dump(self):
			"""Print every stored ``[key, value]`` pair, one per line."""
			for bucket in self.mydict:
				for pair in bucket:
					print (pair)

		def hashFunc(self, key):
			"""Return the bucket index for ``key``."""
			return hash(key) % self.dictSize


	# Implementation 2
	# NOTE(review): duplicate of the MyOpenLinearDict class defined earlier in
	# this file -- consider removing one copy.
	class MyOpenLinearDict(object):
		"""Open-addressing hash table that resolves collisions by linear probing.

		``self.mydict`` holds ``self.dictSize`` slots, each a ``[key, value]``
		list. Unused and deleted slots carry private marker objects in the key
		position.
		"""

		# Unique marker objects: unlike marker strings, they can never be equal
		# to a key supplied by a caller.
		_EMPTY = object()
		_DELETED = object()

		def __init__(self, size=500000):
			"""Create an empty table.

			Args:
				size: Number of slots (defaults to 500000).

			Raises:
				ValueError: If ``size`` is smaller than 1.
			"""
			if size < 1:
				raise ValueError("size must be at least 1")
			self.dictSize = size
			self.mydict = [[self._EMPTY, None] for _ in range(size)]

		def _probe(self, key):
			"""Walk the probe sequence for ``key``.

			Returns:
				``(found, free)``: ``found`` is the slot index holding ``key``
				or -1; ``free`` is the first reusable slot seen on the way or -1.
			"""
			free = -1
			bucket = self.hashFunc(key)
			# At most dictSize probes: after that every slot has been visited.
			for _ in range(self.dictSize):
				stored = self.mydict[bucket][0]
				if stored is self._EMPTY:
					# A never-used slot ends the chain: the key cannot be
					# further on.
					return -1, (bucket if free < 0 else free)
				if stored is self._DELETED:
					# Remember the first tombstone for reuse, but keep
					# looking: the key may sit beyond it.
					if free < 0:
						free = bucket
				elif stored == key:
					return bucket, free
				bucket += 1
				if bucket >= self.dictSize:
					bucket = 0
			return -1, free

		def insert(self, key, value=None):
			"""Store ``value`` under ``key``, replacing any existing value.

			Raises:
				RuntimeError: If the table has no free slot left.
			"""
			found, free = self._probe(key)
			if found >= 0:
				self.mydict[found][1] = value
			elif free >= 0:
				self.mydict[free] = [key, value]
			else:
				raise RuntimeError("hash table is full")

		def contains(self, key):
			"""Return True if ``key`` is stored in the table."""
			return self._probe(key)[0] >= 0

		def value(self, key):
			"""Return the value stored under ``key``, or None if it is absent."""
			found = self._probe(key)[0]
			if found >= 0:
				return self.mydict[found][1]
			return None

		def delete(self, key):
			"""Remove ``key`` if present; do nothing otherwise."""
			found = self._probe(key)[0]
			if found >= 0:
				# Leave a tombstone so keys probed past this slot stay reachable.
				self.mydict[found] = [self._DELETED, None]

		def _is_live(self, slot):
			"""Return True if ``slot`` holds a real key rather than a marker."""
			return slot[0] is not self._EMPTY and slot[0] is not self._DELETED

		def get_key_values(self):
			"""Return a list of ``(key, value)`` tuples for every stored pair."""
			return [(slot[0], slot[1]) for slot in self.mydict if self._is_live(slot)]

		def dump(self):
			"""Print each stored key and value, one pair per line."""
			for slot in self.mydict:
				if self._is_live(slot):
					print (slot[0], slot[1])

		def hashFunc(self, key):
			"""Return the home slot index for ``key``."""
			return hash(key) % self.dictSize

	# Implementation 3
	# NOTE(review): duplicate of the MyOpenQuadDict class defined earlier in
	# this file -- consider removing one copy.
	class MyOpenQuadDict(object):
		"""Open-addressing hash table that resolves collisions by quadratic probing.

		``self.mydict`` holds ``self.dictSize`` slots, each a ``[key, value]``
		list. Unused and deleted slots carry private marker objects in the key
		position.
		"""

		# Unique marker objects: unlike marker strings, they can never be equal
		# to a key supplied by a caller.
		_EMPTY = object()
		_DELETED = object()

		def __init__(self, size=500000):
			"""Create an empty table.

			Args:
				size: Number of slots (defaults to 500000).

			Raises:
				ValueError: If ``size`` is smaller than 1.
			"""
			if size < 1:
				raise ValueError("size must be at least 1")
			self.dictSize = size
			self.mydict = [[self._EMPTY, None] for _ in range(size)]

		def _probe(self, key):
			"""Walk the probe sequence for ``key``.

			Returns:
				``(found, free)``: ``found`` is the slot index holding ``key``
				or -1; ``free`` is the first reusable slot seen on the way or -1.
			"""
			free = -1
			# The probe offsets are a polynomial in the iteration number taken
			# modulo dictSize, so the sequence repeats after dictSize steps;
			# probing further can never reach a new slot.
			for iteration in range(self.dictSize):
				bucket = self.hashFunc(key, iteration)
				stored = self.mydict[bucket][0]
				if stored is self._EMPTY:
					# A never-used slot ends the chain: the key cannot be
					# further on.
					return -1, (bucket if free < 0 else free)
				if stored is self._DELETED:
					# Remember the first tombstone for reuse, but keep
					# looking: the key may sit beyond it.
					if free < 0:
						free = bucket
				elif stored == key:
					return bucket, free
			return -1, free

		def insert(self, key, value=None):
			"""Store ``value`` under ``key``, replacing any existing value.

			Raises:
				RuntimeError: If no free slot is reachable on the key's probe
					sequence.
			"""
			found, free = self._probe(key)
			if found >= 0:
				self.mydict[found][1] = value
			elif free >= 0:
				self.mydict[free] = [key, value]
			else:
				raise RuntimeError("no free slot reachable for key")

		def contains(self, key):
			"""Return True if ``key`` is stored in the table."""
			return self._probe(key)[0] >= 0

		def value(self, key):
			"""Return the value stored under ``key``, or None if it is absent."""
			found = self._probe(key)[0]
			if found >= 0:
				return self.mydict[found][1]
			return None

		def delete(self, key):
			"""Remove ``key`` if present; do nothing otherwise."""
			found = self._probe(key)[0]
			if found >= 0:
				# Leave a tombstone so keys probed past this slot stay reachable.
				self.mydict[found] = [self._DELETED, None]

		def _is_live(self, slot):
			"""Return True if ``slot`` holds a real key rather than a marker."""
			return slot[0] is not self._EMPTY and slot[0] is not self._DELETED

		def get_key_values(self):
			"""Return a list of ``(key, value)`` tuples for every stored pair."""
			return [(slot[0], slot[1]) for slot in self.mydict if self._is_live(slot)]

		def dump(self):
			"""Print each stored ``(key, value)`` tuple, one per line."""
			for slot in self.mydict:
				if self._is_live(slot):
					print ((slot[0], slot[1]))

		def hashFunc(self, key, iteration):
			"""Return the slot index for ``key`` on probe number ``iteration``."""
			# ``**`` is exponentiation. ``^`` is bitwise XOR and binds more
			# loosely than ``+``, so it would XOR the whole sum with 2.
			return (hash(key) + 10 * iteration + 10 * iteration ** 2) % self.dictSize


	# NOTE(review): duplicate of the MySet class defined earlier in this file
	# -- consider removing one copy.
	class MySet(object):
		"""Set of keys kept in a ``MyOpenQuadDict``; the stored values are unused."""

		def __init__(self):
			"""Start with an empty backing table."""
			self.myset = MyOpenQuadDict()
			return

		def insert(self, key):
			"""Add ``key`` to the set."""
			self.myset.insert(key)
			return

		def contains(self, key):
			"""Return whether the backing table reports ``key`` as present."""
			return self.myset.contains(key)

		def dump(self):
			"""Print the contents of the backing table."""
			self.myset.dump()

	######## End code which needs to be modified ##########


	# NOTE(review): duplicate of the word-count script that appears earlier in
	# this file -- consider removing one copy.
	# Store the set of stop words in a set object
	stop_words_file = "stopwords.txt"
	stop_words = MySet()

	with open(stop_words_file) as f:
		for l in f:
			stop_words.insert(l.strip())


	# Download file from https://snap.stanford.edu/data/finefoods.txt.gz
	# Remember to gunzip before using
	review_file = "foods.txt"

	# One word-count table per star rating: review_words[r - 1] counts rating r.
	review_words = [MyOpenQuadDict() for _ in range(5)]

	# Translation table that deletes every punctuation character; built once
	# instead of once per review.
	strip_punctuation = str.maketrans("", "", string.punctuation)

	with open(review_file, encoding='LATIN-1') as f:
		# Stream the file line by line rather than loading it all with
		# readlines(); this also removes the index arithmetic that could run
		# past the end of the list on a truncated file.
		rating = None
		for line in f:
			if rating is None and line.startswith("review/score"):
				# Jump through hoops to satisfy the Unicode encodings
				field = line.encode('ascii', 'ignore').decode('ascii')
				# Extract rating
				rating = int(float(field.partition(":")[2].strip()))
			elif rating is not None and line.startswith("review/text"):
				field = line.encode('ascii', 'ignore').decode('ascii').strip().lower()
				# partition() splits on the FIRST colon only, so colons inside
				# the review body no longer truncate the text.
				text = field.partition(":")[2].translate(strip_punctuation)

				# Ratings outside 1..5 are skipped: index -1 would silently
				# count them under the five-star table.
				if 1 <= rating <= 5:
					counts = review_words[rating - 1]
					# Extract words in the text
					for w in text.split():
						if not stop_words.contains(w):
							if counts.contains(w):
								count = counts.value(w) + 1
							else:
								count = 1
							counts.insert(w, count)
				# Wait for the next review/score line before counting again.
				rating = None

	# Print out the top words for each of the five ratings

	for i in range(5):
		freq_words = sorted(review_words[i].get_key_values(), key=lambda tup: tup[1], reverse=True)
		print(i+1, freq_words[0:20])