Skip to content

Instantly share code, notes, and snippets.

@mkondratek
Created November 25, 2020 16:59
Show Gist options
  • Save mkondratek/1a86a98bde094a294a95109e7e4c5672 to your computer and use it in GitHub Desktop.
Save mkondratek/1a86a98bde094a294a95109e7e4c5672 to your computer and use it in GitHub Desktop.
[lab06] Solution for BagOfWords exercise
from collections import defaultdict
from io import TextIOBase, TextIOWrapper
class BagOfWords(object):
    """Word-frequency counter built from text, a text stream, or ready counts.

    Counts live in ``self.dict`` (a ``defaultdict(int)``). Iterating a bag
    yields its words from most to least frequent, ``bag[word]`` returns the
    count of a word, and ``a + b`` merges two bags into a new one.
    """

    def __init__(self, input):
        """Build the bag from *input*.

        Args:
            input: One of
                * ``str`` - split on whitespace, each token counted;
                * a text stream (any ``io.TextIOBase``, e.g. an open text
                  file or ``StringIO``) - read fully, then split and counted;
                * ``defaultdict`` - taken as ready-made word counts.

        Raises:
            ValueError: If *input* is none of the supported types.
        """
        self.dict = defaultdict(int)
        if isinstance(input, defaultdict):
            # Copy rather than alias: later writes to this bag must not
            # leak into the mapping the caller handed in.
            self.dict.update(input)
            return
        if isinstance(input, str):
            input_words = input.split()
        elif isinstance(input, TextIOBase):
            input_words = input.read().split()
        else:
            raise ValueError(f'Illegal input {type(input)} type; input must be type of str or IO')
        for word in input_words:
            self.dict[word] += 1

    def __iter__(self):
        """Yield words by descending count; equal counts keep insertion order."""
        # sorted() is stable also with reverse=True, so ties are not reordered.
        yield from sorted(self.dict, key=self.dict.get, reverse=True)

    def __contains__(self, item):
        """Return True if *item* has an entry in the bag."""
        # Direct dict lookup; without this method ``in`` would fall back to
        # __iter__ and sort the whole bag on every membership test.
        return item in self.dict

    def __getitem__(self, item):
        """Return the count stored for *item*, or 0 when it is absent.

        Uses ``get`` instead of indexing so that looking up an unknown word
        does not insert a zero entry into the underlying defaultdict.
        """
        return self.dict.get(item, 0)

    def __add__(self, other):
        """Return a new bag whose counts are the sums of both operands.

        Neither operand is modified. Returns ``NotImplemented`` when *other*
        is not a ``BagOfWords`` so Python can raise the usual ``TypeError``.
        """
        if not isinstance(other, BagOfWords):
            return NotImplemented
        sum_of_dicts = defaultdict(int)
        # Positional update (not **kwargs) so non-string keys are accepted.
        sum_of_dicts.update(self.dict)
        for word, count in other.dict.items():
            sum_of_dicts[word] += count
        return BagOfWords(sum_of_dicts)

    def __setitem__(self, key, value):
        """Set the count of *key* to *value*, replacing any previous count."""
        self.dict[key] = value

    def __repr__(self):
        """Return the counts formatted like a plain ``dict`` literal."""
        return str(dict(self.dict))
# Smoke test for BagOfWords: the same words are loaded once from a string and
# once from the file "plik.txt", and both bags must behave identically.
test_bows = []

bow = BagOfWords("ala ma kota ala ma ala")
test_bows.append(bow)
# The bag reads the whole file in its constructor, so the handle can be
# closed right away instead of being left open for the rest of the script.
with open("plik.txt") as words_file:
    bow = BagOfWords(words_file)
test_bows.append(bow)

for bow in test_bows:
    assert str(bow) == "{'ala': 3, 'ma': 2, 'kota': 1}"
    assert 'ala' in bow
    for word in bow:
        print(word)
    assert bow["ala"] == 3

# Adding two bags must produce a new bag and leave the operands untouched.
bow1 = BagOfWords("ala ma kota ala ma ala")
bow2 = BagOfWords("tomek tez ma kota")
bow3 = bow1 + bow2
assert 'tomek' not in bow1
assert 'tomek' in bow3
assert 'ala' in bow3
print(bow3)  # expected counts: ala 3, ma 3, kota 2, tomek 1, tez 1
assert bow1["ala"] == 3
assert bow3["ala"] == 3

# Overwriting a count must move the word to the front of the iteration order.
bow3['tomek'] = 10
for el in bow3:
    print(el)
first = next(iter(bow3))
assert first == 'tomek'
print("OK")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment