Created
November 25, 2020 16:59
-
-
Save mkondratek/1a86a98bde094a294a95109e7e4c5672 to your computer and use it in GitHub Desktop.
[lab06] Solution for BagOfWords exercise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
from io import TextIOWrapper | |
class BagOfWords(object):
    """Bag (multiset) of words: maps each word to its number of occurrences.

    Iterating a bag yields its words from the most to the least frequent.
    Words with equal counts keep their insertion order. Looking up a word
    that is not in the bag returns 0 and does not add it.
    """

    def __init__(self, input):
        """Build the bag from text, an open text file, or ready-made counts.

        Args:
            input: a ``str`` (split on whitespace), a ``TextIOWrapper`` (its
                remaining content is read and split on whitespace; the caller
                stays responsible for closing it), or a ``defaultdict`` that
                already maps words to counts.

        Raises:
            ValueError: if ``input`` is none of the supported types.
        """
        if isinstance(input, defaultdict):
            # Copy so that later __setitem__ calls on this bag never mutate
            # the mapping owned by the caller.
            self.dict = defaultdict(int, input)
            return

        if isinstance(input, str):
            input_words = input.split()
        elif isinstance(input, TextIOWrapper):
            input_words = input.read().split()
        else:
            raise ValueError(f'Illegal input {type(input)} type; input must be type of str or IO')

        self.dict = defaultdict(int)
        for word in input_words:
            self.dict[word] += 1

    def __iter__(self):
        """Yield the words ordered by descending count."""
        # reverse=True keeps sort stability, so ties stay in insertion order.
        yield from sorted(self.dict, key=self.dict.get, reverse=True)

    def __contains__(self, item):
        """Return True if ``item`` is stored in the bag (constant-time lookup)."""
        try:
            return item in self.dict
        except TypeError:
            # Unhashable objects can never be keys, so they are never members.
            return False

    def __getitem__(self, item):
        """Return the count of ``item``; 0 for a word that is not in the bag."""
        # .get() avoids the defaultdict side effect of inserting the missing
        # key, which would make the word show up in ``in``, iteration and repr.
        return self.dict.get(item, 0)

    def __add__(self, other):
        """Return a new bag whose counts are the per-word sums of both bags."""
        # Positional copy instead of update(**mapping): keyword expansion
        # fails for keys that are not strings.
        sum_of_dicts = defaultdict(int, self.dict)
        for word, count in other.dict.items():
            sum_of_dicts[word] += count
        return BagOfWords(sum_of_dicts)

    def __setitem__(self, key, value):
        """Set the count of ``key`` to ``value``."""
        self.dict[key] = value

    def __repr__(self):
        """Return the word counts formatted like a plain ``dict``."""
        return str(dict(self.dict))
# Smoke tests for BagOfWords. "plik.txt" must exist in the working directory
# and produce the same word counts as the literal string used below.
test_bows = []
bow = BagOfWords("ala ma kota ala ma ala")
test_bows.append(bow)
# BagOfWords reads the whole file inside its constructor, so the handle can
# be closed right away instead of being left open until interpreter exit.
with open("plik.txt") as words_file:
    bow = BagOfWords(words_file)
test_bows.append(bow)

# Both construction paths must yield identical bags.
for bow in test_bows:
    assert str(bow) == "{'ala': 3, 'ma': 2, 'kota': 1}"
    assert 'ala' in bow
    for word in bow:
        print(word)
    assert bow["ala"] == 3

# Adding two bags must produce a new bag and leave the operands untouched.
bow1 = BagOfWords("ala ma kota ala ma ala")
bow2 = BagOfWords("tomek tez ma kota")
bow3 = bow1 + bow2
assert 'tomek' not in bow1
assert 'tomek' in bow3
assert 'ala' in bow3
print(bow3)  # ala:3, ma:3, kota:2, tez:1, tomek:1
assert bow1["ala"] == 3
assert bow3["ala"] == 3

# After overriding a count, iteration order must follow the new counts.
bow3['tomek'] = 10
for el in bow3:
    print(el)
first = next(iter(bow3))
assert first == 'tomek'
print("OK")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment