Created
June 10, 2020 09:48
-
-
Save ferrygun/66ae28bb4dbceab4aaf65562bf385034 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class SentenceGetter(object):
    """Group a token-level table into sentences of (word, POS, tag) triples.

    ``data`` must support ``groupby("Sentence #")`` and provide the columns
    ``"Word"``, ``"POS"`` and ``"Tag"`` (e.g. a pandas DataFrame with one
    row per token).

    Attributes set by the constructor:
        n_sent: 1-based number of the sentence ``get_next`` returns next.
        data: the table passed in, stored unchanged.
        empty: initialised to ``False``; not otherwise used by this class.
        grouped: result of grouping ``data`` by ``"Sentence #"`` and turning
            each group into a list of ``(word, pos, tag)`` tuples.
        sentences: the values of ``grouped`` collected into a list.
    """

    def __init__(self, data):
        """Build the per-sentence grouping from ``data``."""
        self.n_sent = 1
        self.data = data
        self.empty = False
        self.grouped = self.data.groupby("Sentence #").apply(self._to_triples)
        self.sentences = list(self.grouped)

    @staticmethod
    def _to_triples(group):
        """Return the rows of one sentence group as (word, pos, tag) tuples."""
        return list(
            zip(
                group["Word"].values.tolist(),
                group["POS"].values.tolist(),
                group["Tag"].values.tolist(),
            )
        )

    def get_next(self):
        """Return the next sentence, or ``None`` when there is none left.

        Sentences are looked up in ``grouped`` under the label
        ``"Sentence: <n_sent>"``; ``n_sent`` advances only after a
        successful lookup.
        """
        try:
            sentence = self.grouped["Sentence: {}".format(self.n_sent)]
        except KeyError:
            # A missing label is the normal end-of-data signal. Any other
            # exception is a real error and is allowed to propagate instead
            # of being silently turned into None.
            return None
        self.n_sent += 1
        return sentence
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment