Skip to content

Instantly share code, notes, and snippets.

@skannan-maf
Created November 17, 2021 11:37
Show Gist options
  • Save skannan-maf/b66086dbddd55213161eb117248e85d6 to your computer and use it in GitHub Desktop.
Save skannan-maf/b66086dbddd55213161eb117248e85d6 to your computer and use it in GitHub Desktop.
Tagged basket iterator for Doc2vec
# Basket iterator class
class tagged_basket_iterator:
    """Single-pass iterator yielding one ``TaggedDocument`` per basket row.

    Each step reads the row at the current position of the wrapped table,
    uses the row value as the document's ``words`` and the matching index
    label as its only tag.

    NOTE(review): ``TaggedDocument`` is not defined in this block; it is
    presumably ``gensim.models.doc2vec.TaggedDocument`` and must already be
    in scope where this class is used -- confirm against the file's imports.
    """

    def __init__(self, tb):
        """Begin a fresh pass over ``tb._df``.

        Args:
            tb: Object exposing a ``_df`` attribute. ``_df`` must support
                ``len()``, positional ``.iloc[i]`` and ``.index[i]``
                (presumably a pandas Series of token lists -- TODO confirm).
        """
        self._df = tb._df
        self._index = 0

    def __iter__(self):
        """Return ``self``.

        The iterator protocol requires iterators to be iterable themselves;
        without this method ``iter(it)``, ``for x in it`` and ``list(it)``
        raise ``TypeError``.
        """
        return self

    def __next__(self):
        """Return the next row as a ``TaggedDocument``.

        Returns:
            ``TaggedDocument(words=<row value>, tags=[<row index label>])``.

        Raises:
            StopIteration: Once every row has been consumed.
        """
        # The length is re-read on every call, matching the original
        # behaviour if the underlying table changes during iteration.
        if self._index >= len(self._df):
            raise StopIteration
        value = self._df.iloc[self._index]
        tag = self._df.index[self._index]
        # Advance only after both lookups succeeded so a failed read does
        # not silently skip a row.
        self._index += 1
        return TaggedDocument(words=value, tags=[tag])
# Basket class: iterable wrapper that returns a new tagged_basket_iterator each time it is iterated
class tagged_baskets:
    """Re-iterable view over a basket table.

    Every call to ``iter()`` on an instance produces a brand-new
    ``tagged_basket_iterator`` starting from the first row, so the same
    object can be traversed more than once.
    """

    def __init__(self, df):
        """Keep a reference to ``df`` (stored as-is, not copied)."""
        self._df = df

    def __iter__(self):
        """Create and return a fresh iterator bound to this container."""
        fresh_pass = tagged_basket_iterator(self)
        return fresh_pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment