Skip to content

Instantly share code, notes, and snippets.

@mrm8488
Forked from kabirahuja2431/iterable_dataset_v1.py
Created February 23, 2020 03:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrm8488/a987bb54dab15435c2fd41b505173a3a to your computer and use it in GitHub Desktop.
Save mrm8488/a987bb54dab15435c2fd41b505173a3a to your computer and use it in GitHub Desktop.
class CustomIterableDatasetv1(IterableDataset):
    """Stream ``(text, label)`` pairs from a comma-separated text file.

    The file is read one line at a time during iteration, so its contents
    are never held in memory as a whole. Each line is expected to look like
    ``<text>,<label>``.
    """

    def __init__(self, filename):
        """Remember which file to stream from; nothing is read yet.

        Args:
            filename: Path of the file to read, passed as-is to ``open``.
        """
        super().__init__()
        # Only the path is stored; the file is opened on each iteration.
        self.filename = filename

    def preprocess(self, text: str) -> str:
        """Return *text* lower-cased with surrounding whitespace removed."""
        ### Do something with text here
        text_pp = text.lower().strip()
        ###
        return text_pp

    def line_mapper(self, line: str):
        """Split one raw line into a preprocessed text and its label.

        Args:
            line: A single line of the file, in the form ``<text>,<label>``.

        Returns:
            A ``(text, label)`` tuple. ``text`` has been run through
            ``preprocess``; ``label`` is returned untouched, so it still
            carries the line terminator when the line had one.

        Raises:
            ValueError: If the line contains no comma at all.
        """
        # Split on the LAST comma only: the label is the final field, so
        # commas inside the text no longer make the unpacking fail.
        text, label = line.rsplit(',', 1)
        text = self.preprocess(text)
        return text, label

    def __iter__(self):
        """Yield ``(text, label)`` for every line of the file, lazily.

        The file is opened when the first item is requested and is closed
        as soon as the iterator is exhausted, closed, or garbage-collected,
        instead of leaking an open handle per pass over the dataset.
        """
        with open(self.filename) as file_itr:
            # Map each line through line_mapper without materialising a list.
            yield from map(self.line_mapper, file_itr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment