Skip to content

Instantly share code, notes, and snippets.

@bluedistro
Created December 31, 2018 14:18
Show Gist options
  • Save bluedistro/b36a8299656b3ada8daeffb0c8f2329b to your computer and use it in GitHub Desktop.
Save bluedistro/b36a8299656b3ada8daeffb0c8f2329b to your computer and use it in GitHub Desktop.
data preprocessing
# Load the raw IMDB training reviews into two parallel lists:
# texts[i] holds the review text and labels[i] its sentiment
# (0 for reviews under 'neg', 1 for reviews under 'pos').
imdb_dir = '../datasets/aclImdb/aclImdb'
train_dir = os.path.join(imdb_dir, 'train')
labels = list()
texts = list()

# Walk train/neg and train/pos; the directory a review lives in is its label.
for label_type in ['neg', 'pos']:
    dir_name = os.path.join(train_dir, label_type)
    for fname in os.listdir(dir_name):
        # Only '.txt' files are treated as reviews; skip anything else.
        if not fname.endswith('.txt'):
            continue
        # The context manager closes the file even if read() raises.
        # The explicit encoding avoids depending on the platform's default
        # locale encoding (assumes the dataset files are UTF-8 -- TODO confirm).
        with open(os.path.join(dir_name, fname), encoding='utf-8') as f:
            texts.append(f.read())
        # Appended in lock-step with texts so the indices stay aligned.
        labels.append(0 if label_type == 'neg' else 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment