Skip to content

Instantly share code, notes, and snippets.

@sismetanin
Created August 1, 2019 12:30
Show Gist options
  • Save sismetanin/2ada2f8d8c2e3fc7177dd9c285a3ddef to your computer and use it in GitHub Desktop.
Save sismetanin/2ada2f8d8c2e3fc7177dd9c285a3ddef to your computer and use it in GitHub Desktop.
def preprocessData(dataFilePath, mode):
    """Read a tab-separated conversation file and tokenize its three turns.

    The first line of the file is discarded as a header. For every other
    non-blank line, columns 1-3 are each passed through ``tokenize`` and
    collected as one conversation. In ``"train"`` mode column 4 is also
    looked up in ``emotion2label`` and collected as that row's label.

    Args:
        dataFilePath: Path to the UTF-8 encoded, tab-separated data file.
        mode: ``"train"`` to read labels as well; any other value reads only
            the conversation turns.

    Returns:
        ``(conversations, labels)`` as NumPy arrays when ``mode == "train"``,
        otherwise only the ``conversations`` array.

    Raises:
        IndexError: If a non-blank line has fewer columns than required.
        KeyError: In train mode, if a label is missing from
            ``emotion2label`` (presumably a dict -- defined outside this
            block).
    """
    with_labels = mode == "train"
    conversations = []
    labels = []
    with io.open(dataFilePath, encoding="utf8") as finput:
        finput.readline()  # Discard the header row.
        for raw_line in finput:
            stripped = raw_line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing empty line) carry no record;
                # without this guard they crash on the column access below.
                continue
            fields = stripped.split('\t')
            # Explicit indexing keeps the IndexError on malformed rows instead
            # of silently producing a conversation with fewer than three turns.
            turns = [tokenize(fields[i]) for i in range(1, 4)]
            if with_labels:
                labels.append(emotion2label[fields[4]])
            conversations.append(turns)
    if with_labels:
        return np.array(conversations), np.array(labels)
    return np.array(conversations)
# Load the train, dev and test splits (in that order). All three are read in
# "train" mode, so each call yields a (texts, labels) pair.
(
    (texts_train, labels_train),
    (texts_dev, labels_dev),
    (texts_test, labels_test),
) = (
    preprocessData(split_path, mode="train")
    for split_path in (
        './starterkitdata/train.txt',
        './starterkitdata/dev.txt',
        './starterkitdata/test.txt',
    )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment