# desh2608/load_holographic.py

## load_holographic.py
import numpy as np
import pandas as pd

# Load the pre-computed arrays stored in the NumPy .npz archive.
# The archive has to be opened with np.load: the builtin open() returns a
# plain file object, which cannot be indexed by array name.
# Indexing the archive reads each array into memory, so the variables remain
# usable after the `with` block has closed the underlying file.
with np.load('./holographic.npz') as d:
    indices = d['arr_0']
    X_train = d['arr_1']
    X_val = d['arr_2']
    y_train = d['arr_3']
    y_val = d['arr_4']

# While loading the data into the dataframe, some lines are read incorrectly:
# several tweets are merged into a single record, so their "Tweet text" is
# longer than 140 characters. Those records are dropped here.
data = pd.read_csv("./datasets/train/SemEval2018-T3-train-taskA_emoji.txt", sep="\t")
data = data[data['Tweet text'].map(len) <= 140]

# Now you can use the "indices" on the lists in this data dictionary.