# sharvaridhote/load_data.py

## load_data.py
def load_data(df, split=0.2):
    """
    Shuffle the labelled rows and split them into two spaCy-style sets.

    Adapted from the spaCy text-classification example.

    Parameters:
        df: training data in a pandas dataframe. It is passed to
            ``df_tolist``, which is expected to return a list of
            ``(text, label)`` pairs.
        split: float - fraction of the shuffled rows placed in the first
            returned set; the remaining rows go to the second set.
            Defaults to 0.2
    Returns:
        tuples: ``(texts, cats)`` for the first ``split`` fraction of the
        rows, followed by ``(texts, cats)`` for the rest. ``texts`` is a
        tuple of the first items of each pair and ``cats`` is a list of
        ``{"POSITIVE": bool, "NEGATIVE": bool}`` dicts built from the
        truthiness of each label. Empty input yields two empty sets.

    NOTE(review): the original docstring calls the two sets "train" and
    "validation"; with the default of 0.2 the first set receives only 20%
    of the rows - confirm this is the intended direction.
    """
    rows = df_tolist(df)
    if len(rows) == 0:
        # zip(*rows) would produce nothing to unpack into texts/labels,
        # so return the empty result explicitly instead of raising.
        return ((), []), ((), [])

    # Shuffle in place so the split below is a random partition.
    random.shuffle(rows)
    texts, labels = zip(*rows)

    # One category dict per sentence; the two flags are always opposite.
    cats = [
        {"POSITIVE": bool(label), "NEGATIVE": not bool(label)}
        for label in labels
    ]

    # Index of the boundary between the two sets (kept separate from the
    # ``split`` fraction so the parameter is not shadowed).
    cut = int(len(rows) * split)
    return (texts[:cut], cats[:cut]), (texts[cut:], cats[cut:])