# monocongo/transform_to_supervised.py

## transform_to_supervised.py
import pandas as pd

def transform_to_supervised(df,
                            previous_steps=1,
                            forecast_steps=1,
                            dropnan=True):

    """
    Transforms a DataFrame containing time series data into a DataFrame
    containing data suitable for use as a supervised learning problem.

    Each output row holds, side by side, lagged copies of every input column
    followed by the current and look-ahead copies of every input column.

    Derived from code originally found at
    https://machinelearningmastery.com/convert-time-series-supervised-learning-problem-python/

    :param df: pandas DataFrame object containing columns of time series values
    :param previous_steps: the number of previous (lagged) steps that will be
                           included in the output DataFrame for each input column
    :param forecast_steps: the number of steps, counting the current step (t) as
                           the first one, that will be included in the output
                           DataFrame for each input column; a value of 1 yields
                           only the <orig_name>(t) columns
    :param dropnan: if True, rows of the result that contain any NaN value
                    are dropped before returning
    :return Pandas DataFrame containing columns for previous steps,
            <orig_name>(t-n) ... <orig_name>(t-1), followed by the original
            columns renamed <orig_name>(t) and columns for later steps,
            <orig_name>(t+1) ... <orig_name>(t+forecast_steps-1)
    :raises ValueError: if previous_steps and forecast_steps together select
                        no columns at all
    """

    # original column names
    col_names = df.columns

    # shifted copies of the input and the matching output column names
    frames, names = [], []

    # input sequence (t-n, ... t-1)
    for i in range(previous_steps, 0, -1):
        frames.append(df.shift(i))
        names += ['{}(t-{})'.format(col_name, i) for col_name in col_names]

    # forecast sequence (t, t+1, ... t+n-1)
    for i in range(forecast_steps):
        frames.append(df.shift(-i))
        suffix = '(t)' if i == 0 else '(t+{})'.format(i)
        # str.format (rather than "%s" % col_name) keeps tuple column labels,
        # e.g. from MultiIndex columns, from being unpacked as format arguments
        names += ['{}{}'.format(col_name, suffix) for col_name in col_names]

    # pd.concat cannot combine an empty list; fail with an explicit message
    if not frames:
        raise ValueError('previous_steps and forecast_steps select no columns; '
                         'at least one of them must be positive')

    # put all the columns together into a single aggregated DataFrame
    agg = pd.concat(frames, axis=1)
    agg.columns = names

    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)

    return agg
	import pandas as pd

	def transform_to_supervised(df,
	                            previous_steps=1,
	                            forecast_steps=1,
	                            dropnan=True):

	    """
	    Transforms a DataFrame containing time series data into a DataFrame
	    containing data suitable for use as a supervised learning problem.

	    Derived from code originally found at
	    https://machinelearningmastery.com/convert-time-series-supervised-learning-problem-python/

	    :param df: pandas DataFrame object containing columns of time series values
	    :param previous_steps: the number of previous (lagged) steps that will be
	                           included in the output DataFrame for each input column
	    :param forecast_steps: the number of steps, counting the current step (t) as
	                           the first one, that will be included in the output
	                           DataFrame for each input column; a value of 1 yields
	                           only the <orig_name>(t) columns
	    :param dropnan: if True, rows of the result that contain any NaN value
	                    are dropped before returning
	    :return Pandas DataFrame containing columns for previous steps,
	            <orig_name>(t-n) ... <orig_name>(t-1), followed by the original
	            columns renamed <orig_name>(t) and columns for later steps,
	            <orig_name>(t+1) ... <orig_name>(t+forecast_steps-1)
	    """

	    # original column names
	    col_names = df.columns

	    # list of columns and corresponding names we'll build from
	    # the originals found in the input DataFrame
	    cols, names = list(), list()

	    # input sequence (t-n, ... t-1)
	    for i in range(previous_steps, 0, -1):
	        cols.append(df.shift(i))
	        names += [('%s(t-%d)' % (col_name, i)) for col_name in col_names]

	    # forecast sequence (t, t+1, ... t+n-1)
	    for i in range(0, forecast_steps):
	        cols.append(df.shift(-i))
	        if i == 0:
	            # wrap the label in a 1-tuple so a tuple-valued column label
	            # is formatted as a whole instead of being unpacked by "%"
	            names += [('%s(t)' % (col_name,)) for col_name in col_names]
	        else:
	            names += [('%s(t+%d)' % (col_name, i)) for col_name in col_names]

	    # put all the columns together into a single aggregated DataFrame
	    agg = pd.concat(cols, axis=1)
	    agg.columns = names

	    # drop rows with NaN values
	    if dropnan:
	        agg.dropna(inplace=True)

	    return agg