This Gist contains some Python scripts for the djl timeseries package, including scripts for drawing prediction graphs, coarse-graining the M5 data, and exporting a traced DeepAR model.

## plot

If you want to visualize your forecasts, run:

```
python plot.py -p YOUR_PRED_LENGTH -f YOUR_FREQUENCY -t YOUR_START_TIME --target-path YOUR_TARGET_PATH --samples-path YOUR_SAMPLES_PATH
```
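
For example (illustrative values and file names, assuming a weekly forecast with horizon 4 saved as NumPy `.npz` files):

```
python plot.py -p 4 -f W -t 2011-01-29 --target-path target.npz --samples-path samples.npz -o out/
```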

*Plot result: example forecast graph (image omitted).*

## data coarse grain

Run:

```
python m5_data_coarse_grain.py -p YOUR_DATA_PATH
```
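
The script below (`m5_data_coarse_grain.py`) sums the daily columns `d_1, d_2, …` of the M5 sales CSVs into weekly columns `w_1, w_2, …` and writes a `weekly_*.csv` next to each input file. If `-p` is omitted, it falls back to `~/Desktop/m5example/m5-forecasting-accuracy`: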
```python
import argparse
import os

import pandas as pd


def coarse_grain(path: str, end: int):
    """Aggregate the daily sales columns d_1..d_<end> into weekly totals."""
    print(f"Processing {os.path.basename(path)}")
    df = pd.read_csv(path)
    # Keep the id columns, then append one summed column per 7-day window.
    _df = [df.loc[:, ("id", "item_id", "dept_id", "cat_id", "store_id", "state_id")].copy()]
    for i in range(1, end, 7):
        # .loc label slicing is inclusive on both ends, so d_i .. d_{i+6} is one week.
        tmp = pd.DataFrame(df.loc[:, "d_" + str(i): "d_" + str(i + 6)].sum(axis=1),
                           columns=["w_" + str(i // 7 + 1)])
        _df.append(tmp)
    _df = pd.concat(_df, axis=1)
    file_name = "weekly_" + os.path.basename(path)
    dir_name = os.path.dirname(path)
    output_path = os.path.join(dir_name, file_name)
    _df.to_csv(output_path, index=False)
    print(f"Saved {file_name} to {output_path}")


def arg_parse():
    parser = argparse.ArgumentParser()
    parser.add_argument("-p",
                        "--path",
                        help="M5 forecasting file path")
    return parser.parse_args()


if __name__ == "__main__":
    args = arg_parse()
    if args.path is None:
        args.path = os.path.join(os.path.expanduser("~"), "Desktop", "m5example", "m5-forecasting-accuracy")
    coarse_grain(os.path.join(args.path, "sales_train_validation.csv"), 1912)
    coarse_grain(os.path.join(args.path, "sales_train_evaluation.csv"), 1940)
```
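
A minimal smoke test (hypothetical, not part of the gist) that exercises `coarse_grain` on a tiny frame, after importing it from the script above:

```python
import pandas as pd

# Build a 14-day toy CSV in the M5 layout: 14 daily columns, all ones.
df = pd.DataFrame({
    "id": ["A"], "item_id": ["i1"], "dept_id": ["d1"],
    "cat_id": ["c1"], "store_id": ["s1"], "state_id": ["CA"],
    **{f"d_{i}": [1] for i in range(1, 15)},
})
df.to_csv("tiny.csv", index=False)

coarse_grain("tiny.csv", 14)  # writes weekly_tiny.csv with w_1 = w_2 = 7
```

The next script trains a GluonTS DeepAR model on the weekly data and traces its prediction network to TorchScript (`deepar.pt`) so it can be loaded from DJL: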
```python
import os

import numpy as np
import pandas as pd
import torch
from gluonts.torch.model.deepar import DeepAREstimator
from gluonts.torch.distributions.distribution_output import NegativeBinomialOutput
from gluonts.dataset.common import ListDataset
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.loader import InferenceDataLoader
from gluonts.transform import AdhocTransform
from gluonts.torch.batchify import batchify

single_prediction_length = 4
submission_prediction_length = single_prediction_length * 2
m5_input_path = os.path.join(os.path.expanduser("~"), "Desktop", "m5-forecasting-accuracy")  # your path to the dataset
submission = False

if submission:
    prediction_length = submission_prediction_length
else:
    prediction_length = single_prediction_length

sales_train_validation = pd.read_csv(f"{m5_input_path}/weekly_sales_train_validation.csv")
train_df = sales_train_validation.drop(["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"], axis=1)
train_target_values = train_df.values

if submission:
    # Pad each series with NaNs for the unknown future values.
    test_target_values = [np.append(ts, np.ones(submission_prediction_length) * np.nan) for ts in train_df.values]
else:
    # Hold out the last prediction_length points for testing.
    test_target_values = train_target_values.copy()
    train_target_values = [ts[:-single_prediction_length] for ts in train_df.values]

# All M5 series start on 2011-01-29.
m5_dates = [pd.Timestamp("2011-01-29") for _ in range(len(sales_train_validation))]

train_ds = ListDataset([
    {
        FieldName.TARGET: target,
        FieldName.START: start
    }
    for (target, start) in zip(train_target_values, m5_dates)
], freq="W")

test_ds = ListDataset([
    {
        FieldName.TARGET: target,
        FieldName.START: start
    }
    for (target, start) in zip(test_target_values, m5_dates)
], freq="W")

estimator = DeepAREstimator(
    prediction_length=prediction_length,
    freq="W",
    distr_output=NegativeBinomialOutput(),  # suitable for count data
    trainer_kwargs={"max_epochs": 10}
)

predictor = estimator.train(train_ds)
prediction_length = predictor.prediction_length
lead_time = predictor.lead_time


def truncate_target(data):
    data = data.copy()
    target = data[FieldName.TARGET]
    assert (
        target.shape[-1] >= prediction_length
    )  # handles multivariate case (target_dim, history_length)
    data[FieldName.TARGET] = target[..., : -prediction_length - lead_time]
    return data


data_loader = InferenceDataLoader(
    AdhocTransform(truncate_target).apply(test_ds),
    transform=predictor.input_transform,
    batch_size=predictor.batch_size,
    stack_fn=lambda data: batchify(data, device=predictor.device),
)

# Trace the prediction network with one real batch and save it as TorchScript.
predictor.prediction_net.eval()
with torch.no_grad():
    for batch in data_loader:
        inputs = [batch[k] for k in predictor.input_names]
        trace = torch.jit.trace(predictor.prediction_net, tuple(inputs))
        trace.save("./deepar.pt")  # replace with your path
        break
```
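
A quick sanity check (hypothetical, not in the gist): reload the traced module and run it on the same batch to confirm the export round-trips:

```python
loaded = torch.jit.load("./deepar.pt")
with torch.no_grad():
    out = loaded(*inputs)  # the inputs captured from the last batch above
print(out.shape)  # expected: (batch_size, num_parallel_samples, prediction_length)
```

Finally, `plot.py`, the visualization script invoked at the top of this Gist: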
```python
import argparse
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def sample_plot(samples: np.ndarray,
                index: pd.PeriodIndex,
                prediction_intervals=(50.0, 90.0),
                color='g'):
    def alpha_for_percentile(p):
        return (p / 100.0) ** 0.3

    for c in prediction_intervals:
        assert 0.0 <= c <= 100.0

    # Turn each interval into its lower/upper percentile, plus the median.
    ps = [50.0] + [
        50.0 + f * c / 2.0
        for c in prediction_intervals
        for f in [-1.0, +1.0]
    ]
    percentiles_sorted = sorted(set(ps))

    # Approximate each percentile by the matching order statistic of the samples.
    sample_idxs = [int(np.round((samples.shape[0] - 1) * p / 100.0)) for p in percentiles_sorted]
    sorted_samples = np.sort(samples, axis=0)
    ps_data = [sorted_samples[idx, :] for idx in sample_idxs]

    i_p50 = len(percentiles_sorted) // 2
    p50_data = ps_data[i_p50]
    p50_series = pd.Series(data=p50_data, index=index.to_timestamp())
    p50_series.plot(color=color, ls="-", label="median")

    # Shade each interval, widest (most transparent) first.
    for i in range(len(percentiles_sorted) // 2):
        ptile = percentiles_sorted[i]
        alpha = alpha_for_percentile(ptile)
        plt.fill_between(
            index.to_timestamp(),
            ps_data[i],
            ps_data[-i - 1],
            facecolor=color,
            alpha=alpha,
            interpolate=True
        )


def plot_prob_forecasts(ts_entry, samples, index, prediction_length, path):
    plot_length = 150
    prediction_intervals = (50, 67, 95, 99)
    legend = ["observations", "median prediction"] + [f"{k}% prediction interval" for k in prediction_intervals][::-1]
    _, ax = plt.subplots(1, 1, figsize=(10, 7))
    ts_entry[-plot_length:].plot(ax=ax)
    sample_plot(samples, index, prediction_intervals=prediction_intervals)
    ax.axvline(ts_entry.index[-prediction_length], color='r')  # marks the forecast start
    plt.legend(legend, loc="upper left")
    if path is None:
        plt.show()
        plt.clf()
    else:
        plt.savefig('{}forecast.png'.format(path))
        plt.close()


def arg_parse():
    parser = argparse.ArgumentParser()
    parser.add_argument("-p",
                        "--prediction-length",
                        type=int,
                        required=True,
                        help="Prediction length of the forecast")
    parser.add_argument("-f",
                        "--freq",
                        required=True,
                        help="Prediction frequency of the forecast")
    parser.add_argument("-t",
                        "--start-time",
                        required=True,
                        help="Start time of the forecast")
    parser.add_argument("--target-path",
                        required=True,
                        help="Target NDArray path")
    parser.add_argument("--samples-path",
                        required=True,
                        help="Samples NDArray path")
    parser.add_argument("-o",
                        "--output-dir",
                        help="Plot output directory")
    return parser.parse_args()


if __name__ == "__main__":
    args = arg_parse()
    target = np.load(args.target_path)['target']
    start_date = pd.Period(args.start_time, freq=args.freq)
    samples = np.load(args.samples_path)['samples']
    # Index covering the forecast horizon (the last prediction_length periods).
    index = pd.period_range(start_date + target.shape[-1] - args.prediction_length,
                            periods=args.prediction_length,
                            freq=args.freq)
    ts = pd.Series(data=target, index=pd.period_range(start_date, periods=target.shape[-1], freq=start_date.freq))
    if args.output_dir is not None:
        plot_log_path = args.output_dir
        directory = os.path.dirname(plot_log_path)
        if not os.path.exists(directory):
            os.makedirs(directory)
    else:
        plot_log_path = None
    print("Plotting time series predictions ...")
    plot_prob_forecasts(ts, samples, index, args.prediction_length, plot_log_path)
```
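
The script expects `.npz` files containing a `target` array and a `samples` array. A hypothetical example (not in the gist) of producing inputs in that format:

```python
import numpy as np

target = np.random.poisson(5.0, size=200).astype(float)        # full observed series
samples = np.random.poisson(5.0, size=(100, 4)).astype(float)  # (num_samples, prediction_length)
np.savez("target.npz", target=target)
np.savez("samples.npz", samples=samples)
```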