Skip to content

Instantly share code, notes, and snippets.

@alex-hse-repository
Last active June 1, 2022 07:30
Show Gist options
  • Save alex-hse-repository/6ce9c25f4bae62b3cbdb87b17ed25ee5 to your computer and use it in GitHub Desktop.
Save alex-hse-repository/6ce9c25f4bae62b3cbdb87b17ed25ee5 to your computer and use it in GitHub Desktop.
from etna.models import CatBoostModelMultiSegment
from etna.transforms import SegmentEncoderTransform
from etna.transforms import LagTransform
from etna.analysis import metric_per_segment_distribution_plot
# Forecast horizon; also passed to the pipeline and used as the smallest lag below.
HORIZON = 62

# Fix the NumPy seed so the sampled subset of pages is reproducible.
np.random.seed(42)
# NOTE(review): np.random.choice draws with replacement by default, so the
# 100 sampled pages may contain duplicates -- confirm get_ts tolerates that.
segments = np.random.choice(data["Page"].values, size=100)
ts = get_ts(segments)

# Gap filling is applied to the dataset itself, before the pipeline is built:
# first the "running_mean" strategy with window=3, then the "zero" strategy
# (presumably for whatever the first pass could not fill -- verify).
gap_fillers = [
    TimeSeriesImputerTransform(in_column="target", strategy="running_mean", window=3),
    TimeSeriesImputerTransform(in_column="target", strategy="zero"),
]
ts.fit_transform(gap_fillers)

# Lags HORIZON .. HORIZON + 20 (21 values), none shorter than the horizon.
lag_steps = list(range(HORIZON, HORIZON + 21))

# Transforms handed to the pipeline, in the order they are listed.
transforms = [
    DensityOutliersTransform(
        in_column="target",
        window_size=30,
        n_neighbors=9,
        distance_coef=1,
    ),
    SegmentEncoderTransform(),
    FourierTransform(period=365.25, order=2, out_column="fourier"),
    DateFlagsTransform(
        day_number_in_week=True,
        day_number_in_month=True,
        is_weekend=True,
        out_column="df",
    ),
    LagTransform(in_column="target", lags=lag_steps, out_column="lag"),
]

pipeline = Pipeline(
    model=CatBoostModelMultiSegment(),
    transforms=transforms,
    horizon=HORIZON,
)

# Three-fold backtest scored with SMAPE; the third returned value is unused.
backtest_result = pipeline.backtest(ts, metrics=[SMAPE()], n_folds=3)
metrics, forecast, _ = backtest_result

# Box plot of the per-segment SMAPE distribution.
metric_per_segment_distribution_plot(
    metrics,
    metric_name="SMAPE",
    plot_type="box",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment