Skip to content

Instantly share code, notes, and snippets.

@Mr-Geekman
Mr-Geekman / binary_classification.py
Created July 8, 2022 10:15
[ETNA] Binary classification example
from typing import List
from typing import Optional
import numpy as np
import pandas as pd
from etna.models.base import BaseAdapter
from etna.models.base import MultiSegmentModel
from sklearn.base import ClassifierMixin
We can't make this file beautiful and searchable because it's too large.
target,segment,timestamp
1149.87,m1,2012-03-01
1053.8002,m1,2012-04-01
1388.8798,m1,2012-05-01
1783.3702,m1,2012-06-01
1921.0252,m1,2012-07-01
2704.9449,m1,2012-08-01
4184.4135,m1,2012-09-01
4148.3542,m1,2012-10-01
2620.7251,m1,2012-11-01
import pandas as pd
from etna.datasets import TSDataset
# Read "tourism_monthly.csv" from DATA_PATH, parsing the "timestamp" column as dates.
# NOTE(review): DATA_PATH is not defined in this snippet; it is presumably a
# pathlib.Path set elsewhere (it is combined with the file name via `/`) — confirm.
df = pd.read_csv(DATA_PATH / "tourism_monthly.csv", parse_dates=["timestamp"])
# Convert the frame with TSDataset.to_dataset and wrap the result in a TSDataset
# with freq="MS" (the pandas month-start alias).
ts = TSDataset(df=TSDataset.to_dataset(df), freq="MS")
>>> ts.info()
<class 'etna.datasets.TSDataset'>
num_segments: 366
num_exogs: 0
num_regressors: 0
num_known_future: 0
freq: MS
>>> len(ts.index)
333
import time
from etna.metrics import SMAPE
from etna.models import CatBoostMultiSegmentModel
from etna.pipeline import Pipeline
from etna.transforms import LagTransform
from etna.transforms import SegmentEncoderTransform
# Value passed as `horizon` to get_baseline_metrics; the dataset is built with
# freq="MS", so this is presumably 24 monthly steps — confirm.
HORIZON = 24
# Value passed as `n_folds` to get_baseline_metrics.
N_FOLDS = 3
>>> baseline_result = get_baseline_metrics(ts=ts, horizon=HORIZON, n_folds=N_FOLDS)
>>> print(f"SMAPE: {baseline_result['SMAPE']:.2f}")
SMAPE: 27.03
>>> print(f"Time: {baseline_result['time']:.2f}s")
Time: 46.16s
def get_metrics_by_horizon(ts: TSDataset, horizon: int, total_points: int):
if total_points % horizon != 0:
raise ValueError("Horizon should divide total number of points!")
n_folds = total_points // horizon
transforms = [
LagTransform(
in_column="target",
lags=list(range(horizon, MAX_LAG)),
from etna.pipeline import AutoRegressivePipeline
def get_metrics_by_step(ts: TSDataset, step: int, horizon: int, n_folds: int):
transforms = [
LagTransform(
in_column="target",
lags=list(range(step, MAX_LAG)),
out_column="lag_target",
),
from etna.ensembles import DirectEnsemble
from etna.pipeline import assemble_pipelines
def get_metrics_by_horizon_step(
ts: TSDataset, horizon_step: int, horizon: int, n_folds: int
):
if horizon % horizon_step != 0:
raise ValueError("Horizon step should devide horizon!")