# EspenHa/ucr_anomaly.py

## ucr_anomaly.py
from __future__ import annotations

import re
from dataclasses import dataclass
from pathlib import Path

import numpy as np

# Directory the loaders below search for sequence files (relative path).
data_dir = Path(
    "AnomalyDatasets_2021/UCR_TimeSeriesAnomalyDatasets2021/FilesAreInHere/UCR_Anomaly_FullData/"
)

# Filename layout: <3-digit id>_UCR_Anomaly_<name>_<train_stop>_<anom_start>_<anom_stop>.txt
# The dot before "txt" is escaped so that only a literal "." is accepted there.
pattern = re.compile(r"^([0-9]{3})_UCR_Anomaly_([a-zA-Z0-9]+)_([0-9]+)_([0-9]+)_([0-9]+)\.txt$")


@dataclass
class UCR_AnomalySequence:
    """A single sequence file together with the split points encoded in its filename.

    Attributes:
        name: Sequence name taken from the filename.
        id: Numeric id taken from the three-digit filename prefix.
        train_start: First training index; ``create`` always sets this to 1.
        train_stop: Last training index, taken from the filename.
        anom_start: First anomaly index, taken from the filename.
        anom_stop: Last anomaly index, taken from the filename.
        data: The values loaded from the file.
    """

    name: str
    id: int

    train_start: int  # starts at 1
    train_stop: int

    anom_start: int
    anom_stop: int

    data: np.ndarray

    @property
    def train_data(self) -> np.ndarray:
        """Return the slice of ``data`` covering the training range.

        The bounds are treated as 1-based and inclusive on both ends. This is
        an assumption about the dataset's indexing and may be off by one.
        """
        return self.data[self.train_start - 1 : self.train_stop]

    @property
    def anom_data(self) -> np.ndarray:
        """Return the slice of ``data`` covering the anomaly range.

        The bounds are treated as 1-based and inclusive on both ends. This is
        an assumption about the dataset's indexing and may be off by one.
        """
        return self.data[self.anom_start - 1 : self.anom_stop]

    @classmethod
    def create(cls, path: Path) -> UCR_AnomalySequence:
        """Load the sequence stored at ``path``.

        Args:
            path: File whose name follows the module-level ``pattern``.

        Returns:
            A sequence whose metadata is parsed from the filename and whose
            values are loaded as float32.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            ValueError: If the filename does not match ``pattern``.
        """
        # Explicit exceptions instead of ``assert``: asserts vanish under ``-O``.
        if not path.exists():
            raise FileNotFoundError(f"UCR anomaly file not found: {path}")

        # Validate the (cheap) filename before paying for the file load.
        match = pattern.match(path.name)
        if match is None:
            raise ValueError(f"Unexpected UCR anomaly filename: {path.name!r}")

        seq_id, name, train_stop, anom_start, anom_stop = match.groups()

        return cls(
            name=name,
            id=int(seq_id),
            train_start=1,
            train_stop=int(train_stop),
            anom_start=int(anom_start),
            anom_stop=int(anom_stop),
            data=np.loadtxt(path, dtype=np.float32),
        )

    @classmethod
    def create_by_id(cls, id: int) -> UCR_AnomalySequence:
        """Load the first file in ``data_dir`` whose name starts with the zero-padded ``id``.

        Raises:
            FileNotFoundError: If no file in ``data_dir`` matches.
        """
        return cls.create(cls._first_match(f"{id:03d}_*"))

    @classmethod
    def create_by_name(cls, name: str) -> UCR_AnomalySequence:
        """Load the first file in ``data_dir`` whose name contains ``_UCR_Anomaly_<name>_``.

        Raises:
            FileNotFoundError: If no file in ``data_dir`` matches.
        """
        return cls.create(cls._first_match(f"*_UCR_Anomaly_{name}_*"))

    @staticmethod
    def _first_match(glob_pattern: str) -> Path:
        """Return the first path in ``data_dir`` matching ``glob_pattern``."""
        # A default for ``next`` avoids leaking a bare StopIteration to callers.
        found = next(data_dir.glob(glob_pattern), None)
        if found is None:
            raise FileNotFoundError(f"No file matching {glob_pattern!r} in {data_dir}")
        return found


if __name__ == "__main__":
    # Demo: load one sequence three different ways and dump its arrays.
    full_name = "089_UCR_Anomaly_DISTORTEDtiltAPB1_100000_114283_114350.txt"
    name = "DISTORTEDtiltAPB1"
    id = 89

    # The three constructors below all resolve to the same file.
    b = UCR_AnomalySequence.create(data_dir / full_name)
    b = UCR_AnomalySequence.create_by_id(id)
    b = UCR_AnomalySequence.create_by_name(name)

    # Print each array followed by its shape: full data, training part, anomaly part.
    for values in (b.data, b.train_data, b.anom_data):
        print(values)
        print(values.shape)
	from __future__ import annotations

	import re
	from dataclasses import dataclass
	from pathlib import Path

	import numpy as np

	# Directory the loaders below search for sequence files (relative path).
	data_dir = Path(
		"AnomalyDatasets_2021/UCR_TimeSeriesAnomalyDatasets2021/FilesAreInHere/UCR_Anomaly_FullData/"
	)

	# Filename layout: <3-digit id>_UCR_Anomaly_<name>_<train_stop>_<anom_start>_<anom_stop>.txt
	# The dot before "txt" is escaped so that only a literal "." is accepted there.
	pattern = re.compile(r"^([0-9]{3})_UCR_Anomaly_([a-zA-Z0-9]+)_([0-9]+)_([0-9]+)_([0-9]+)\.txt$")


	@dataclass
	class UCR_AnomalySequence:
		"""A single sequence file together with the split points encoded in its filename.

		Attributes:
			name: Sequence name taken from the filename.
			id: Numeric id taken from the three-digit filename prefix.
			train_start: First training index; ``create`` always sets this to 1.
			train_stop: Last training index, taken from the filename.
			anom_start: First anomaly index, taken from the filename.
			anom_stop: Last anomaly index, taken from the filename.
			data: The values loaded from the file.
		"""

		name: str
		id: int

		train_start: int  # starts at 1
		train_stop: int

		anom_start: int
		anom_stop: int

		data: np.ndarray

		@property
		def train_data(self) -> np.ndarray:
			"""Return the slice of ``data`` covering the training range.

			The bounds are treated as 1-based and inclusive on both ends. This is
			an assumption about the dataset's indexing and may be off by one.
			"""
			return self.data[self.train_start - 1 : self.train_stop]

		@property
		def anom_data(self) -> np.ndarray:
			"""Return the slice of ``data`` covering the anomaly range.

			The bounds are treated as 1-based and inclusive on both ends. This is
			an assumption about the dataset's indexing and may be off by one.
			"""
			return self.data[self.anom_start - 1 : self.anom_stop]

		@classmethod
		def create(cls, path: Path) -> UCR_AnomalySequence:
			"""Load the sequence stored at ``path``.

			Args:
				path: File whose name follows the module-level ``pattern``.

			Returns:
				A sequence whose metadata is parsed from the filename and whose
				values are loaded as float32.

			Raises:
				FileNotFoundError: If ``path`` does not exist.
				ValueError: If the filename does not match ``pattern``.
			"""
			# Explicit exceptions instead of ``assert``: asserts vanish under ``-O``.
			if not path.exists():
				raise FileNotFoundError(f"UCR anomaly file not found: {path}")

			# Validate the (cheap) filename before paying for the file load.
			match = pattern.match(path.name)
			if match is None:
				raise ValueError(f"Unexpected UCR anomaly filename: {path.name!r}")

			seq_id, name, train_stop, anom_start, anom_stop = match.groups()

			return cls(
				name=name,
				id=int(seq_id),
				train_start=1,
				train_stop=int(train_stop),
				anom_start=int(anom_start),
				anom_stop=int(anom_stop),
				data=np.loadtxt(path, dtype=np.float32),
			)

		@classmethod
		def create_by_id(cls, id: int) -> UCR_AnomalySequence:
			"""Load the first file in ``data_dir`` whose name starts with the zero-padded ``id``.

			Raises:
				FileNotFoundError: If no file in ``data_dir`` matches.
			"""
			return cls.create(cls._first_match(f"{id:03d}_*"))

		@classmethod
		def create_by_name(cls, name: str) -> UCR_AnomalySequence:
			"""Load the first file in ``data_dir`` whose name contains ``_UCR_Anomaly_<name>_``.

			Raises:
				FileNotFoundError: If no file in ``data_dir`` matches.
			"""
			# Wildcards on both sides are required: without them the glob only
			# matches a file literally named "_UCR_Anomaly_<name>_".
			return cls.create(cls._first_match(f"*_UCR_Anomaly_{name}_*"))

		@staticmethod
		def _first_match(glob_pattern: str) -> Path:
			"""Return the first path in ``data_dir`` matching ``glob_pattern``."""
			# A default for ``next`` avoids leaking a bare StopIteration to callers.
			found = next(data_dir.glob(glob_pattern), None)
			if found is None:
				raise FileNotFoundError(f"No file matching {glob_pattern!r} in {data_dir}")
			return found


	if __name__ == "__main__":
		# Demo: load one sequence three different ways and dump its arrays.
		full_name = "089_UCR_Anomaly_DISTORTEDtiltAPB1_100000_114283_114350.txt"
		name = "DISTORTEDtiltAPB1"
		id = 89

		# all equivalent
		b = UCR_AnomalySequence.create(data_dir / full_name)
		b = UCR_AnomalySequence.create_by_id(id)
		b = UCR_AnomalySequence.create_by_name(name)

		# Full array and its shape.
		print(b.data)
		print(b.data.shape)

		# Training slice and its shape.
		print(b.train_data)
		print(b.train_data.shape)

		# Anomaly slice and its shape.
		print(b.anom_data)
		print(b.anom_data.shape)