Last active
January 15, 2023 09:01
-
-
Save EspenHa/cac5fd5bbccc1c7ac81ac64901ca4f09 to your computer and use it in GitHub Desktop.
Code for reading UCR Anomaly 2021 files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import annotations | |
import re | |
from dataclasses import dataclass | |
from pathlib import Path | |
import numpy as np | |
# Directory holding the extracted UCR Anomaly 2021 text files, relative to the
# current working directory.
data_dir = Path(
    "AnomalyDatasets_2021/UCR_TimeSeriesAnomalyDatasets2021/FilesAreInHere/UCR_Anomaly_FullData/"
)

# Filename layout: <id>_UCR_Anomaly_<name>_<train_stop>_<anom_start>_<anom_stop>.txt
# The dot before "txt" is escaped so that only a literal ".txt" suffix matches;
# an unescaped "." would accept any character in that position.
pattern = re.compile(r"^([0-9]{3})_UCR_Anomaly_([a-zA-Z0-9]+)_([0-9]+)_([0-9]+)_([0-9]+)\.txt$")
@dataclass
class UCR_AnomalySequence:
    """A single UCR Anomaly 2021 series together with its filename metadata.

    The metadata fields are parsed from a filename of the form
    ``<id>_UCR_Anomaly_<name>_<train_stop>_<anom_start>_<anom_stop>.txt``;
    ``data`` holds the values loaded from that file.
    """

    name: str
    id: int
    train_start: int  # starts at 1
    train_stop: int
    anom_start: int
    anom_stop: int
    data: np.ndarray

    @property
    def train_data(self) -> np.ndarray:
        """Return the slice of ``data`` covering the training region.

        The archive's indexing convention is not confirmed here; this assumes
        1-based indices with both endpoints included, so it may be off by one.
        """
        return self.data[self.train_start - 1 : self.train_stop]

    @property
    def anom_data(self) -> np.ndarray:
        """Return the slice of ``data`` covering the labelled anomaly.

        The archive's indexing convention is not confirmed here; this assumes
        1-based indices with both endpoints included, so it may be off by one.
        """
        return self.data[self.anom_start - 1 : self.anom_stop]

    @classmethod
    def create(cls, path: Path) -> UCR_AnomalySequence:
        """Load the series stored at *path* and parse metadata from its name.

        Raises:
            FileNotFoundError: if *path* does not exist.
            ValueError: if the filename does not follow the expected layout.
        """
        # Explicit exceptions instead of ``assert``: asserts are removed when
        # Python runs with -O, which would let bad input slip through.
        if not path.exists():
            raise FileNotFoundError(f"UCR anomaly file not found: {path}")

        match = pattern.match(path.name)
        if match is None:
            raise ValueError(f"Not a UCR anomaly filename: {path.name!r}")

        seq_id, name, train_stop, anom_start, anom_stop = match.groups()
        data = np.loadtxt(path, dtype=np.float32)
        return cls(
            name=name,
            id=int(seq_id),
            train_start=1,
            train_stop=int(train_stop),
            anom_start=int(anom_start),
            anom_stop=int(anom_stop),
            data=data,
        )

    @classmethod
    def _first_match(cls, glob_pattern: str) -> Path:
        """Return the first file in ``data_dir`` matching *glob_pattern*.

        Raises:
            FileNotFoundError: if no file matches.
        """
        # A default for next() avoids leaking a bare StopIteration to callers.
        found = next(data_dir.glob(glob_pattern), None)
        if found is None:
            raise FileNotFoundError(
                f"No file matching {glob_pattern!r} in {data_dir}"
            )
        return found

    @classmethod
    def create_by_id(cls, id: int) -> UCR_AnomalySequence:
        """Load the series whose filename starts with the zero-padded *id*.

        Raises:
            FileNotFoundError: if no file in ``data_dir`` has that id prefix.
        """
        return cls.create(cls._first_match(f"{id:03d}_*"))

    @classmethod
    def create_by_name(cls, name: str) -> UCR_AnomalySequence:
        """Load the series whose filename carries the dataset *name*.

        Raises:
            FileNotFoundError: if no file in ``data_dir`` has that name.
        """
        return cls.create(cls._first_match(f"*_UCR_Anomaly_{name}_*"))
if __name__ == "__main__":
    # Small demo: load the same series three different ways, then dump the
    # full array, the training slice and the anomaly slice with their shapes.
    example_file = "089_UCR_Anomaly_DISTORTEDtiltAPB1_100000_114283_114350.txt"
    example_name = "DISTORTEDtiltAPB1"
    example_id = 89

    # Each call below resolves to the same file; the last result is the one shown.
    sequence = UCR_AnomalySequence.create(data_dir / example_file)
    sequence = UCR_AnomalySequence.create_by_id(example_id)
    sequence = UCR_AnomalySequence.create_by_name(example_name)

    for values in (sequence.data, sequence.train_data, sequence.anom_data):
        print(values)
        print(values.shape)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment