Skip to content

Instantly share code, notes, and snippets.

@EspenHa
Last active January 15, 2023 09:01
Show Gist options
  • Save EspenHa/cac5fd5bbccc1c7ac81ac64901ca4f09 to your computer and use it in GitHub Desktop.
Save EspenHa/cac5fd5bbccc1c7ac81ac64901ca4f09 to your computer and use it in GitHub Desktop.
Code for reading UCR Anomaly 2021 files
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
import numpy as np
# Directory that holds the UCR Anomaly 2021 series files. The path is relative,
# so it is resolved against the current working directory at run time.
data_dir = Path(
    "AnomalyDatasets_2021/UCR_TimeSeriesAnomalyDatasets2021/FilesAreInHere/UCR_Anomaly_FullData/"
)

# File-name layout:
#   <3-digit id>_UCR_Anomaly_<name>_<train_stop>_<anom_start>_<anom_stop>.txt
# The dot before "txt" is escaped so that only a literal ".txt" suffix matches;
# an unescaped "." would accept any character in that position.
pattern = re.compile(
    r"^([0-9]{3})_UCR_Anomaly_([a-zA-Z0-9]+)_([0-9]+)_([0-9]+)_([0-9]+)\.txt$"
)
@dataclass
class UCR_AnomalySequence:
    """One series from the UCR Anomaly 2021 archive plus its file-name metadata.

    Instances are normally built with :meth:`create`, which loads the numeric
    data from a text file and parses the remaining fields out of the file name
    using the module-level ``pattern``.
    """

    name: str  # dataset name taken from the file name
    id: int  # numeric prefix of the file name
    train_start: int  # starts at 1
    train_stop: int
    anom_start: int
    anom_stop: int
    data: np.ndarray  # the full series; ``create`` loads it as float32

    @property
    def train_data(self) -> np.ndarray:
        """Return the training slice ``data[train_start - 1 : train_stop]``.

        The bounds are treated as 1-based and inclusive on both ends.
        NOTE(review): the original author was not fully sure this matches the
        archive's indexing convention; it might be off by one.
        """
        return self.data[self.train_start - 1 : self.train_stop]

    @property
    def anom_data(self) -> np.ndarray:
        """Return the anomaly slice ``data[anom_start - 1 : anom_stop]``.

        The bounds are treated as 1-based and inclusive on both ends.
        NOTE(review): the original author was not fully sure this matches the
        archive's indexing convention; it might be off by one.
        """
        return self.data[self.anom_start - 1 : self.anom_stop]

    @classmethod
    def create(cls, path: Path) -> UCR_AnomalySequence:
        """Load the series stored at *path* and parse metadata from its name.

        Args:
            path: Location of a series file whose name matches ``pattern``.

        Returns:
            A populated sequence with ``train_start`` fixed to 1.

        Raises:
            FileNotFoundError: If *path* does not exist.
            ValueError: If the file name does not match ``pattern``.
        """
        # Explicit raises instead of ``assert`` so the checks survive ``-O``.
        if not path.exists():
            raise FileNotFoundError(f"UCR anomaly file not found: {path}")

        # Validate the name before paying for the (potentially large) load.
        match = pattern.match(path.name)
        if match is None:
            raise ValueError(
                f"File name does not follow the UCR anomaly naming scheme: {path.name}"
            )
        seq_id, name, train_stop, anom_start, anom_stop = match.groups()

        data = np.loadtxt(path, dtype=np.float32)
        return cls(
            name=name,
            id=int(seq_id),
            train_start=1,
            train_stop=int(train_stop),
            anom_start=int(anom_start),
            anom_stop=int(anom_stop),
            data=data,
        )

    @classmethod
    def _create_from_glob(cls, glob_pattern: str) -> UCR_AnomalySequence:
        """Create a sequence from the first file in ``data_dir`` matching the glob."""
        first = next(data_dir.glob(glob_pattern), None)
        if first is None:
            # Avoid leaking a bare StopIteration to the caller.
            raise FileNotFoundError(
                f"No file matching {glob_pattern!r} in {data_dir}"
            )
        return cls.create(first)

    @classmethod
    def create_by_id(cls, id: int) -> UCR_AnomalySequence:
        """Load the sequence whose file name starts with the zero-padded *id*.

        Raises:
            FileNotFoundError: If no file in ``data_dir`` has that prefix.
        """
        return cls._create_from_glob(f"{id:03d}_*")

    @classmethod
    def create_by_name(cls, name: str) -> UCR_AnomalySequence:
        """Load the first sequence in ``data_dir`` whose file name carries *name*.

        Raises:
            FileNotFoundError: If no file in ``data_dir`` matches that name.
        """
        return cls._create_from_glob(f"*_UCR_Anomaly_{name}_*")
if __name__ == "__main__":
    # Demo: load one series through each constructor, then dump the arrays.
    id = 89
    name = "DISTORTEDtiltAPB1"
    full_name = "089_UCR_Anomaly_DISTORTEDtiltAPB1_100000_114283_114350.txt"

    # The three loaders are meant to be equivalent; the last result is kept.
    loaders = (
        lambda: UCR_AnomalySequence.create(data_dir / full_name),
        lambda: UCR_AnomalySequence.create_by_id(id),
        lambda: UCR_AnomalySequence.create_by_name(name),
    )
    for load in loaders:
        b = load()

    # Print each array followed by its shape: full series, train, anomaly.
    for series in (b.data, b.train_data, b.anom_data):
        print(series)
        print(series.shape)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment