Skip to content

Instantly share code, notes, and snippets.

@nebiyuelias1
Created December 1, 2023 04:49
Show Gist options
  • Save nebiyuelias1/7513d186e3b959af668f1113c6bf4a27 to your computer and use it in GitHub Desktop.
Save nebiyuelias1/7513d186e3b959af668f1113c6bf4a27 to your computer and use it in GitHub Desktop.
IForestASD with pysad
import numpy as np
from sklearn.utils import shuffle
from pysad.models import IForestASD
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.utils import Data
from pysad.evaluation import AUROCMetric
from pysad.utils.array_streamer import ArrayStreamer
from tqdm import tqdm
import pandas as pd
# Columns removed before streaming: the non-numeric columns plus the label.
NON_FEATURE_COLUMNS = [
    'trans_date_trans_time', 'merchant', 'category', 'first', 'last', 'gender',
    'street', 'city', 'state', 'job', 'dob', 'trans_num', 'is_fraud',
]

# Column holding the labels (assumed: 1 for fraud, 0 for normal).
LABEL_COLUMN = 'is_fraud'

# Positions, within each streamed feature row, of the values that are passed
# through the unit-norm scaler before scoring.
# NOTE(review): these index into the columns left after the drop above, so
# which features they select depends on the CSV's column order -- confirm.
SCALED_POSITIONS = [5, 6, 7, 9]


def split_features_labels(df):
    """Separate the label column from the feature columns.

    Args:
        df: DataFrame containing every column named in NON_FEATURE_COLUMNS.

    Returns:
        A ``(features, labels)`` tuple: ``df`` without NON_FEATURE_COLUMNS
        (a new DataFrame, ``df`` itself is not modified) and the
        ``'is_fraud'`` column.

    Raises:
        KeyError: if any of the expected columns is missing from ``df``.
    """
    labels = df[LABEL_COLUMN]
    features = df.drop(NON_FEATURE_COLUMNS, axis=1)
    return features, labels


def main(csv_path='data/fraudTrain.csv', start=100, stop=1000):
    """Stream a slice of the CSV through IForestASD and print the AUROC.

    Args:
        csv_path: Path of the CSV file to load.
        start: Index (after shuffling) of the first row to stream.
        stop: Index (after shuffling) one past the last row to stream.
    """
    np.random.seed(61)  # Fix random seed.

    # Load data from CSV using pandas.
    df = pd.read_csv(csv_path)
    features, labels = split_features_labels(df)

    # Shuffle the data.
    features, labels = shuffle(features, labels)

    iterator = ArrayStreamer(shuffle=False)  # Init streamer to simulate streaming data.
    model = IForestASD()  # Init IForestASD anomaly detection model.
    preprocessor = InstanceUnitNormScaler()  # Init normalizer.
    postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.
    auroc = AUROCMetric()  # Init area under the receiver-operating characteristics curve metric.

    stream_X = features.values[start:stop]
    stream_y = labels.values[start:stop]

    for X, y in tqdm(iterator.iter(stream_X, stream_y)):  # Stream data.
        # Work on a copy so the array being streamed is never modified in
        # place, in case the streamer yields views. Assumes X is 1-dimensional.
        row = X.copy()

        # Scale only the selected positions and write them back into the row.
        row[SCALED_POSITIONS] = preprocessor.fit_transform_partial(row[SCALED_POSITIONS])

        score = model.fit_score_partial(row)  # Fit model to and score the instance.
        score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.

        auroc.update(y, score)  # Update AUROC metric.

    # Output resulting AUROC metric.
    # NOTE(review): AUROC is only meaningful if both classes occur in the
    # streamed slice -- confirm for this data and slice size.
    print("AUROC: ", auroc.get())


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment