Created
December 1, 2023 04:49
-
-
Save nebiyuelias1/7513d186e3b959af668f1113c6bf4a27 to your computer and use it in GitHub Desktop.
IForestASD with pysad
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
from sklearn.utils import shuffle
from pysad.models import IForestASD
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.utils import Data  # noqa: F401  (not used below; import kept as in the original)
from pysad.evaluation import AUROCMetric
from pysad.utils.array_streamer import ArrayStreamer
from tqdm import tqdm
import pandas as pd

# Seed for NumPy's global RNG; sklearn's ``shuffle`` falls back to that global
# state when no ``random_state`` is passed, so this also fixes the shuffle.
SEED = 61

DATA_PATH = 'data/fraudTrain.csv'

# Column holding the ground-truth label (1 for fraud, 0 for normal).
LABEL_COLUMN = 'is_fraud'

# Columns removed from the feature matrix before streaming.
NON_FEATURE_COLUMNS = [
    'trans_date_trans_time', 'merchant', 'category', 'first', 'last', 'gender',
    'street', 'city', 'state', 'job', 'dob', 'trans_num',
]

# Positions, within the retained feature columns, that are unit-norm scaled.
# NOTE(review): which columns these positions select depends on the column
# order of the CSV -- confirm against the dataset before changing the drop list.
NORMALIZED_POSITIONS = [5, 6, 7, 9]

# Half-open row range of the shuffled data that is streamed through the model.
STREAM_START = 100
STREAM_STOP = 1000

# Number of recent scores averaged by the running-average postprocessor.
SCORE_WINDOW_SIZE = 5


def main():
    """Stream a slice of the fraud dataset through IForestASD and print the AUROC.

    The CSV at ``DATA_PATH`` is loaded, the label column is split off, the
    columns listed in ``NON_FEATURE_COLUMNS`` are dropped and the rows are
    shuffled. Rows ``STREAM_START:STREAM_STOP`` are then fed to the model one
    instance at a time; each score is smoothed by a running average and
    accumulated into an AUROC metric, which is printed at the end.
    """
    np.random.seed(SEED)  # Fix random seed.

    # Load data from CSV using pandas.
    df = pd.read_csv(DATA_PATH)
    labels = df[LABEL_COLUMN]
    features = df.drop(columns=[*NON_FEATURE_COLUMNS, LABEL_COLUMN])

    # Shuffle features and labels together so rows stay aligned.
    features, labels = shuffle(features, labels)

    # Force a float array so the normalised values written back inside the
    # loop are never truncated by an integer dtype.
    stream_features = features.to_numpy(dtype=float)[STREAM_START:STREAM_STOP]
    stream_labels = labels.to_numpy()[STREAM_START:STREAM_STOP]

    iterator = ArrayStreamer(shuffle=False)  # Init streamer to simulate streaming data.
    model = IForestASD()  # Init IForestASD anomaly detection model.
    preprocessor = InstanceUnitNormScaler()  # Init normalizer.
    postprocessor = RunningAveragePostprocessor(window_size=SCORE_WINDOW_SIZE)  # Init running average postprocessor.
    auroc = AUROCMetric()  # Init area under the receiver-operating characteristics curve metric.

    # The streamer yields a generator, so tell tqdm the length explicitly to
    # get a real progress bar instead of a bare iteration counter.
    stream = iterator.iter(stream_features, stream_labels)
    for X, y in tqdm(stream, total=len(stream_labels)):  # Stream data.
        # X is a single 1-dimensional instance; only the selected positions
        # are normalised, and the result is written back in place.
        X[NORMALIZED_POSITIONS] = preprocessor.fit_transform_partial(X[NORMALIZED_POSITIONS])

        score = model.fit_score_partial(X)  # Fit model to and score the instance.
        score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.
        auroc.update(y, score)  # Update AUROC metric.

    # Output resulting AUROC metric.
    print("AUROC: ", auroc.get())


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment