# nebiyuelias1/iforestasd.py

## iforestasd.py
import numpy as np
from sklearn.utils import shuffle
from pysad.models import IForestASD
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.utils import Data
from pysad.evaluation import AUROCMetric
from pysad.utils.array_streamer import ArrayStreamer
from tqdm import tqdm
import pandas as pd

if __name__ == "__main__":
    # Script entry point: stream a slice of the fraud data set through
    # IForestASD one row at a time and report the AUROC of the smoothed scores.
    np.random.seed(61)  # Fix random seed.

    # Load data from CSV using pandas.
    df = pd.read_csv('data/fraudTrain.csv')

    # The 'is_fraud' column is used as the label (assumed 1 for fraud, 0 for normal).
    labels = df['is_fraud']

    # Drop the non-numeric columns and the label column.
    features = df.drop(['trans_date_trans_time', 'merchant', 'category', 'first', 'last', 'gender',
                        'street', 'city', 'state', 'job', 'dob', 'trans_num', 'is_fraud'], axis=1)

    # Shuffle features and labels together.
    features, labels = shuffle(features, labels)

    # Only rows 100..999 of the shuffled data are streamed.
    stream_features = features.values[100:1000]
    stream_labels = labels.values[100:1000]

    # Positions inside each feature row that are unit-norm scaled before scoring.
    # NOTE(review): which CSV columns these map to depends on the file's
    # column order -- confirm against the data set.
    scaled_positions = [5, 6, 7, 9]

    iterator = ArrayStreamer(shuffle=False)  # Data is already shuffled above.

    model = IForestASD()  # Init IForestASD anomaly detection model.
    preprocessor = InstanceUnitNormScaler()  # Init normalizer.
    postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.
    auroc = AUROCMetric()  # Init area under the receiver-operating characteristics curve metric.

    # Pass the row count explicitly so tqdm can show a determinate progress bar
    # (assumes the streamer yields one item per row -- TODO confirm).
    stream = iterator.iter(stream_features, stream_labels)
    for X, y in tqdm(stream, total=len(stream_features)):  # Stream data.
        # X is indexed as a single 1-dimensional row; the scaled values are
        # written back into the same positions of that row.
        X[scaled_positions] = preprocessor.fit_transform_partial(X[scaled_positions])

        score = model.fit_score_partial(X)  # Fit model to and score the instance.
        score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.

        auroc.update(y, score)  # Update AUROC metric.

    # Output resulting AUROC metric.
    print("AUROC: ", auroc.get())
	import numpy as np
	from sklearn.utils import shuffle
	from pysad.models import IForestASD
	from pysad.transform.preprocessing import InstanceUnitNormScaler
	from pysad.transform.postprocessing import RunningAveragePostprocessor
	from pysad.utils import Data
	from pysad.evaluation import AUROCMetric
	from pysad.utils.array_streamer import ArrayStreamer
	from tqdm import tqdm
	import pandas as pd

	if __name__ == "__main__":
		# Script entry point: stream a slice of the fraud data set through
		# IForestASD one row at a time and report the AUROC of the smoothed scores.
		# NOTE(review): this section repeats the script that appears earlier in
		# the file; confirm whether the duplicate is intentional.
		np.random.seed(61)  # Fix random seed.

		# Load data from CSV using pandas.
		df = pd.read_csv('data/fraudTrain.csv')

		# The 'is_fraud' column is used as the label (assumed 1 for fraud, 0 for normal).
		labels = df['is_fraud']

		# Drop the non-numeric columns and the label column.
		features = df.drop(['trans_date_trans_time', 'merchant', 'category', 'first', 'last', 'gender',
			'street', 'city', 'state', 'job', 'dob', 'trans_num', 'is_fraud'], axis=1)

		# Shuffle features and labels together.
		features, labels = shuffle(features, labels)

		# Only rows 100..999 of the shuffled data are streamed.
		stream_features = features.values[100:1000]
		stream_labels = labels.values[100:1000]

		# Positions inside each feature row that are unit-norm scaled before scoring.
		# NOTE(review): which CSV columns these map to depends on the file's
		# column order -- confirm against the data set.
		scaled_positions = [5, 6, 7, 9]

		iterator = ArrayStreamer(shuffle=False)  # Data is already shuffled above.

		model = IForestASD()  # Init IForestASD anomaly detection model.
		preprocessor = InstanceUnitNormScaler()  # Init normalizer.
		postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.
		auroc = AUROCMetric()  # Init area under the receiver-operating characteristics curve metric.

		# Pass the row count explicitly so tqdm can show a determinate progress bar
		# (assumes the streamer yields one item per row -- TODO confirm).
		stream = iterator.iter(stream_features, stream_labels)
		for X, y in tqdm(stream, total=len(stream_features)):  # Stream data.
			# X is indexed as a single 1-dimensional row; the scaled values are
			# written back into the same positions of that row.
			X[scaled_positions] = preprocessor.fit_transform_partial(X[scaled_positions])

			score = model.fit_score_partial(X)  # Fit model to and score the instance.
			score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.

			auroc.update(y, score)  # Update AUROC metric.

		# Output resulting AUROC metric.
		print("AUROC: ", auroc.get())