Alexandra Amidon lynnssi

## transformer_example.py
# Adapted from https://huggingface.co/docs/transformers/tasks/sequence_classification
# Import packages from HuggingFace
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from torch import nn

# Load IMDB dataset (movie reviews)
imdb = load_dataset("imdb")

# Load tokenizer associated with the DistilBERT model

## ECOD_fit_score.py
from pyod.models.ecod import ECOD
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

# Train an ECOD detector on the training split.
# X_train / X_test / y_test are expected to be defined before this snippet runs.
clf_name = 'ECOD'
clf = ECOD()
clf.fit(X_train)

# Outlier scores for the held-out samples.
test_scores = clf.decision_function(X_test)

# ROC AUC of the scores against the ground-truth labels, rounded to 4 decimals.
roc = round(roc_auc_score(y_test, test_scores), 4)

## lof_novelty_sklearn.py
# Imports needed by this snippet; without them the names below are undefined
# at this point when the snippet is run on its own or top to bottom.
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import LocalOutlierFactor

# Novelty mode: fit on X_train, then score the unseen samples in X_test.
# X_train / X_test / y_test are expected to be defined before this snippet runs.
clf = LocalOutlierFactor(novelty=True)
clf = clf.fit(X_train)
test_scores = clf.decision_function(X_test)

# sklearn's LOF decision scores are oriented the opposite way to the original LOF
# score: lower means more abnormal. Multiply by -1 so that larger means more
# anomalous, which is the ranking roc_auc_score and precision_n_scores need when
# outliers are labelled 1 (the pyod convention); otherwise the metrics would be
# computed against the reversed ranking.
# Documentation: "Inliers tend to have a LOF score close to 1 (negative_outlier_factor_ close to -1), while outliers tend to have a larger LOF score"
test_scores = -1*test_scores

# Evaluation metrics, rounded to 4 decimals.
roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

## lof_predict_sklearn.py
from sklearn.neighbors import LocalOutlierFactor

# Outlier-detection mode (novelty=False): labels are produced for the very
# samples the model is fitted on, via fit_predict.
clf = LocalOutlierFactor(novelty=False)
y_pred = clf.fit_predict(X_train)

# sklearn marks outliers with -1, whereas pyod marks them with 1.
# Turn the predictions into a boolean mask that is True for outliers so they
# can be compared with y_train directly.
y_pred = (y_pred == -1)

# Count the disagreements with the ground truth and report them as a fraction.
n_errors = (y_pred != y_train).sum()
print("Error rate:", n_errors / len(y_pred))

## lof_decision_scores.py
import matplotlib.pyplot as plt
# LOF is used below, so it has to be imported in this snippet as well.
from pyod.models.lof import LOF

# NOTE(review): `sns` is not imported anywhere in this snippet; it is presumably
# seaborn (`import seaborn as sns`) and must be in scope before running this.
# NOTE(review): if `sns` is seaborn, `distplot` is deprecated there in favour of
# `histplot` / `displot` - confirm against the installed version.

# Fit the detector on the training data and score the test samples.
# X_train / X_test / y_test are expected to be defined before this snippet runs.
detector = LOF()
scores = detector.fit(X_train).decision_function(X_test)

# Overlay the score distributions of the two classes (y_test == 0: inliers,
# y_test == 1: outliers) on the same axes.
sns.distplot(scores[y_test==0], label="inlier scores")
sns.distplot(scores[y_test==1], label="outlier scores").set_title("Distribution of Outlier Scores from LOF Detector")
plt.legend()
plt.xlabel("Outlier score")

## train_test_lof.py
from pyod.models.lof import LOF

# Build and train the pyod LOF detector in one expression.
# X_train / X_test / y_test are expected to be defined before this snippet runs.
clf_name = 'LOF'
clf = LOF().fit(X_train)

# Outlier scores for the held-out samples.
test_scores = clf.decision_function(X_test)

# Evaluation metrics, rounded to 4 decimals.
# roc_auc_score and precision_n_scores must already be imported.
roc = round(roc_auc_score(y_test, test_scores), 4)
prn = round(precision_n_scores(y_test, test_scores), 4)

## river_key_example_pipeline.py
from sklearn import datasets
from river import stream
from river import compose
from river import linear_model
from river import optim
from river import preprocessing
from river import metrics

model = compose.Pipeline(
    preprocessing.StandardScaler(),

## river_simple_pipeline.py
from river import compose
from river import linear_model
from river import preprocessing
from river import feature_extraction

# Three-step pipeline: standardise the features, extend them with polynomial
# terms, then fit a linear regression. The `|` operator chains river steps
# into a compose.Pipeline.
model = (
    preprocessing.StandardScaler()
    | feature_extraction.PolynomialExtender()
    | linear_model.LinearRegression()
)

## river_data_structure.py
import datetime

# Example observation expressed as a plain dict that maps feature names to
# values of mixed types (timestamp, integer, string, floats).
x = dict(
    moment=datetime.datetime(2016, 4, 1, 0, 0, 7),
    clouds=75,
    description='light rain',
    temperature=6.54,
    wind=9.3,
)

## river_flask_example.py
import flask
app = flask.Flask(__name__)

@app.route('/', methods=['GET'])
def predict():
    """Handle GET requests on '/' by scoring the request's JSON body.

    The JSON payload of the incoming request is passed unchanged to the
    model's ``predict_proba_one`` and the result is returned as the response.
    The model is obtained through ``load_model()`` on every call.
    """
    features = flask.request.json
    return load_model().predict_proba_one(features)

@app.route('/', methods=['POST'])
	# Adapted from https://huggingface.co/docs/transformers/tasks/sequence_classification
	# Import packages from HuggingFace
	from datasets import load_dataset
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
	from torch import nn

	# Load IMDB dataset (movie reviews)
	imdb = load_dataset("imdb")

	# Load tokenizer associated with the DistilBERT model
	from pyod.models.ecod import ECOD
	clf_name = 'ECOD'
	clf = ECOD()
	clf.fit(X_train)

	test_scores = clf.decision_function(X_test)

	from pyod.utils.utility import precision_n_scores
	from sklearn.metrics import roc_auc_score
	roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
	clf = LocalOutlierFactor(novelty=True)
	clf = clf.fit(X_train)
	test_scores = clf.decision_function(X_test)

	# LOF in sklearn returns negative decision scores. Multiply by -1 to approximate the original score, otherwise the roc_auc_score function doesn't work.
	# Documentation: "Inliers tend to have a LOF score close to 1 (negative_outlier_factor_ close to -1), while outliers tend to have a larger LOF score"
	test_scores = -1*test_scores

	roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
	prn = round(precision_n_scores(y_test, test_scores), ndigits=4)
	from sklearn.neighbors import LocalOutlierFactor
	# fit the model for outlier detection (default)
	clf = LocalOutlierFactor(novelty=False)
	y_pred = clf.fit_predict(X_train)

	# Convert y_pred to the same convention as pyod.
	# In sklearn, -1 is an outlier. In pyod, 1 is an outlier.
	y_pred = y_pred == -1
	n_errors = (y_pred != y_train).sum()
	print("Error rate:", n_errors/len(y_pred))
	import matplotlib.pyplot as plt

	detector = LOF()
	scores = detector.fit(X_train).decision_function(X_test)

	sns.distplot(scores[y_test==0], label="inlier scores")
	sns.distplot(scores[y_test==1], label="outlier scores").set_title("Distribution of Outlier Scores from LOF Detector")
	plt.legend()
	plt.xlabel("Outlier score")
	from pyod.models.lof import LOF
	clf_name = 'LOF'
	clf = LOF()
	clf.fit(X_train)

	test_scores = clf.decision_function(X_test)

	roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
	prn = round(precision_n_scores(y_test, test_scores), ndigits=4)
	from sklearn import datasets
	from river import stream
	from river import compose
	from river import linear_model
	from river import optim
	from river import preprocessing
	from river import metrics

	model = compose.Pipeline(
	preprocessing.StandardScaler(),
	import datetime
	x = {'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),
	'clouds': 75,
	'description': 'light rain',
	'temperature': 6.54,
	'wind': 9.3}
	import flask
	app = flask.Flask(__name__)

	@app.route('/', methods=['GET'])
	def predict():
	payload = flask.request.json
	river_model = load_model()
	return river_model.predict_proba_one(payload)

	@app.route('/', methods=['POST'])