This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adapted from https://huggingface.co/docs/transformers/tasks/sequence_classification
# Import packages from HuggingFace
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)
from torch import nn

# Load IMDB dataset (movie reviews)
imdb = load_dataset("imdb")

# Load tokenizer associated with the DistilBERT model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyod.models.ecod import ECOD
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

clf_name = 'ECOD'

# Fit the ECOD detector on the training features, then score the test set.
# NOTE: X_train, X_test and y_test are not defined in this snippet; they
# must already be in scope when it runs.
clf = ECOD()
clf.fit(X_train)
test_scores = clf.decision_function(X_test)

# ROC AUC of the test scores against the ground-truth labels, rounded to
# four decimal places.
roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit sklearn's LOF with novelty=True so that decision_function can be
# called on data that was not part of the fit.
# NOTE: LocalOutlierFactor, roc_auc_score, precision_n_scores, X_train,
# X_test and y_test are not defined in this snippet; they must already be
# in scope when it runs.
clf = LocalOutlierFactor(novelty=True)
clf = clf.fit(X_train)
test_scores = clf.decision_function(X_test)

# sklearn's decision_function is the shifted *opposite* of the LOF, so
# larger values mean "more normal". Negate it so that larger values mean
# "more anomalous"; otherwise the metrics below are computed against an
# inverted ranking (the ROC AUC comes out as 1 - AUC when outliers are
# labelled 1).
# Documentation: "Inliers tend to have a LOF score close to 1 (negative_outlier_factor_ close to -1), while outliers tend to have a larger LOF score"
test_scores = -test_scores

roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
prn = round(precision_n_scores(y_test, test_scores), ndigits=4)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.neighbors import LocalOutlierFactor

# fit the model for outlier detection (default)
# NOTE: X_train and y_train are not defined in this snippet; they must
# already be in scope when it runs.
clf = LocalOutlierFactor(novelty=False)
y_pred = clf.fit_predict(X_train)

# Convert y_pred to the same convention as pyod.
# In sklearn, -1 is an outlier. In pyod, 1 is an outlier.
y_pred = y_pred == -1

# Fraction of training points whose predicted outlier flag disagrees with
# the label in y_train.
n_errors = (y_pred != y_train).sum()
print("Error rate:", n_errors / len(y_pred))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt

# NOTE: LOF, sns, X_train, X_test and y_test are not defined in this
# snippet; they must already be in scope when it runs
# (presumably `sns` is seaborn and `LOF` is pyod's detector - confirm).
detector = LOF()
scores = detector.fit(X_train).decision_function(X_test)

# Overlay the score distributions of the two classes on the same axes.
# sns.distplot is deprecated (seaborn 0.11+); histplot with kde=True and
# stat="density" is the documented replacement for the same
# histogram-plus-density-curve figure.
sns.histplot(
    scores[y_test == 0], kde=True, stat="density", label="inlier scores"
)
sns.histplot(
    scores[y_test == 1], kde=True, stat="density", label="outlier scores"
).set_title("Distribution of Outlier Scores from LOF Detector")
plt.legend()
plt.xlabel("Outlier score")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyod.models.lof import LOF

clf_name = 'LOF'

# Fit pyod's LOF detector on the training features, then score the test set.
# NOTE: roc_auc_score, precision_n_scores, X_train, X_test and y_test are
# not defined in this snippet; they must already be in scope when it runs.
clf = LOF()
clf.fit(X_train)
test_scores = clf.decision_function(X_test)

# Both metrics compare the test scores with the ground-truth labels and
# are rounded to four decimal places.
roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
prn = round(precision_n_scores(y_test, test_scores), ndigits=4)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn import datasets | |
from river import stream | |
from river import compose | |
from river import linear_model | |
from river import optim | |
from river import preprocessing | |
from river import metrics | |
model = compose.Pipeline( | |
preprocessing.StandardScaler(), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from river import compose
from river import linear_model
from river import preprocessing
from river import feature_extraction

# Three-step pipeline, listed in order: standard scaling, polynomial
# feature expansion, then a linear regression as the final estimator.
model = compose.Pipeline(
    preprocessing.StandardScaler(),
    feature_extraction.PolynomialExtender(),
    linear_model.LinearRegression(),
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime

# A single example observation expressed as a plain feature dict: one
# timestamp, two numeric weather readings, a cloud-cover value and a
# free-text description.
x = {
    'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),
    'clouds': 75,
    'description': 'light rain',
    'temperature': 6.54,
    'wind': 9.3,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import flask

app = flask.Flask(__name__)


@app.route('/', methods=['GET'])
def predict():
    """Serve a prediction for the features sent as the request's JSON body.

    The parsed JSON payload is passed unchanged to the model's
    ``predict_proba_one``; the result is returned as a JSON response.

    NOTE: ``load_model`` is not defined in this snippet; it must be
    provided elsewhere in the module.
    """
    payload = flask.request.json
    river_model = load_model()
    # jsonify explicitly rather than returning the raw dict: a bare dict
    # return value is only converted to a JSON response by Flask >= 1.1.
    return flask.jsonify(river_model.predict_proba_one(payload))
@app.route('/', methods=['POST']) |
Newer | Older