# Skip to content

# Instantly share code, notes, and snippets.

# Adapted from https://huggingface.co/docs/transformers/tasks/sequence_classification
# HuggingFace tooling (datasets + transformers) plus torch's nn module.
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)
from torch import nn

# Fetch the IMDB movie-review dataset.
imdb = load_dataset("imdb")
# TODO: load the tokenizer associated with the DistilBERT model (the code for this step is missing here).
# ECOD outlier detector from pyod: fit on the training split, then score the
# test split and evaluate the scores.
# NOTE(review): X_train, X_test and y_test are not defined in this snippet;
# presumably they come from an earlier data-loading step -- confirm.
from pyod.models.ecod import ECOD
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

clf_name = 'ECOD'
clf = ECOD()
clf.fit(X_train)
test_scores = clf.decision_function(X_test)

# Report both ROC AUC and precision @ rank n (rounded to 4 decimals), the same
# pair of metrics the LOF snippets in this file compute. precision_n_scores
# was imported here but never used.
roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
prn = round(precision_n_scores(y_test, test_scores), ndigits=4)
# LocalOutlierFactor is used here before the file's later import of it, so
# import it alongside the snippet to avoid a NameError.
from sklearn.neighbors import LocalOutlierFactor

# novelty=True is required to call decision_function on unseen data.
clf = LocalOutlierFactor(novelty=True)
clf = clf.fit(X_train)
test_scores = clf.decision_function(X_test)
# sklearn's decision_function is the shifted *opposite* of the LOF score:
# larger values mean "more normal" and negative values indicate outliers.
# Documentation: "Inliers tend to have a LOF score close to 1
# (negative_outlier_factor_ close to -1), while outliers tend to have a larger
# LOF score".
# Negate so that a higher score means "more anomalous"; otherwise the ranking
# metrics below would be inverted (assuming y_test marks outliers as 1, the
# pyod convention -- confirm).
test_scores = -test_scores
roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
prn = round(precision_n_scores(y_test, test_scores), ndigits=4)
from sklearn.neighbors import LocalOutlierFactor

# Outlier-detection mode (novelty=False is the default): fit and label the
# training data in a single step.
clf = LocalOutlierFactor(novelty=False)
# sklearn flags outliers with -1 whereas pyod flags them with 1; comparing
# against -1 gives a boolean mask that follows the pyod convention.
y_pred = clf.fit_predict(X_train) == -1
# Count the samples whose predicted flag disagrees with the ground truth.
n_errors = (y_pred != y_train).sum()
print("Error rate:", n_errors/len(y_pred))
import matplotlib.pyplot as plt
import seaborn as sns  # `sns` was used below without ever being imported
from pyod.models.lof import LOF  # used here before the file's later import

# Fit pyod's LOF detector on the training data and score the test data.
detector = LOF()
scores = detector.fit(X_train).decision_function(X_test)

# Overlay the score distributions of the two classes (y_test == 0 / == 1).
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; kept
# as-is to preserve the plot, consider migrating to histplot/displot.
sns.distplot(scores[y_test==0], label="inlier scores")
sns.distplot(scores[y_test==1], label="outlier scores").set_title("Distribution of Outlier Scores from LOF Detector")
plt.legend()
plt.xlabel("Outlier score")
from pyod.models.lof import LOF

# pyod's LOF detector: train on X_train, then score the held-out samples.
clf_name = 'LOF'
clf = LOF().fit(X_train)
test_scores = clf.decision_function(X_test)

# ROC AUC and precision @ rank n, both rounded to 4 decimals.
roc = round(roc_auc_score(y_test, test_scores), 4)
prn = round(precision_n_scores(y_test, test_scores), 4)
from sklearn import datasets
from river import stream
from river import compose
from river import linear_model
from river import optim
from river import preprocessing
from river import metrics
# NOTE(review): this snippet is cut off after the scaler -- the original call
# was never closed, which is a SyntaxError that also swallowed the statements
# that follow it. The call is closed here so the file parses; the remaining
# pipeline step(s) were lost and are NOT reconstructed.
model = compose.Pipeline(
    preprocessing.StandardScaler(),
    # TODO: restore the pipeline step(s) missing from this snippet.
)
from river import compose
from river import feature_extraction
from river import linear_model
from river import preprocessing

# Three-stage pipeline: standard scaling, polynomial feature extension, then
# a linear regression.
model = compose.Pipeline(
    preprocessing.StandardScaler(),
    feature_extraction.PolynomialExtender(),
    linear_model.LinearRegression(),
)
import datetime

# A single sample observation: a timestamp plus four weather-related fields.
x = dict(
    moment=datetime.datetime(2016, 4, 1, 0, 0, 7),
    clouds=75,
    description='light rain',
    temperature=6.54,
    wind=9.3,
)
import flask

app = flask.Flask(__name__)


@app.route('/', methods=['GET'])
def predict():
    """Return the model's per-class probabilities for the JSON request body.

    The parsed JSON payload of the incoming request is passed unchanged to
    the model's ``predict_proba_one`` and the result is returned as the
    response.

    NOTE(review): ``load_model`` is not defined in the visible part of this
    file; presumably it returns the persisted River model -- confirm.
    """
    # The body of this function had lost its indentation (IndentationError).
    payload = flask.request.json
    river_model = load_model()
    return river_model.predict_proba_one(payload)
@app.route('/', methods=['POST'])