Skip to content

Instantly share code, notes, and snippets.

@antoniovs1029
Last active October 23, 2019 23:16
Show Gist options
  • Save antoniovs1029/e5fdd86d5b7c8b6adf34cb5481ee20dd to your computer and use it in GitHub Desktop.
Save antoniovs1029/e5fdd86d5b7c8b6adf34cb5481ee20dd to your computer and use it in GitHub Desktop.
Use ML.NET's PFI with a binary prediction transformer loaded from disk
// Based on the original sample of using PFI with Binary prediction:
// https://github.com/dotnet/machinelearning/blob/master/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PermutationFeatureImportance.cs
// Presenting in here a workaround to make it work with a model loaded from disk
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Calibrators;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
namespace Samples.Dynamic.Trainers.BinaryClassification
{
public static class PermutationFeatureImportanceLoadFromDisk
{
    /// <summary>
    /// Trains a binary classifier, saves it to disk, reloads it, and runs
    /// Permutation Feature Importance (PFI) on the reloaded model. The
    /// workaround demonstrated here is extracting the linear prediction
    /// transformer out of the loaded <see cref="TransformerChain{T}"/> so
    /// that PFI (and the model weights) can be accessed.
    /// </summary>
    public static void Example()
    {
        var mlContext = new MLContext(seed: 1);
        var samples = GenerateData();
        var data = mlContext.Data.LoadFromEnumerable(samples);

        // Create pipeline: concatenate features, normalize, then train an
        // SDCA logistic-regression binary classifier.
        var featureColumns =
            new string[] { nameof(Data.Feature1), nameof(Data.Feature2) };
        var pipeline = mlContext.Transforms
            .Concatenate("Features", featureColumns)
            .Append(mlContext.Transforms.NormalizeMinMax("Features"))
            .Append(mlContext.BinaryClassification.Trainers
                .SdcaLogisticRegression());

        // Create and save the model, then load it back from disk.
        var originalModel = pipeline.Fit(data);
        var modelPath = "./model.zip";
        mlContext.Model.Save(originalModel, data.Schema, modelPath);
        var model = mlContext.Model.Load(modelPath, out var schema);

        // Transform the dataset with the loaded model.
        var transformedData = model.Transform(data);

        // WORKAROUND
        // Extract the linear predictor from the loaded chain for PFI, and the
        // calibrated sub-model for weight inspection. Pattern matching
        // replaces the original unchecked 'as' casts so an unexpected model
        // shape fails fast with a clear message instead of a
        // NullReferenceException further down.
        if (model is not TransformerChain<ITransformer> chain ||
            chain.LastTransformer is not
                ISingleFeaturePredictionTransformer<object> linearPredictor)
        {
            throw new InvalidOperationException(
                "Loaded model does not end in a single-feature prediction transformer.");
        }
        if (linearPredictor.Model is not CalibratedModelParametersBase predictorModel ||
            predictorModel.SubModel is not LinearBinaryModelParameters predictorSubModel)
        {
            throw new InvalidOperationException(
                "Loaded predictor is not a calibrated linear binary model.");
        }

        // Execute PFI with the extracted linear predictor.
        var permutationMetrics = mlContext.BinaryClassification
            .PermutationFeatureImportance(linearPredictor, transformedData,
                permutationCount: 30);

        // Sort feature indices by the magnitude of the mean AUC change.
        var sortedIndices = permutationMetrics
            .Select((metrics, index) => new { index, metrics.AreaUnderRocCurve })
            .OrderByDescending(
                feature => Math.Abs(feature.AreaUnderRocCurve.Mean))
            .Select(feature => feature.index);

        Console.WriteLine("Feature\tModel Weight\tChange in AUC"
            + "\t95% Confidence in the Mean Change in AUC");
        var auc = permutationMetrics.Select(x => x.AreaUnderRocCurve).ToArray();
        foreach (int i in sortedIndices)
        {
            Console.WriteLine("{0}\t{1:0.00}\t{2:G4}\t{3:G4}",
                featureColumns[i],
                predictorSubModel.Weights[i], // weights read off the extracted sub-model
                auc[i].Mean,
                1.96 * auc[i].StandardError); // 95% confidence half-width
        }

        // Expected output:
        // Feature Model Weight Change in AUC 95% Confidence in the Mean Change in AUC
        // Feature2 35.15 -0.387 0.002015
        // Feature1 17.94 -0.1514 0.0008963
    }

    /// <summary>Synthetic example row: two features and a boolean label.</summary>
    private class Data
    {
        public bool Label { get; set; }
        public float Feature1 { get; set; }
        public float Feature2 { get; set; }
    }

    /// <summary>
    /// Generates synthetic training data. The label is produced by passing
    /// bias + weight1*Feature1 + weight2*Feature2 plus uniform noise through
    /// a sigmoid and thresholding at 0.5.
    /// </summary>
    /// <param name="nExamples">Number of rows to generate.</param>
    /// <param name="bias">Additive bias of the underlying linear model.</param>
    /// <param name="weight1">True weight for <see cref="Data.Feature1"/>.</param>
    /// <param name="weight2">True weight for <see cref="Data.Feature2"/>.</param>
    /// <param name="seed">Random seed, fixed for reproducibility.</param>
    private static IEnumerable<Data> GenerateData(int nExamples = 10000,
        double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1)
    {
        var rng = new Random(seed);
        for (int i = 0; i < nExamples; i++)
        {
            var data = new Data
            {
                Feature1 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
                Feature2 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
            };

            // Create a noisy label.
            var value = (float)(bias + weight1 * data.Feature1 + weight2 *
                data.Feature2 + rng.NextDouble() - 0.5);
            data.Label = Sigmoid(value) > 0.5;
            yield return data;
        }
    }

    /// <summary>Standard logistic function 1 / (1 + e^(-x)).</summary>
    private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-x));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment