Skip to content

Instantly share code, notes, and snippets.

@go2ready
Created July 1, 2020 09:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save go2ready/05a5f6cf95d98ee8f12c8dda71294f4f to your computer and use it in GitHub Desktop.
Save go2ready/05a5f6cf95d98ee8f12c8dda71294f4f to your computer and use it in GitHub Desktop.
ONNX training
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
using Microsoft.ML.Trainers.LightGbm;
using System;
using System.IO;
using Newtonsoft.Json;
using System.Text.RegularExpressions;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using Microsoft.ML.OnnxRuntime;
namespace LGBMRanker
{
/// <summary>
/// Trains a LightGBM ranking model with ML.NET and exports the trained ranker to an ONNX file.
/// </summary>
/// <remarks>
/// All of the work (loading, data preparation, training, export) happens in the constructor,
/// so constructing a <see cref="Trainer"/> has file-system side effects: it reads
/// <c>./data/train.tsv</c> and writes <c>./model.onnx</c>.
/// </remarks>
public class Trainer
{
    // Tab-separated training file with a header row.
    private const string TrainingDataPath = "./data/train.tsv";

    // Destination of the exported ONNX model.
    private const string OnnxModelPath = "./model.onnx";

    // Name of the concatenated feature vector column consumed by the ranker.
    private const string FeatureColumn = "Feature";

    private readonly MLContext mlContext;

    /// <summary>
    /// Loads the training data, fits the data-preparation pipeline and the LightGBM ranker,
    /// then writes the trained ranker to <c>./model.onnx</c>.
    /// </summary>
    public Trainer()
    {
        // Create a new context for ML.NET operations. It can be used for
        // exception tracking and logging, as a catalog of available operations
        // and as the source of randomness. The seed is fixed so that outputs
        // are deterministic.
        this.mlContext = new MLContext(seed: 0);

        // Load the training data.
        IDataView data = this.mlContext.Data.LoadFromTextFile<FinalMixtureData>(
            TrainingDataPath,
            separatorChar: '\t',
            hasHeader: true);

        // Define trainer options.
        var options = new LightGbmRankingTrainer.Options
        {
            LearningRate = 0.1,
            NumberOfIterations = 100,
            MinimumExampleCountPerGroup = 1,
            NumberOfLeaves = 31,
            NumberOfThreads = -1,
            MinimumExampleCountPerLeaf = 20,
            EarlyStoppingRound = 0,
            CategoricalSmoothing = 10,
            EvaluationMetric = LightGbmRankingTrainer.Options.EvaluateMetricType.NormalizedDiscountedCumulativeGain,
            HandleMissingValue = true,
            Sigmoid = 1,
            Booster = new GradientBooster.Options
            {
                FeatureFraction = 1,
            },
            FeatureColumnName = FeatureColumn,
            RowGroupColumnName = "ImpressionIdKey",
            LabelColumnName = "BackProClick"
        };

        // Define the data-preparation estimator:
        //   1. One-hot encode the categorical columns.
        //   2. Map the remaining column to a key type, ordered by value.
        //   3. Concatenate the prepared columns into the single feature vector column.
        // NOTE(review): the "..." column names are placeholders carried over from the
        // original source and must be replaced with real column names before this runs.
        IEstimator<ITransformer> dataPrepEstimator = this.mlContext.Transforms.Categorical
            .OneHotEncoding(
                new[]
                {
                    new InputOutputColumnPair("...", "..."),
                    new InputOutputColumnPair("...", "...")
                })
            .Append(this.mlContext.Transforms.Conversion.MapValueToKey(
                new[]
                {
                    new InputOutputColumnPair("...", "..."),
                },
                keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue))
            .Append(this.mlContext.Transforms.Concatenate(FeatureColumn, "...", "..."));

        // Fit the data-preparation pipeline and apply it to the training data.
        ITransformer dataPrepTransformer = dataPrepEstimator.Fit(data);
        IDataView transformedTrainingData = dataPrepTransformer.Transform(data);

        // Define the trainer and fit it on the prepared data.
        // The original code passed an undeclared variable here; the prepared training
        // view is the only candidate in scope that carries the feature column.
        var pipeline = this.mlContext.Ranking.Trainers.LightGbm(options);
        var model = pipeline.Fit(transformedTrainingData);

        // Export the trained ranker to ONNX. Only the ranker is exported, not the
        // data-preparation transformer, so the prepared view is passed as input data.
        // FileMode.Create creates the file when it is missing and overwrites it otherwise,
        // whereas FileMode.Truncate throws when the file does not exist yet.
        using (FileStream fs = new FileStream(OnnxModelPath, FileMode.Create, FileAccess.Write))
        {
            this.mlContext.Model.ConvertToOnnx(model, transformedTrainingData, fs);
        }
    }
}
/// <summary>
/// Console entry point: runs one training and ONNX export pass.
/// </summary>
public class Program
{
    /// <summary>
    /// Constructs a <see cref="Trainer"/>, whose constructor performs the whole run.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        // Benchmarking alternative, currently disabled:
        //   var summary = BenchmarkRunner.Run(typeof(Trainer).Assembly);

        // The instance itself is not needed; construction does all the work.
        _ = new Trainer();
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment