Created
July 1, 2020 09:19
-
-
Save go2ready/05a5f6cf95d98ee8f12c8dda71294f4f to your computer and use it in GitHub Desktop.
ONNX training
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Collections.Generic; | |
using System.Linq; | |
using Microsoft.ML; | |
using Microsoft.ML.Data; | |
using Microsoft.ML.Transforms; | |
using Microsoft.ML.Trainers.LightGbm; | |
using System; | |
using System.IO; | |
using Newtonsoft.Json; | |
using System.Text.RegularExpressions; | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Running; | |
using Microsoft.ML.OnnxRuntime; | |
namespace LGBMRanker | |
{ | |
/// <summary>
/// Trains a LightGBM ranking model with ML.NET and exports the trained ranker to ONNX.
/// </summary>
/// <remarks>
/// All of the work happens in the constructor: it loads <c>./data/train.tsv</c>,
/// fits the data-preparation transforms, trains the ranker and writes
/// <c>./model.onnx</c>.
/// </remarks>
public class Trainer
{
    private readonly MLContext mlContext;

    // NOTE(review): the two fields below are never assigned or read by any code
    // visible in this file; they are kept in case other code relies on them.
    private IDataView transformedTestData;
    private IDataView onnxTransformedTestData;

    /// <summary>
    /// Runs the full load / prepare / train / export pipeline.
    /// </summary>
    public Trainer()
    {
        // Create a new context for ML.NET operations. It can be used for
        // exception tracking and logging, as a catalog of available operations
        // and as the source of randomness. The seed is fixed so that outputs
        // are deterministic.
        this.mlContext = new MLContext(seed: 0);

        // Load the tab-separated training data; the first row is a header.
        IDataView data = this.mlContext.Data.LoadFromTextFile<FinalMixtureData>(
            "./data/train.tsv",
            separatorChar: '\t',
            hasHeader: true);

        // Define trainer options.
        var options = new LightGbmRankingTrainer.Options
        {
            LearningRate = 0.1,
            NumberOfIterations = 100,
            MinimumExampleCountPerGroup = 1,
            NumberOfLeaves = 31,
            NumberOfThreads = -1,
            MinimumExampleCountPerLeaf = 20,
            EarlyStoppingRound = 0,
            CategoricalSmoothing = 10,
            EvaluationMetric = LightGbmRankingTrainer.Options.EvaluateMetricType.NormalizedDiscountedCumulativeGain,
            HandleMissingValue = true,
            Sigmoid = 1,
            Booster = new GradientBooster.Options
            {
                FeatureFraction = 1,
            },
            // Must match the output column produced by the Concatenate step below.
            FeatureColumnName = "Feature",
            RowGroupColumnName = "ImpressionIdKey",
            LabelColumnName = "BackProClick"
        };

        // Define the data prep estimator:
        // 1. One-hot encode the categorical columns.
        // 2. Map the listed value column(s) to keys, ordered by value.
        // 3. Concatenate the prepared columns into the single "Feature" vector
        //    that the trainer consumes.
        // The "..." column names are placeholders carried over unchanged.
        IEstimator<ITransformer> dataPrepEstimator = this.mlContext.Transforms.Categorical.OneHotEncoding(
                new[]
                {
                    new InputOutputColumnPair("...", "..."),
                    new InputOutputColumnPair("...", "...")
                })
            .Append(this.mlContext.Transforms.Conversion.MapValueToKey(
                new[]
                {
                    new InputOutputColumnPair("...", "..."),
                },
                keyOrdinality: Microsoft.ML.Transforms.ValueToKeyMappingEstimator.KeyOrdinality.ByValue))
            .Append(this.mlContext.Transforms.Concatenate("Feature", "...", "..."));

        // Create the data prep transformer.
        ITransformer dataPrepTransformer = dataPrepEstimator.Fit(data);

        // Apply the transforms to the training data.
        IDataView transformedTrainingData = dataPrepTransformer.Transform(data);

        // Define the trainer.
        var pipeline = this.mlContext.Ranking.Trainers.LightGbm(options);

        // Train the model on the prepared data.
        // NOTE(review): this previously referenced an undeclared variable
        // (colSelTrainingData2). The prepared training view is the only suitable
        // data in scope; if a column-selection step was intended before training,
        // reinstate it here.
        var model = pipeline.Fit(transformedTrainingData);

        // FileMode.Create creates the file when it is missing and truncates it
        // when it exists; FileMode.Truncate throws if the file does not exist yet.
        using (FileStream fs = new FileStream("./model.onnx", FileMode.Create, FileAccess.Write))
        {
            // Only the trained ranker is exported; the data prep transforms are
            // not part of the model passed to the exporter.
            this.mlContext.Model.ConvertToOnnx(model, transformedTrainingData, fs);
        }
    }
}
/// <summary>
/// Console entry point for the LightGBM ranker sample.
/// </summary>
public class Program
{
    /// <summary>
    /// Constructs a <see cref="Trainer"/>, whose constructor performs the whole
    /// training and export run. The instance itself is not kept.
    /// </summary>
    /// <remarks>
    /// A BenchmarkDotNet run over the assembly containing <see cref="Trainer"/>
    /// was previously wired in here and is currently disabled.
    /// </remarks>
    /// <param name="args">Command-line arguments; not read.</param>
    public static void Main(string[] args) => new Trainer();
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment