Skip to content

Instantly share code, notes, and snippets.

@antoniovs1029
Last active November 1, 2019 18:48
Show Gist options
  • Save antoniovs1029/997ca183411f173e81a131f09722b092 to your computer and use it in GitHub Desktop.
Save antoniovs1029/997ca183411f173e81a131f09722b092 to your computer and use it in GitHub Desktop.
Issue with ImageLoader working with empty input column
Label ImageSource
dog dog\img1.jpg
dog dog\img2.jpg
dog dog\img3.jpg
fruit fruit\img1.jpg
fruit fruit\img2.jpg
whatever whatever\img1.jpg
whatever whatever\img2.jpg
using Microsoft.ML;
using Microsoft.ML.Data;
namespace TestNugets
{
    /// <summary>
    /// Minimal reproduction: an image-loading pipeline is fit and applied to data views
    /// whose "ImagePath" column carries no usable values.
    /// </summary>
    class Test5
    {
        /// <summary>
        /// Creates two data views (one from a TSV file, one from in-memory rows), then fits
        /// and applies the same image-featurization + one-versus-all pipeline to each of them.
        /// </summary>
        /// <remarks>
        /// Per the author's report, neither run loads any image, yet both runs complete and
        /// every row still receives a PredictedLabel.
        /// </remarks>
        public static void Example()
        {
            // Column names shared by the pipeline stages below.
            const string labelColumn = "Label";
            const string imagePathColumn = "ImagePath";
            const string imageFeaturesColumn = "ImagePath_featurized";
            const string featuresColumn = "Features";
            const string predictedLabelColumn = "PredictedLabel";

            var tsvPath = @"C:\Users\anvelazq\Desktop\inputfile.tsv";
            // Intentionally a folder that is not expected to exist (see the "snake" row note below).
            var imageFolder = @"C:\I-dont-exist";

            var context = new MLContext(seed: 1);

            // Data view #1: read from the TSV file. ModelInput maps ImagePath to column index 2,
            // which (per the note on ModelInput) does not exist in the input file, so ImagePath
            // ends up with no values. The program is reported to run anyway, without loading images.
            var fileBackedData = context.Data.LoadFromTextFile<ModelInput>(
                path: tsvPath,
                hasHeader: true,
                separatorChar: '\t',
                allowQuoting: true,
                allowSparse: false);

            // Data view #2: in-memory rows whose ImagePath is null, left unset, or empty.
            // The program is likewise reported to run without loading images.
            var emptyPathRows = new[]
            {
                new ModelInput { Label = "dog", ImagePath = null },
                new ModelInput { Label = "cat" },
                new ModelInput { Label = "elephant", ImagePath = "" },
                // new ModelInput { Label = "snake", ImagePath = "snake.bmp"} // if uncommented, this program correctly throws an exception because the inputFolder doesn't exist
            };
            var inMemoryData = context.Data.LoadFromEnumerable<ModelInput>(emptyPathRows);

            // Data-processing stages, appended one at a time in the same order as a single fluent chain:
            // label -> key, load image, resize to 224x224, extract pixels, ResNet18 featurization,
            // concatenate into "Features", min-max normalize, then a cache checkpoint.
            var labelToKey = context.Transforms.Conversion.MapValueToKey(labelColumn, labelColumn);
            var withLoadedImages = labelToKey.Append(
                context.Transforms.LoadImages(imageFeaturesColumn, imageFolder, imagePathColumn));
            var withResizedImages = withLoadedImages.Append(
                context.Transforms.ResizeImages(imageFeaturesColumn, 224, 224, imageFeaturesColumn));
            var withPixels = withResizedImages.Append(
                context.Transforms.ExtractPixels(imageFeaturesColumn, imageFeaturesColumn));
            var withDnnFeatures = withPixels.Append(
                context.Transforms.DnnFeaturizeImage(
                    imageFeaturesColumn,
                    featurizerInput => featurizerInput.ModelSelector.ResNet18(
                        context,
                        featurizerInput.OutputColumn,
                        featurizerInput.InputColumn),
                    imageFeaturesColumn));
            var withFeatureVector = withDnnFeatures.Append(
                context.Transforms.Concatenate(featuresColumn, new[] { imageFeaturesColumn }));
            var withNormalizedFeatures = withFeatureVector.Append(
                context.Transforms.NormalizeMinMax(featuresColumn, featuresColumn));
            var preprocessing = withNormalizedFeatures.AppendCacheCheckpoint(context);

            // One-versus-all multiclass trainer wrapping an averaged perceptron, followed by
            // mapping the predicted key back to its original label value.
            var binaryLearner = context.BinaryClassification.Trainers.AveragedPerceptron(
                labelColumnName: labelColumn,
                numberOfIterations: 10,
                featureColumnName: featuresColumn);
            var classifier = context.MulticlassClassification.Trainers
                .OneVersusAll(binaryLearner, labelColumnName: labelColumn)
                .Append(context.Transforms.Conversion.MapKeyToValue(predictedLabelColumn, predictedLabelColumn));

            var fullPipeline = preprocessing.Append(classifier);

            // Fit and transform using data view #1 (file-backed).
            ITransformer fileModel = fullPipeline.Fit(fileBackedData);
            IDataView fileScored = fileModel.Transform(fileBackedData);

            // Fit and transform using data view #2 (in-memory).
            ITransformer memoryModel = fullPipeline.Fit(inMemoryData);
            IDataView memoryScored = memoryModel.Transform(inMemoryData);

            // Reported outcome: in both cases no images were loaded, but the pipeline still
            // assigned PredictedLabels to each row.
        }

        /// <summary>
        /// Input row schema: a text label plus the path of the image to load.
        /// </summary>
        class ModelInput
        {
            /// <summary>Class label, read from column 0 of the input file.</summary>
            [ColumnName("Label"), LoadColumn(0)]
            public string Label { get; set; }

            /// <summary>
            /// Image path. Mapped to column 2 on purpose for this reproduction: per the author,
            /// column 2 doesn't exist in the input file, so nothing is loaded into this property.
            /// </summary>
            [ColumnName("ImagePath"), LoadColumn(2)]
            public string ImagePath { get; set; }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment