Skip to content

Instantly share code, notes, and snippets.

@tecno14
Created October 5, 2021 11:00
Show Gist options
  • Save tecno14/799fe5894927cfdc45c105b8cdbf28b5 to your computer and use it in GitHub Desktop.
Save tecno14/799fe5894927cfdc45c105b8cdbf28b5 to your computer and use it in GitHub Desktop.
LabelEncoder in C# (double to int only)
using System;
using System.Linq;
using System.Collections.Generic;
namespace PricePrediction.MachineLearning
{
/// <summary>
/// Encodes <see cref="double"/> labels as consecutive integer codes and decodes them back.
/// Codes are assigned in order of first appearance, for example [0.1, 2, 9] is encoded as [0, 1, 2].
/// Similar to: https://github.com/foreverzet/Sharpkit.Learn/blob/master/src/Sharpkit.Learn/Preprocessing/LabelEncoder.cs
/// </summary>
public class LabelEncoder
{
    /// <summary>
    /// The known labels. The position of a label in this list is its integer code,
    /// so the codes themselves never need to be stored separately.
    /// </summary>
    public readonly List<double> Classes;

    /// <summary>
    /// Creates an encoder with no known labels.
    /// </summary>
    public LabelEncoder()
    {
        Classes = new();
    }

    /// <summary>
    /// Fits the encoder on <paramref name="data"/> and returns the encoded labels.
    /// </summary>
    /// <param name="data">Target values.</param>
    /// <returns>The integer code of every element of <paramref name="data"/>, in order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public List<int> FitTransform(IList<double> data)
    {
        return Fit(data).Transform(data);
    }

    /// <summary>
    /// Fit label encoder. Labels that are already known keep their code;
    /// new labels are appended in order of first appearance.
    /// </summary>
    /// <param name="data">Target values.</param>
    /// <returns>Returns an instance of self.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public LabelEncoder Fit(IList<double> data)
    {
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        // Seeding the set with the current classes gives O(1) membership checks and
        // also de-duplicates the incoming data, instead of scanning the list per item.
        var known = new HashSet<double>(Classes);
        foreach (double label in data)
        {
            if (known.Add(label))
            {
                Classes.Add(label);
            }
        }

        return this;
    }

    /// <summary>
    /// Transform labels to their integer codes.
    /// </summary>
    /// <param name="data">Target values.</param>
    /// <returns>The integer code of every element of <paramref name="data"/>, in order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="KeyNotFoundException">A value is not one of the known <see cref="Classes"/>.</exception>
    public List<int> Transform(IList<double> data)
    {
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        // Classes is publicly mutable, so the lookup is rebuilt on every call rather than cached.
        // TryAdd keeps the first index of a repeated label, matching List.IndexOf.
        var codeByLabel = new Dictionary<double, int>(Classes.Count);
        for (int code = 0; code < Classes.Count; code++)
        {
            codeByLabel.TryAdd(Classes[code], code);
        }

        var result = new List<int>(data.Count);
        foreach (double item in data)
        {
            if (!codeByLabel.TryGetValue(item, out int index))
            {
                throw new KeyNotFoundException($"{item} not found");
            }

            result.Add(index);
        }

        return result;
    }

    /// <summary>
    /// Transform a single integer code back to its original label.
    /// </summary>
    /// <param name="data">Encoded value.</param>
    /// <returns>The label stored at position <paramref name="data"/> of <see cref="Classes"/>.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="data"/> is not a valid code.</exception>
    public double InverseTransform(int data)
    {
        if (data < 0 || data >= Classes.Count)
        {
            throw new ArgumentOutOfRangeException(nameof(data), data, $"{data} not found");
        }

        return Classes[data];
    }

    /// <summary>
    /// Transform integer codes back to their original labels.
    /// </summary>
    /// <param name="data">Encoded values.</param>
    /// <returns>The label of every element of <paramref name="data"/>, in order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">An element is not a valid code.</exception>
    public List<double> InverseTransform(IList<int> data)
    {
        if (data is null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        var result = new List<double>(data.Count);
        foreach (int code in data)
        {
            result.Add(InverseTransform(code));
        }

        return result;
    }
}
}
@tecno14
Copy link
Author

tecno14 commented Oct 5, 2021

        /// <summary>
        /// Converts a list of objects into column arrays: one array per property of
        /// <typeparamref name="T"/>, each holding that property's value for every object.
        /// </summary>
        /// <typeparam name="T">Type whose properties (as reported by <see cref="TypeDescriptor"/>) become columns.</typeparam>
        /// <param name="listOfObjects">The rows to convert.</param>
        /// <returns>
        /// A jagged array indexed as [property][object]; every inner array has
        /// <c>listOfObjects.Count</c> elements.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="listOfObjects"/> is null.</exception>
        public static double[][] ToArraysOfColumns<T>(this List<T> listOfObjects)
        {
            // Fail with a clear argument error instead of a NullReferenceException further down.
            if (listOfObjects == null)
                throw new ArgumentNullException(nameof(listOfObjects));

            PropertyDescriptorCollection properties = TypeDescriptor.GetProperties(typeof(T));

            // One column per property, one slot per object.
            var dt = new double[properties.Count][];
            for (int i = 0; i < properties.Count; i++)
                dt[i] = new double[listOfObjects.Count];

            // Walk the objects row by row; each property value must be convertible by Convert.ToDouble.
            for (int o = 0; o < listOfObjects.Count; o++)
                for (int p = 0; p < properties.Count; p++)
                    dt[p][o] = Convert.ToDouble(properties[p].GetValue(listOfObjects[o]));

            return dt;
        }

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment