Created
March 5, 2014 22:08
-
-
Save adrianseeley/9377668 to your computer and use it in GitHub Desktop.
GATO.AUTOFEATURE - automatically optimizes feature selection using weights, capable of handling 1 Million+ features, also draws pretty smears
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using System.Threading.Tasks; | |
using System.IO; | |
using System.Drawing; | |
using System.Threading; | |
namespace GATO.AUTOFEATURE | |
{ | |
/// <summary>
/// Console entry point: loads the Iris data set and runs the auto-featurizer on it.
/// </summary>
class Program
{
    /// <summary>
    /// Reads "iris.data" from the working directory (comma separated, numeric
    /// columns first, class label in the last column) and hands the parsed
    /// cases to <see cref="AutoFeaturizer"/>, whose constructor runs the search.
    /// </summary>
    /// <param name="args">Command line arguments; not used.</param>
    static void Main(string[] args)
    {
        List<TrainingCase> TrainingCases = new List<TrainingCase>();
        #region IRIS
        using (FileStream FS = File.OpenRead("iris.data"))
        {
            using (StreamReader SR = new StreamReader(FS))
            {
                String Line;
                while ((Line = SR.ReadLine()) != null)
                {
                    // A blank line would otherwise split into a single empty
                    // part and be added as a case with no inputs and class "".
                    if (String.IsNullOrWhiteSpace(Line))
                    {
                        continue;
                    }
                    String[] Parts = Line.Split(',');
                    double[] Inputs = new double[Parts.Length - 1];
                    for (int p = 0; p < Inputs.Length; p++)
                    {
                        // The data file always uses '.' as the decimal separator,
                        // so parse independently of the machine's culture.
                        Inputs[p] = Double.Parse(Parts[p], System.Globalization.CultureInfo.InvariantCulture);
                    }
                    TrainingCases.Add(new TrainingCase(Inputs, Parts[Parts.Length - 1]));
                }
            }
        }
        AutoFeaturizer AF = new AutoFeaturizer(
            NumberOfInputs: 4,
            NumberOfClasses: 3,
            MaxTailLength: 1000,
            NumberOfCandidates: 1000,
            TrainingCases: TrainingCases,
            MaxIterations: 10000,
            MaxAge: 100);
        #endregion
        /*
        Alternative loader for "ocr_train.data": one header row, class label in
        the first column, numeric inputs in the remaining columns.

        using (FileStream FS = File.OpenRead("ocr_train.data"))
        {
            using (StreamReader SR = new StreamReader(FS))
            {
                SR.ReadLine(); // skip the header row
                while (!SR.EndOfStream)
                {
                    String Line = SR.ReadLine();
                    String[] Parts = Line.Split(',');
                    List<double> Inputs = new List<double>();
                    for (int p = 1; p < Parts.Length; p++)
                    {
                        Inputs.Add(Double.Parse(Parts[p]));
                    }
                    TrainingCases.Add(new TrainingCase(Inputs.ToArray(), Parts[0]));
                }
            }
        }*/
    }
}
/// <summary>
/// One labelled sample: a vector of numeric inputs and the class it belongs to.
/// </summary>
class TrainingCase
{
    /// <summary>Class label of this sample.</summary>
    public string Class;

    /// <summary>Numeric input values of this sample (stored by reference, not copied).</summary>
    public double[] Inputs;

    /// <summary>
    /// Creates a training case from the given inputs and class label.
    /// </summary>
    /// <param name="Inputs">Input values; the array is kept as-is.</param>
    /// <param name="Class">Class label.</param>
    public TrainingCase(double[] Inputs, string Class)
    {
        this.Class = Class;
        this.Inputs = Inputs;
    }
}
/// <summary>
/// Searches for a weight vector whose weighted sum of inputs orders the
/// training cases so that classes form contiguous runs. Each iteration mutates
/// the current configuration (<see cref="Head"/>) into a pool of candidates and
/// adopts the best improving one; a stack of previous heads (<see cref="Tail"/>)
/// allows backtracking when the head stops improving.
/// </summary>
class AutoFeaturizer
{
    /// <summary>Previously adopted heads, oldest first; the last entry is restored when backtracking.</summary>
    public List<FeatureConfiguration> Tail;
    /// <summary>Configuration currently being mutated.</summary>
    public FeatureConfiguration Head;
    /// <summary>Best configuration observed so far (no more overlaps and strictly fewer faults than its predecessor).</summary>
    public FeatureConfiguration Best;
    /// <summary>Reusable pool of candidate configurations mutated from the head each iteration.</summary>
    public List<FeatureConfiguration> Candidates;
    public int NumberOfInputs;
    public int NumberOfClasses;
    /// <summary>Maximum number of entries kept in <see cref="Tail"/>.</summary>
    public int MaxTailLength;
    public int NumberOfCandidates;

    /// <summary>
    /// Builds the search state and immediately runs the search, printing
    /// progress to the console and writing "Test.png" every iteration. Blocks
    /// on <c>Console.ReadLine()</c> once the search stops.
    /// </summary>
    /// <param name="NumberOfInputs">Number of weights per configuration.</param>
    /// <param name="NumberOfClasses">Number of distinct classes; the search stops once the head has at most <c>NumberOfClasses - 1</c> faults.</param>
    /// <param name="MaxTailLength">Maximum number of previous heads kept for backtracking.</param>
    /// <param name="NumberOfCandidates">Number of mutated candidates evaluated per iteration.</param>
    /// <param name="TrainingCases">Cases used to assess fitness; must contain at least one case.</param>
    /// <param name="MaxIterations">Upper bound on the number of iterations.</param>
    /// <param name="MaxAge">Number of non-improving iterations after which the head is replaced by the most recent tail entry.</param>
    /// <exception cref="ArgumentNullException"><paramref name="TrainingCases"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="TrainingCases"/> is empty.</exception>
    public AutoFeaturizer(int NumberOfInputs, int NumberOfClasses, int MaxTailLength, int NumberOfCandidates, List<TrainingCase> TrainingCases, int MaxIterations, int MaxAge)
    {
        if (TrainingCases == null)
        {
            throw new ArgumentNullException("TrainingCases");
        }
        if (TrainingCases.Count == 0)
        {
            throw new ArgumentException("At least one training case is required.", "TrainingCases");
        }

        this.NumberOfInputs = NumberOfInputs;
        this.NumberOfClasses = NumberOfClasses;
        this.MaxTailLength = MaxTailLength;
        this.NumberOfCandidates = NumberOfCandidates;

        Tail = new List<FeatureConfiguration>();
        Head = new FeatureConfiguration(NumberOfInputs, TrainingCases);
        Best = new FeatureConfiguration(NumberOfInputs, TrainingCases);
        Candidates = new List<FeatureConfiguration>();
        for (int c = 0; c < NumberOfCandidates; c++)
        {
            Candidates.Add(new FeatureConfiguration(NumberOfInputs, TrainingCases));
        }

        Head.AssessFitness(TrainingCases);
        // Seed Best from the assessed head. An unassessed configuration has
        // Faults == 0, which no candidate can beat, so Best would never update.
        Best.CloneFrom(Head);

        for (int i = 0; i < MaxIterations && Head.Faults > NumberOfClasses - 1; i++)
        {
            Console.WriteLine("i: " + i + " O: " + Head.Overlaps + " F: " + Head.Faults + "->" + (NumberOfClasses - 1) + " A: " + Head.Age + " T: " + Tail.Count);
            Head.Draw("Test.png", NumberOfClasses);
            Thread.Sleep(1);

            double BestOverlaps = Head.Overlaps;
            double BestFaults = Head.Faults;
            int BestCandidateAt = -1;
            for (int c = 0; c < Candidates.Count; c++)
            {
                FeatureConfiguration Candidate = Candidates[c];
                Candidate.MutateFrom(Head);
                Candidate.AssessFitness(TrainingCases);
                // Best candidate of this generation: no more overlaps and strictly fewer faults.
                if (Candidate.Overlaps <= BestOverlaps && Candidate.Faults < BestFaults)
                {
                    BestOverlaps = Candidate.Overlaps;
                    BestFaults = Candidate.Faults;
                    BestCandidateAt = c;
                }
                // Best configuration over the whole run, by the same criterion.
                if (Candidate.Overlaps <= Best.Overlaps && Candidate.Faults < Best.Faults)
                {
                    Best.CloneFrom(Candidate);
                }
            }

            if (BestCandidateAt > -1)
            {
                // Remember the outgoing head so it can be restored later.
                Tail.Add(new FeatureConfiguration(NumberOfInputs, TrainingCases).CloneFrom(Head));
                // Enforce the configured bound by dropping the oldest entries.
                while (Tail.Count > 0 && Tail.Count > MaxTailLength)
                {
                    Tail.RemoveAt(0);
                }
                Head.CloneFrom(Candidates[BestCandidateAt]);
            }
            else
            {
                Head.Age++;
                // Stagnated for too long: backtrack to the most recent previous head.
                if (Head.Age > MaxAge && Tail.Count > 0)
                {
                    Head.CloneFrom(Tail[Tail.Count - 1]);
                    Tail.RemoveAt(Tail.Count - 1);
                }
            }
        }
        Console.ReadLine();
    }
}
/// <summary>
/// A weight vector together with the "smear" it produces: every training case
/// projected to a single value (the weighted sum of its inputs), plus fitness
/// counters derived from the sorted smear.
/// </summary>
class FeatureConfiguration
{
    /// <summary>Shared random source used for mutation.</summary>
    public static Random r = new Random();

    // Shared system brushes, one per class; wrapped around when there are more
    // classes than brushes. These are owned by the framework and must not be disposed.
    private static readonly Brush[] BrushSet = new Brush[]
    {
        Brushes.Red,
        Brushes.Green,
        Brushes.Blue,
        Brushes.Yellow,
        Brushes.Purple,
        Brushes.HotPink,
        Brushes.White,
        Brushes.Lime,
        Brushes.Azure,
        Brushes.Orange
    };

    /// <summary>One weight per input.</summary>
    public double[] Weights;
    /// <summary>One (class, value) entry per training case; sorted by value after <see cref="AssessFitness"/>.</summary>
    public List<ClassExpression> Smear;
    /// <summary>Number of class changes between neighbours in the sorted smear.</summary>
    public double Faults;
    /// <summary>Number of class changes where both neighbours have exactly the same value.</summary>
    public double Overlaps;
    /// <summary>Counter maintained by the caller; copied by <see cref="CloneFrom"/>.</summary>
    public int Age;

    /// <summary>
    /// Creates a configuration with all weights zero and one empty smear entry
    /// per training case.
    /// </summary>
    /// <param name="NumberOfInputs">Number of weights to allocate.</param>
    /// <param name="TrainingCases">Training cases; only their count is used here.</param>
    public FeatureConfiguration(int NumberOfInputs, List<TrainingCase> TrainingCases)
    {
        Weights = new double[NumberOfInputs];
        Smear = new List<ClassExpression>(TrainingCases.Count);
        for (int t = 0; t < TrainingCases.Count; t++)
        {
            Smear.Add(new ClassExpression());
        }
    }

    /// <summary>
    /// Overwrites this configuration's weights with <paramref name="Host"/>'s
    /// weights, each shifted by a uniform random offset in [-0.05, 0.05).
    /// Smear and fitness counters are left untouched.
    /// </summary>
    public void MutateFrom(FeatureConfiguration Host)
    {
        for (int w = 0; w < Weights.Length; w++)
        {
            Weights[w] = Host.Weights[w] + ((r.NextDouble() * 0.1) - 0.05);
        }
    }

    /// <summary>
    /// Copies weights, smear entries, fitness counters and age from
    /// <paramref name="Host"/> into this instance. Both configurations are
    /// expected to have the same number of weights and smear entries.
    /// </summary>
    /// <returns>This instance, to allow chaining.</returns>
    public FeatureConfiguration CloneFrom(FeatureConfiguration Host)
    {
        Host.Weights.CopyTo(Weights, 0);
        for (int s = 0; s < Smear.Count; s++)
        {
            Smear[s].Set(Host.Smear[s].Class, Host.Smear[s].Value);
        }
        Faults = Host.Faults;
        Overlaps = Host.Overlaps;
        Age = Host.Age;
        return this;
    }

    /// <summary>
    /// Recomputes the smear for the given cases, sorts it by value and updates
    /// <see cref="Faults"/> and <see cref="Overlaps"/>.
    /// </summary>
    /// <param name="TrainingCases">Cases to project; expected to match the count this configuration was built for.</param>
    public void AssessFitness(List<TrainingCase> TrainingCases)
    {
        for (int t = 0; t < TrainingCases.Count; t++)
        {
            double[] Inputs = TrainingCases[t].Inputs;
            double OutputSum = 0;
            for (int i = 0; i < Inputs.Length; i++)
            {
                OutputSum += Weights[i] * Inputs[i];
            }
            // Every entry is fully overwritten, so the order left behind by a
            // previous sort does not matter.
            Smear[t].Set(TrainingCases[t].Class, OutputSum);
        }
        Smear.Sort((a, b) => { return a.Value.CompareTo(b.Value); });

        Faults = 0;
        Overlaps = 0;
        if (Smear.Count == 0)
        {
            // Nothing to compare; avoid indexing an empty smear.
            return;
        }

        String LastClass = Smear[0].Class;
        for (int s = 1; s < Smear.Count; s++)
        {
            if (Smear[s].Class != LastClass)
            {
                Faults++;
                // Exact equality is intended: two classes landing on the very same value.
                if (Smear[s].Value == Smear[s - 1].Value)
                {
                    Overlaps++;
                }
                LastClass = Smear[s].Class;
            }
        }
    }

    /// <summary>
    /// Renders the smear as a 1000x1000 image (x = position in the smear,
    /// y = value normalized to the observed range, colour = class) with the
    /// fitness counters as a caption, and saves it to <paramref name="Filename"/>.
    /// </summary>
    /// <param name="Filename">Path of the image file to write.</param>
    /// <param name="NumberOfClasses">Used only for the caption ("F: faults->NumberOfClasses - 1").</param>
    public void Draw(String Filename, int NumberOfClasses)
    {
        double HighestValue = Smear.Count > 0 ? Smear[0].Value : 0;
        double LowestValue = HighestValue;
        List<String> Classes = new List<string>();
        for (int s = 0; s < Smear.Count; s++)
        {
            if (Smear[s].Value > HighestValue) HighestValue = Smear[s].Value;
            if (Smear[s].Value < LowestValue) LowestValue = Smear[s].Value;
            if (Classes.IndexOf(Smear[s].Class) == -1) Classes.Add(Smear[s].Class);
        }
        // Sort so that a class keeps the same colour from frame to frame,
        // independent of where it currently sits in the smear.
        Classes.Sort(StringComparer.Ordinal);

        double ValueRange = HighestValue - LowestValue;
        if (ValueRange == 0)
        {
            // Degenerate smear (all values equal): use a fixed range to avoid dividing by zero.
            HighestValue = 1;
            LowestValue = -1;
            ValueRange = 2;
        }

        float size = 1000;
        float border = 100;
        float inner = size - (border * 2);
        // Bitmap and Font wrap native GDI+ resources; dispose them deterministically
        // because this method is called once per search iteration.
        using (Bitmap b = new Bitmap((int)size, (int)size))
        {
            using (Graphics g = Graphics.FromImage(b))
            using (Font CaptionFont = new Font(FontFamily.GenericMonospace, 25, FontStyle.Bold))
            {
                g.Clear(Color.Black);
                for (int n = 0; n < Smear.Count; n++)
                {
                    double Normalized = (Smear[n].Value - LowestValue) / ValueRange;
                    // Wrap around so more classes than brushes does not throw.
                    Brush ClassBrush = BrushSet[Classes.IndexOf(Smear[n].Class) % BrushSet.Length];
                    g.FillRectangle(ClassBrush,
                        ((n / (float)Smear.Count) * inner) + border,
                        ((float)Normalized * inner) + border,
                        5f, 5f);
                }
                g.DrawString("O: " + Overlaps + " F: " + Faults + "->" + (NumberOfClasses - 1), CaptionFont, Brushes.White, 10, 10);
            }
            b.Save(Filename);
        }
    }
}
/// <summary>
/// Mutable pair of a class label and the scalar value a training case was
/// projected to.
/// </summary>
class ClassExpression
{
    /// <summary>Projected value.</summary>
    public double Value;

    /// <summary>Class label.</summary>
    public string Class;

    /// <summary>
    /// Overwrites both fields in one call.
    /// </summary>
    /// <param name="Class">New class label.</param>
    /// <param name="Value">New projected value.</param>
    /// <returns>This instance, so calls can be chained.</returns>
    public ClassExpression Set(string Class, double Value)
    {
        this.Value = Value;
        this.Class = Class;
        return this;
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment