Skip to content

Instantly share code, notes, and snippets.

@sudipto80
Created October 10, 2014 08:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sudipto80/994bfb0495b3fcdac344 to your computer and use it in GitHub Desktop.
Save sudipto80/994bfb0495b3fcdac344 to your computer and use it in GitHub Desktop.
k Nearest Neighbour using LINQ
//Nearest Neighbour
//Load the training data from the iris CSV file: each row holds four numeric
//measurements (columns 0-3) followed by the flower name (column 4).
//File.ReadLines streams the file line by line instead of loading it whole.
var trainingSet = File.ReadLines(@"C:\iris.csv")
    //Ignore blank lines so an empty or whitespace-only row cannot break the parsing below
    .Where(line => !string.IsNullOrWhiteSpace(line))
    .Select(line => line.Split(','))
    //The first row is skipped (presumably the column header)
    .Skip(1)
    .Select(f =>
        new
        {
            //Parse with the invariant culture: the file uses '.' as the decimal
            //separator, which culture-sensitive parsing would misread (or reject)
            //on machines whose regional settings use ',' instead.
            SepalLength = double.Parse(f[0], System.Globalization.CultureInfo.InvariantCulture),
            SepalWidth = double.Parse(f[1], System.Globalization.CultureInfo.InvariantCulture),
            PetalLength = double.Parse(f[2], System.Globalization.CultureInfo.InvariantCulture),
            PetalWidth = double.Parse(f[3], System.Globalization.CultureInfo.InvariantCulture),
            Name = f[4]
        })
    //RandomSubset is a method from MoreLINQ.
    // So you have to reference that
    //in LINQPad to use this method. Refer to Chapter 5.
    .RandomSubset(100);
//Test data: the four measurements of the flower we want to classify
double sepalLength = 5.5, sepalWidth = 2.6;
double petalLength = 4, petalWidth = 1.2;
//Number of nearest neighbours that take part in the vote
int k = 5;
//Euclidean distance function.
//Arguments come in pairs, one pair per measurement: (sl1, sl2) are the two
//sepal lengths, (sw1, sw2) the sepal widths, (pl1, pl2) the petal lengths and
//(pw1, pw2) the petal widths. Returns the square root of the summed squared differences.
Func<double,double,double,double,double,double,double,double,double> Distance =
    (sl1, sl2, sw1, sw2, pl1, pl2, pw1, pw2) =>
    {
        double sepalLengthTerm = Math.Pow(sl1 - sl2, 2);
        double sepalWidthTerm = Math.Pow(sw1 - sw2, 2);
        double petalLengthTerm = Math.Pow(pl1 - pl2, 2);
        double petalWidthTerm = Math.Pow(pw1 - pw2, 2);
        return Math.Sqrt(sepalLengthTerm + sepalWidthTerm + petalLengthTerm + petalWidthTerm);
    };
//Figure out what flower it is.
//Step 1: pair every training flower's name with its distance from the test data
var distances = trainingSet
    .Select(flower => new
    {
        flower.Name,
        DistanceFromTestData = Distance(
            sepalLength, flower.SepalLength,
            sepalWidth, flower.SepalWidth,
            petalLength, flower.PetalLength,
            petalWidth, flower.PetalWidth)
    });

//Step 2: keep only the "k" closest flowers
var nearestNeighbours = distances
    .OrderBy(d => d.DistanceFromTestData)
    .Take(k);

//Step 3: group those neighbours by "Name" and show the groups
var neighboursByName = nearestNeighbours.ToLookup(n => n.Name);
neighboursByName.Dump();

//Step 4: the class with the most neighbours comes first after sorting by
//descending group size; its key is the predicted class
var predictedName = neighboursByName
    .OrderByDescending(group => group.Count())
    .First()
    .Key;

predictedName.Dump("I think the flower is");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment