Skip to content

Instantly share code, notes, and snippets.

@sudipto80
Created October 10, 2014 08:47
Show Gist options
  • Save sudipto80/1d96feab131cc03c7cb4 to your computer and use it in GitHub Desktop.
Save sudipto80/1d96feab131cc03c7cb4 to your computer and use it in GitHub Desktop.
Peter Norvig's Spell Checker (using List Comprehension in LINQ)
//Peter Norvig's Spell Check
Dictionary<string,int> NWords = new Dictionary<string,int>();
public IEnumerable<string> Edits1(string word)
{
char[] alphabet = {'a','b','c','d','e','f','g','h','j','k','l','m','n','o',
'p','q','r','s','t','u','v','w','x','y','z'};
var splits = Enumerable.Range(1,word.Length).Select(i => new {First = new string(word.ToCharArray().Take(i).ToArray()),
Second = new string(word.ToCharArray().Skip(i).ToArray())});
var deletes = splits.Where (split => !string.IsNullOrEmpty(split.Second)).Select (split => split.First + split.Second.Substring(1));
var transposes = splits.Where (split => split.Second.Length>1)
.Select (split => split.First + split.Second[1] + split.Second[0] + split.Second.Substring(2));
var replaces = splits.Where (split => !string.IsNullOrEmpty(split.Second))
.SelectMany(split => alphabet.Select (c => split.First + c + split.Second.Substring(1)));
var inserts = splits.Where (split => !string.IsNullOrEmpty(split.Second))
.SelectMany(split => alphabet.Select (c => split.First + c + split.Second));
return deletes.Union(transposes).Union(replaces).Union(inserts);
}
public Dictionary<string,int> Train(IEnumerable<string> features)
{
Dictionary<string,int> model = new Dictionary<string,int>();
features.ToList().ForEach( f => {if (model.ContainsKey(f)) model[f] += 1; else model.Add(f,1);});
return model;
}
public IEnumerable<string> KnownEdits2(string word)
{
List<string> knownEdits2 = new List<string>();
return Edits1(word).SelectMany( e1 => Edits1(e1).Where (x => NWords.ContainsKey(x)));
}
public IEnumerable<string> Known(IEnumerable<string> words)
{
return words.Intersect(NWords.Select (v => v.Key));
}
public IEnumerable<string> Correct(string word)
{
List<string> candidates = new List<string>();
candidates.AddRange(Known(new List<string>(){word}));
candidates.AddRange(Known(Edits1(word)));
candidates.AddRange(Known(Edits1(word)));
candidates.AddRange(KnownEdits2(word));
candidates.Add(word);
return candidates.Where (c => NWords.ContainsKey(c)).OrderByDescending (c => NWords[c]);
}
void Main()
{
StreamReader sr = new StreamReader ("C:\\T9.txt");
string total = sr.ReadToEnd();
sr.Close();
NWords = Train(Regex.Matches(total,"[a-z]+")
.Cast<Match>()
.Select (m => m.Value.ToLower()));
string word = "mysgtry";//should return "mystery"
Correct(word).Distinct().OrderByDescending (x => x.Length ).Dump();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment