Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
How to Write a Spelling Corrector in C# (from Peter Norvig's Python example)
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
namespace SpellingCorrector
{
/// <summary>
/// Conversion from http://norvig.com/spell-correct.html by C.Small
/// </summary>
public class Spelling
{
private Dictionary<String, int> _dictionary = new Dictionary<String, int>();
private static Regex _wordRegex = new Regex("[a-z]+", RegexOptions.Compiled);
public Spelling()
{
string fileContent = File.ReadAllText("big.txt");
List<string> wordList = fileContent.Split(new string[] {"\n"} , StringSplitOptions.RemoveEmptyEntries).ToList();
foreach (var word in wordList)
{
string trimmedWord = word.Trim().ToLower();
if (_wordRegex.IsMatch(trimmedWord))
{
if (_dictionary.ContainsKey(trimmedWord))
_dictionary[trimmedWord]++;
else
_dictionary.Add(trimmedWord, 1);
}
}
}
public string Correct(string word)
{
if (string.IsNullOrEmpty(word))
return word;
word = word.ToLower();
// known()
if (_dictionary.ContainsKey(word))
return word;
List<String> list = Edits(word);
Dictionary<string, int> candidates = new Dictionary<string, int>();
foreach (string wordVariation in list)
{
if (_dictionary.ContainsKey(wordVariation) && !candidates.ContainsKey(wordVariation))
candidates.Add(wordVariation, _dictionary[wordVariation]);
}
if (candidates.Count > 0)
return candidates.OrderByDescending(x => x.Value).First().Key;
// known_edits2()
foreach (string item in list)
{
foreach (string wordVariation in Edits(item))
{
if (_dictionary.ContainsKey(wordVariation) && !candidates.ContainsKey(wordVariation))
candidates.Add(wordVariation, _dictionary[wordVariation]);
}
}
return (candidates.Count > 0) ? candidates.OrderByDescending(x => x.Value).First().Key : word;
}
private List<string> Edits(string word)
{
var splits = new List<Tuple<string, string>>();
var transposes = new List<string>();
var deletes = new List<string>();
var replaces = new List<string>();
var inserts = new List<string>();
// Splits
for (int i = 0; i < word.Length; i++)
{
var tuple = new Tuple<string, string>(word.Substring(0, i), word.Substring(i));
splits.Add(tuple);
}
// Deletes
for (int i = 0; i < splits.Count; i++)
{
string a = splits[i].Item1;
string b = splits[i].Item2;
if (!string.IsNullOrEmpty(b))
{
deletes.Add(a + b.Substring(1));
}
}
// Transposes
for (int i = 0; i < splits.Count; i++)
{
string a = splits[i].Item1;
string b = splits[i].Item2;
if (b.Length > 1)
{
transposes.Add(a + b[1] + b[0] + b.Substring(2));
}
}
// Replaces
for (int i = 0; i < splits.Count; i++)
{
string a = splits[i].Item1;
string b = splits[i].Item2;
if (!string.IsNullOrEmpty(b))
{
for (char c = 'a'; c <= 'z'; c++)
{
replaces.Add(a + c + b.Substring(1));
}
}
}
// Inserts
for (int i = 0; i < splits.Count; i++)
{
string a = splits[i].Item1;
string b = splits[i].Item2;
for (char c = 'a'; c <= 'z'; c++)
{
inserts.Add(a + c + b);
}
}
return deletes.Union(transposes).Union(replaces).Union(inserts).ToList();
}
}
}
@nesterenko-kv

This comment has been minimized.

Copy link

nesterenko-kv commented Nov 12, 2015

Why u use so many cycles? And what the reason to use Dictionary<string, int> if u can just use HashSet?

@nesterenko-kv

This comment has been minimized.

Copy link

nesterenko-kv commented Nov 12, 2015

I optimized your programm, come here to see it: https://gist.github.com/nestquik/6c5146c75631a8ff3084

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.