Skip to content

Instantly share code, notes, and snippets.

@fffej
Created Oct 13, 2021
Embed
What would you like to do?
2-gram (bigram) text model in C#
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
namespace generating_text
{
/// <summary>
/// A 2-gram (bigram) language model: for every word seen in the training
/// text it remembers the words that directly followed it, and can then
/// produce new text by repeatedly picking one of those followers at random.
/// </summary>
class Model
{
    // A map of Word => {Words that followed it}.
    // Note that followers are duplicated according to the frequency they occur,
    // so a uniform random pick from the list is weighted by observed frequency.
    private readonly Dictionary<string, List<string>> _languageModel;

    // Source of randomness used when choosing the next word.
    private readonly Random _random = new Random();

    private Model(Dictionary<string, List<string>> languageModel)
    {
        _languageModel = languageModel;
    }

    /// <summary>
    /// Builds a model from a sequence of words by recording every adjacent
    /// (word, following word) pair.
    /// </summary>
    /// <param name="words">The training words, in reading order.</param>
    /// <returns>A model containing one follower entry per adjacent pair.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="words"/> is null.
    /// </exception>
    public static Model Build(IEnumerable<string> words)
    {
        if (words == null)
        {
            throw new ArgumentNullException(nameof(words));
        }

        var successors = new Dictionary<string, List<string>>();

        // Walk the input exactly once, remembering the previous word, so that
        // deferred or one-shot sequences are not enumerated a second time.
        string previous = null;
        var hasPrevious = false;
        foreach (var current in words)
        {
            if (hasPrevious)
            {
                // Only allocate a follower list the first time a word is seen.
                if (!successors.TryGetValue(previous, out var followers))
                {
                    followers = new List<string>();
                    successors[previous] = followers;
                }
                followers.Add(current);
            }

            previous = current;
            hasPrevious = true;
        }

        return new Model(successors);
    }

    /// <summary>
    /// Picks a random follower of <paramref name="seed"/>, or returns null
    /// when the model has no followers recorded for that word.
    /// </summary>
    private string Next(string seed)
    {
        if (!_languageModel.TryGetValue(seed, out var nextWords))
        {
            return null;
        }

        return nextWords[_random.Next(nextWords.Count)];
    }

    /// <summary>
    /// Lazily generates text starting at <paramref name="word"/>. The starting
    /// word is always yielded first; generation stops once a word with no
    /// recorded followers is reached.
    /// </summary>
    /// <param name="word">The word to start from.</param>
    /// <returns>
    /// A lazy sequence of words. It never ends on its own while every
    /// generated word has a follower, so callers should bound it
    /// (for example with <c>Take</c>).
    /// </returns>
    public IEnumerable<string> Generate(string word)
    {
        do
        {
            yield return word;
        } while (null != (word = Next(word)));
    }
}
/// <summary>
/// Command-line entry point: trains a bigram model on some text and prints
/// a run of generated words.
/// </summary>
class Program
{
    // Word the generated text starts from.
    private const string SeedWord = "the";

    // Upper bound on the number of words printed; generation may stop earlier.
    private const int MaxWords = 100;

    /// <summary>
    /// Reads the training text from the file named by the first argument, or
    /// from standard input when no argument is given, then writes the
    /// generated words to standard output separated by single spaces.
    /// </summary>
    /// <param name="args">Optional: args[0] is the path of the text file to read.</param>
    static void Main(string[] args)
    {
        // Read from the given file if there is one, otherwise from standard in
        var rawText = args.Length >= 1 ? File.ReadAllText(args[0]) : Console.In.ReadToEnd();

        // Lower-case with the invariant culture so the vocabulary does not
        // depend on the machine's locale, then split on whitespace. A null
        // separator array means "split on whitespace"; RemoveEmptyEntries
        // drops the blank words and yields an already-materialized array.
        var words = rawText
            .ToLowerInvariant()
            .Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

        // Create a language model
        var model = Model.Build(words);

        Console.Out.WriteLine(string.Join(" ", model.Generate(SeedWord).Take(MaxWords)));
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment