Skip to content

Instantly share code, notes, and snippets.

@peterdn
Created October 27, 2010 22:32
Show Gist options
  • Save peterdn/650167 to your computer and use it in GitHub Desktop.
Save peterdn/650167 to your computer and use it in GitHub Desktop.
The beginnings of an HMM POS tagger for my computational linguistics course
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Reflection;
using System.IO;
namespace HMMTagger
{
/// <summary>
/// A simple pair of values with value-based equality, so that two pairs holding
/// equal components compare equal and can be used as a dictionary key.
/// </summary>
/// <remarks>
/// The components are settable. Changing them while the instance is stored as a
/// key in a hash-based collection changes its hash code and makes the entry
/// unreachable, so treat instances used as keys as read-only.
/// </remarks>
/// <typeparam name="T1">Type of the first component.</typeparam>
/// <typeparam name="T2">Type of the second component.</typeparam>
class Tuple<T1, T2>
{
    /// <summary>First component of the pair.</summary>
    public T1 Item1 { get; set; }

    /// <summary>Second component of the pair.</summary>
    public T2 Item2 { get; set; }

    /// <summary>Creates a pair from the two given components.</summary>
    /// <param name="Item1">Value stored in <see cref="Item1"/>.</param>
    /// <param name="Item2">Value stored in <see cref="Item2"/>.</param>
    public Tuple(T1 Item1, T2 Item2)
    {
        this.Item1 = Item1;
        this.Item2 = Item2;
    }

    /// <summary>
    /// Compares component-wise with another pair of the same closed generic type.
    /// </summary>
    /// <param name="obj">Object to compare against; may be null.</param>
    /// <returns>
    /// true when <paramref name="obj"/> is a pair whose components both equal
    /// this pair's components; false otherwise (including for null).
    /// </returns>
    public override bool Equals(object obj)
    {
        var other = obj as Tuple<T1, T2>;
        if (other == null)
        {
            return false;
        }

        // EqualityComparer handles null components instead of throwing.
        return EqualityComparer<T1>.Default.Equals(Item1, other.Item1)
            && EqualityComparer<T2>.Default.Equals(Item2, other.Item2);
    }

    /// <summary>
    /// Combines the component hash codes; a null component contributes 0.
    /// </summary>
    /// <returns>A hash code consistent with <see cref="Equals(object)"/>.</returns>
    public override int GetHashCode()
    {
        unchecked
        {
            int first = Item1 == null ? 0 : Item1.GetHashCode();
            int second = Item2 == null ? 0 : Item2.GetHashCode();

            // Multiply before mixing so that (a, b) and (b, a) do not
            // systematically hash to the same value.
            return (first * 397) ^ second;
        }
    }

    /// <summary>
    /// Formats the pair as "(Item1, Item2)"; a null component renders as empty text.
    /// </summary>
    public override string ToString()
    {
        return string.Format("({0}, {1})", Item1, Item2);
    }
}
/// <summary>
/// Command-line entry point: reads a text file line by line, counts every
/// "left/right" token (presumably word/tag pairs of a tagged corpus), and
/// prints the ten most frequent pairs.
/// </summary>
class Program
{
    // Declared alongside wordFreqs but not populated or read anywhere in this
    // class yet. Allocated up front so it is never null.
    static readonly Dictionary<Tuple<string, string>, int> categoryFreqs =
        new Dictionary<Tuple<string, string>, int>();

    // Number of occurrences of each pair seen by ProcessLine. Allocated up
    // front so ProcessLine does not depend on Main having run first.
    static readonly Dictionary<Tuple<string, string>, int> wordFreqs =
        new Dictionary<Tuple<string, string>, int>();

    /// <summary>
    /// Counts the pairs in the file named by the first argument and writes the
    /// ten highest counts to standard output.
    /// </summary>
    /// <param name="args">Command-line arguments; args[0] is the input file path.</param>
    static void Main(string[] args)
    {
        if (args.Length < 1)
        {
            Console.WriteLine("Usage: {0} [file]",
                Path.GetFileName(Assembly.GetExecutingAssembly().Location));
            return;
        }

        FileStream file;
        try
        {
            file = File.OpenRead(args[0]);
        }
        catch (Exception ex)
        {
            // Any failure to open the file is reported and ends the run.
            Console.WriteLine(ex.Message);
            return;
        }

        // Disposing the reader also closes the underlying file stream.
        using (StreamReader reader = new StreamReader(file))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                ProcessLine(line);
            }
        }

        var populars = wordFreqs.OrderByDescending(entry => entry.Value).Take(10);
        foreach (var p in populars)
        {
            Console.WriteLine("{0} - {1}", p.Key, p.Value);
        }
    }

    /// <summary>
    /// Splits a line on spaces and increments the count of every token that
    /// consists of exactly two parts separated by a single '/'.
    /// </summary>
    /// <param name="line">One line of input; null or empty lines are ignored.</param>
    static void ProcessLine(string line)
    {
        if (string.IsNullOrEmpty(line))
        {
            return;
        }

        foreach (var token in line.Split(' '))
        {
            // Tokens without a slash split into one part and tokens with
            // several slashes into more than two; both are skipped.
            var components = token.Split('/');
            if (components.Length != 2)
            {
                continue;
            }

            var key = new Tuple<string, string>(components[0], components[1]);

            // TryGetValue leaves count at 0 for a new key, so a single lookup
            // covers both the first occurrence and later increments.
            int count;
            wordFreqs.TryGetValue(key, out count);
            wordFreqs[key] = count + 1;
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment