Skip to content

Instantly share code, notes, and snippets.

@dcabines
Last active December 16, 2015 13:48
Show Gist options
  • Save dcabines/5444031 to your computer and use it in GitHub Desktop.
Save dcabines/5444031 to your computer and use it in GitHub Desktop.
List the top words used in a file and the number of times they are used.
using System;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
namespace TopWords {
    /// <summary>
    /// Console tool that lists the most frequently used words in a text file,
    /// together with the number of times each one occurs.
    /// </summary>
    internal class Program {
        /// <summary>Maximum number of distinct words included in the report.</summary>
        private const int MaxWords = 298;

        /// <summary>Words shorter than this many characters are ignored.</summary>
        private const int MinWordLength = 4;

        /// <summary>
        /// Matches every character that is not an ASCII letter, a digit,
        /// an apostrophe or a space. Built once and reused.
        /// </summary>
        private static readonly Regex NonWordCharacters = new Regex("[^a-zA-Z0-9' ]");

        /// <summary>
        /// Reads the file named by the first command-line argument and prints
        /// its most frequent words, one per line, to standard output.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the path of the file to analyse.</param>
        private static void Main(string[] args) {
            // Guard against a missing argument instead of failing with IndexOutOfRangeException.
            if (args == null || args.Length == 0) {
                Console.Error.WriteLine("Usage: TopWords <file>");
                Environment.ExitCode = 1;
                return;
            }

            var file = args[0];
            if (!File.Exists(file)) {
                Console.Error.WriteLine("File not found: " + file);
                Environment.ExitCode = 1;
                return;
            }

            var lines = BuildReport(File.ReadAllText(file));

            Console.WriteLine();
            foreach (var line in lines) {
                Console.WriteLine(line);
            }

            // Wait for a line of input before exiting
            // (presumably so a console window stays open until Enter is pressed).
            Console.ReadLine();
        }

        /// <summary>
        /// Builds the report lines for <paramref name="text"/>: each line holds the
        /// 1-based rank (3 digits), the occurrence count (4 digits) and the word.
        /// </summary>
        /// <param name="text">Raw text to analyse.</param>
        /// <returns>
        /// At most <see cref="MaxWords"/> formatted lines, ordered by descending count.
        /// </returns>
        private static string[] BuildReport(string text) {
            // Invariant lower-casing keeps ASCII letters ASCII regardless of the current
            // culture (e.g. Turkish "I" would otherwise become a non-ASCII dotless "ı"
            // and be stripped by the regex below, splitting the word).
            // Line breaks need no special handling: the regex turns CR and LF into
            // spaces, and the empty tokens that produces are dropped by the split.
            var normalized = NonWordCharacters.Replace(text.ToLowerInvariant(), " ");

            return normalized
                .Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries)
                .Where(word => word.Length >= MinWordLength)
                .GroupBy(word => word)
                .Select(group => new {Word = group.Key, Count = group.Count()})
                .OrderByDescending(entry => entry.Count)
                .Take(MaxWords)
                // The indexed Select overload supplies the rank directly,
                // avoiding a linear IndexOf lookup per entry.
                .Select((entry, index) =>
                    string.Format(" {0:d3} {1:d4} {2}", index + 1, entry.Count, entry.Word))
                .ToArray();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment