Skip to content

Instantly share code, notes, and snippets.

@cilliemalan
Created February 22, 2017 13:44
Show Gist options
  • Save cilliemalan/a2fcc9d749d6cbfb90bbce4ead30cfdf to your computer and use it in GitHub Desktop.
Save cilliemalan/a2fcc9d749d6cbfb90bbce4ead30cfdf to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
namespace DataProcessorTemplate
{
/// <summary>
/// Program that counts the number of lines in which each word occurs.
/// It reads a text file, tallies the words case-insensitively, and writes
/// one tab-separated "word, count" row per distinct word to an output file.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point: reads the input file, processes it, and writes the result.
    /// </summary>
    /// <param name="args">
    /// <c>args[0]</c> is the path of the file to read;
    /// <c>args[1]</c> is the path of the file to write.
    /// </param>
    static void Main(string[] args)
    {
        // Report missing arguments with a readable message instead of letting
        // the indexers below throw IndexOutOfRangeException.
        if (args == null || args.Length < 2)
        {
            Console.Error.WriteLine("Expected two arguments: <input-file> <output-file>");
            Environment.ExitCode = 1;
            return;
        }

        var input = ReadInput(args[0]);
        var output = Process(input);
        WriteOutput(output, args[1]);
    }

    /// <summary>
    /// Reads the input
    /// </summary>
    /// <param name="filename">The file to read from</param>
    /// <returns>A structure holding every line of the file, in file order.</returns>
    private static InputStructure ReadInput(string filename)
    {
        //for example
        return new InputStructure
        {
            // File.ReadAllLines opens the file, reads it line by line and
            // closes it again, replacing the hand-written reader loop.
            Lines = File.ReadAllLines(filename)
        };
    }

    /// <summary>
    /// Process input into output
    /// </summary>
    /// <param name="input">The input to process</param>
    /// <returns>
    /// A structure mapping each distinct word (compared case-insensitively) to
    /// the number of lines that contain it. The key keeps the spelling of the
    /// word's first occurrence.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    private static OutputStructure Process(InputStructure input)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        // A structure without lines is treated as empty input.
        IEnumerable<string> lines = input.Lines ?? Enumerable.Empty<string>();

        //split each line into words and remove duplicate items within lines,
        //so that a word is counted at most once per line
        var wordsPerLine = lines
            .Where(line => line != null)
            .Select(line => Regex.Split(line, @"\s+")
                // Regex.Split yields empty strings for blank lines and for
                // leading/trailing whitespace; those are not words.
                .Where(word => word.Length > 0)
                .Distinct(StringComparer.OrdinalIgnoreCase));

        //flatten, group and count
        var dictionary = wordsPerLine
            .SelectMany(words => words)
            .GroupBy(word => word, StringComparer.OrdinalIgnoreCase)
            // Use the same comparer as the grouping so that lookups in the
            // result are case-insensitive, matching how the words were counted.
            .ToDictionary(group => group.Key, group => group.Count(), StringComparer.OrdinalIgnoreCase);

        //structure
        return new OutputStructure
        {
            WordOccurrences = dictionary
        };
    }

    /// <summary>
    /// Writes an output structure to a file
    /// </summary>
    /// <param name="output">The structure to write</param>
    /// <param name="outputFileName">The file to write to</param>
    private static void WriteOutput(OutputStructure output, string outputFileName)
    {
        //for example
        // File.CreateText replaces the contents of an existing file.
        // File.OpenWrite does not truncate, so a shorter result would leave
        // stale bytes of the previous file at the end of the output.
        using (var outfile = File.CreateText(outputFileName))
        {
            // Rows are written in the dictionary's enumeration order.
            foreach (var word in output.WordOccurrences)
            {
                outfile.WriteLine($"{word.Key}\t{word.Value}");
            }
        }
    }
}
/// <summary>
/// Container for the program's input: all of the input lines.
/// </summary>
public class InputStructure
{
    // Storage behind Lines; remains null until a caller assigns the property.
    private string[] _lines;

    /// <summary>
    /// The input lines (for example). Null until assigned.
    /// </summary>
    public string[] Lines
    {
        get { return _lines; }
        set { _lines = value; }
    }
}
/// <summary>
/// Container for the program's output: a collection of words, each paired
/// with the number of lines in which it occurred.
/// </summary>
public class OutputStructure
{
    // Storage behind WordOccurrences; remains null until a caller assigns the property.
    private IDictionary<string, int> _wordOccurrences;

    /// <summary>
    /// Maps each word to its line count (for example). Null until assigned.
    /// </summary>
    public IDictionary<string, int> WordOccurrences
    {
        get { return _wordOccurrences; }
        set { _wordOccurrences = value; }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment