Skip to content

Instantly share code, notes, and snippets.

@AndyCross
Created November 22, 2012 16:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AndyCross/4131891 to your computer and use it in GitHub Desktop.
Save AndyCross/4131891 to your computer and use it in GitHub Desktop.
StreamingUnit
using Elastacloud.Hadoop.StreamingUnitExample.Job.Map;
using Elastacloud.Hadoop.StreamingUnitExample.Job.Reduce;
using Microsoft.Hadoop.MapReduce;
namespace Elastacloud.Hadoop.StreamingUnitExample.Job
{
/// <summary>
/// Job definition that pairs <see cref="HelloWorldMapper"/> with
/// <see cref="HelloWorldReducer"/>.
/// </summary>
public class HelloWorldJob : HadoopJob<HelloWorldMapper, HelloWorldReducer>
{
    /// <summary>
    /// Builds the configuration for this job.
    /// </summary>
    /// <param name="context">Executor context supplied by the caller; not used here.</param>
    /// <returns>A freshly constructed, otherwise untouched <see cref="HadoopJobConfiguration"/>.</returns>
    public override HadoopJobConfiguration Configure(ExecutorContext context)
    {
        // A real job would normally set up its input (and other settings) on this
        // object before returning it; this example leaves it at its defaults.
        var configuration = new HadoopJobConfiguration();
        return configuration;
    }
}
}
using Microsoft.Hadoop.MapReduce;
namespace Elastacloud.Hadoop.StreamingUnitExample.Job.Map
{
/// <summary>
/// Mapper that recognises greeting lines of the form <c>Hello, NAME</c> and emits
/// <c>NAME</c> as the key with the value <c>"1"</c>, so that occurrences of each
/// name can be counted downstream.
/// </summary>
public class HelloWorldMapper : MapperBase
{
    /// <summary>Literal prefix a line must start with to be treated as a greeting.</summary>
    private const string GreetingPrefix = "Hello, ";

    /// <summary>
    /// Processes one input line. Lines that are null or do not start with
    /// <c>"Hello, "</c> (exact, case-sensitive match) are ignored.
    /// </summary>
    /// <param name="inputLine">The raw input line, e.g. <c>Hello, Andy</c>.</param>
    /// <param name="context">Context used to emit the key/value pair.</param>
    public override void Map(string inputLine, MapperContext context)
    {
        // Ordinal comparison: the prefix is a fixed token, not linguistic text. A
        // culture-sensitive match could succeed on a prefix whose length is not
        // GreetingPrefix.Length, which would make the Substring below slice wrongly.
        if (inputLine == null ||
            !inputLine.StartsWith(GreetingPrefix, System.StringComparison.Ordinal))
        {
            return;
        }

        // Take everything after the prefix and drop trailing full stops only.
        // (Trim('.') would also strip leading dots, which the original
        // "ends with '.'" guard shows was never the intent.)
        var key = inputLine.Substring(GreetingPrefix.Length).TrimEnd('.');

        // We are going to count instances, so the emitted value itself is irrelevant.
        context.EmitKeyValue(key, "1");
    }
}
}
using System.Collections.Generic;
using System.Linq;
using Microsoft.Hadoop.MapReduce;
namespace Elastacloud.Hadoop.StreamingUnitExample.Job.Reduce
{
/// <summary>
/// Reducer/combiner that emits, for each key, how many values were received for it.
/// </summary>
public class HelloWorldReducer : ReducerCombinerBase
{
    /// <summary>
    /// Emits <paramref name="key"/> together with the number of entries in
    /// <paramref name="values"/>, formatted as a string.
    /// </summary>
    /// <param name="key">The key being reduced.</param>
    /// <param name="values">The values grouped under <paramref name="key"/>; only their count is used.</param>
    /// <param name="context">Context used to emit the key/count pair.</param>
    public override void Reduce(string key, IEnumerable<string> values, ReducerCombinerContext context)
    {
        // Count instances of this key; the contents of the values are never inspected.
        int occurrences = Enumerable.Count(values);
        string occurrencesText = occurrences.ToString();

        context.EmitKeyValue(key, occurrencesText);
    }
}
}
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
Map
Andy 1
Andy 1
Andy 1
andy 1
chickenface 1
why doesn't this work! 1
Reduce
Andy 3
andy 1
chickenface 1
why doesn't this work! 1
using System;
using Elastacloud.Hadoop.StreamingUnitExample.Job.Map;
using Elastacloud.Hadoop.StreamingUnitExample.Job.Reduce;
using Microsoft.Hadoop.MapReduce;
namespace Elastacloud.Hadoop.StreamingUnitExample
{
/// <summary>
/// Console driver that runs the HelloWorld mapper and reducer over a small in-memory
/// sample via <c>StreamingUnit.Execute</c> and prints both stages' results.
/// </summary>
class Program
{
    /// <summary>
    /// Executes the sample, writes the mapper results under a "Map" heading and the
    /// reducer results under a "Reduce" heading, then waits for a line of console input.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // Sample greeting lines fed to the mapper.
        string[] greetings =
        {
            "Hello, Andy",
            "Hello, andy",
            "Hello, why doesn't this work!",
            "Hello, Andy",
            "Hello, chickenface",
            "Hello, Andy"
        };

        var result = StreamingUnit.Execute<HelloWorldMapper, HelloWorldReducer>(greetings);

        // Mapper stage output.
        Console.WriteLine("Map");
        foreach (var mapped in result.MapperResult)
        {
            Console.WriteLine(mapped);
        }

        // Reducer stage output.
        Console.WriteLine("Reduce");
        foreach (var reduced in result.ReducerResult)
        {
            Console.WriteLine(reduced);
        }

        // Block until a line is entered so the console output can be read.
        Console.ReadLine();
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment