Skip to content

Instantly share code, notes, and snippets.

@SaschaDittmann
Created April 10, 2015 07:49
Show Gist options
  • Save SaschaDittmann/830aea645418ef2530bf to your computer and use it in GitHub Desktop.
Save SaschaDittmann/830aea645418ef2530bf to your computer and use it in GitHub Desktop.
MapReduce Entwurfsmuster – Numerische Aggregation (Min/Max/Count)
/// <summary>
/// Combiner step of the Min/Max/Count job: collapses the partial
/// <see cref="MinMaxCountData"/> records received for one key into a single record.
/// </summary>
public class MinMaxCountCombiner :
    JsonInOutReducerCombinerBase<MinMaxCountData, MinMaxCountData>
{
    /// <summary>
    /// Merges all partial records for <paramref name="key"/> and emits one combined record.
    /// </summary>
    /// <param name="key">The key the records are grouped under.</param>
    /// <param name="values">The partial records to merge.</param>
    /// <param name="context">The context the combined record is emitted to.</param>
    public override void Reduce(string key,
        IEnumerable<MinMaxCountData> values,
        JsonReducerCombinerContext<MinMaxCountData> context)
    {
        // Materialize once: the sequence is aggregated three times below.
        var data = values.ToList();
        context.EmitKeyValue(key, new MinMaxCountData
        {
            Min = data.Min(v => v.Min),
            // The overall maximum must be taken from each record's Max, not its Min;
            // otherwise already-combined records lose their latest date.
            Max = data.Max(v => v.Max),
            Count = data.Sum(v => v.Count),
        });
    }
}
/// <summary>
/// Value record of the Min/Max/Count job: a smallest date, a largest date and a count.
/// </summary>
public class MinMaxCountData
{
    /// <summary>The smallest date seen so far.</summary>
    public DateTime Min { get; set; }

    /// <summary>The largest date seen so far.</summary>
    public DateTime Max { get; set; }

    /// <summary>The number of records aggregated into this value.</summary>
    public long Count { get; set; }

    /// <summary>
    /// Formats the record as three tab-separated fields: Min, Max, Count.
    /// </summary>
    /// <returns>The tab-separated text form of this record.</returns>
    public override string ToString()
    {
        // The separator is a tab character; a bare "t" would glue the fields together.
        return String.Format("{0}\t{1}\t{2}", Min, Max, Count);
    }
}
/// <summary>
/// Job definition that wires the Min/Max/Count mapper, combiner and reducer together.
/// </summary>
public class MinMaxCountJob
    : HadoopJob<MinMaxCountMapper, MinMaxCountCombiner, MinMaxCountReducer>
{
    /// <summary>
    /// Builds the job configuration with the input path and output folder of this job.
    /// </summary>
    /// <param name="context">The executor context (not used here).</param>
    /// <returns>The configuration for this job.</returns>
    public override HadoopJobConfiguration Configure(ExecutorContext context)
    {
        var configuration = new HadoopJobConfiguration();
        configuration.InputPath = "/samples/comments";
        configuration.OutputFolder = "output/MinMaxCount";
        return configuration;
    }
}
// Entry point: connects to Hadoop and executes the Min/Max/Count job.
static void Main(string[] args)
{
    Hadoop.Connect().MapReduceJob.ExecuteJob<MinMaxCountJob>();
}
/// <summary>
/// Mapper step of the Min/Max/Count job: turns one input line into a
/// <see cref="MinMaxCountData"/> record keyed by the line's "UserId" value.
/// </summary>
public class MinMaxCountMapper : JsonOutMapperBase<MinMaxCountData>
{
    /// <summary>
    /// Parses <paramref name="inputLine"/> and emits a record whose Min and Max are both
    /// the line's creation date and whose Count is 1. Lines that cannot be parsed, or
    /// that lack "CreationDate" or "UserId", are counted as invalid and skipped.
    /// </summary>
    /// <param name="inputLine">One line of input, handed to <c>XmlUtils.ParseXml</c>.</param>
    /// <param name="context">The context used to emit records and increment counters.</param>
    public override void Map(string inputLine, JsonMapperContext<MinMaxCountData> context)
    {
        var parsed = XmlUtils.ParseXml(inputLine);
        if (parsed == null
            || !parsed.ContainsKey("CreationDate")
            || !parsed.ContainsKey("UserId"))
        {
            context.CoreContext.IncrementCounter(
                "Min Max Count Mapper", "Invalid Rows", 1);
            return;
        }

        // Parse with the invariant culture so the result does not depend on the
        // regional settings of whichever machine happens to run the mapper.
        DateTime creationDate;
        if (!DateTime.TryParse(
                parsed["CreationDate"],
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out creationDate))
        {
            context.CoreContext.IncrementCounter(
                "Min Max Count Mapper", "Invalid Creation Dates", 1);
            return;
        }

        // A single row is its own minimum and maximum and counts once.
        context.EmitKeyValue(parsed["UserId"], new MinMaxCountData
        {
            Min = creationDate,
            Max = creationDate,
            Count = 1,
        });
    }
}
/// <summary>
/// Reducer step of the Min/Max/Count job: merges all <see cref="MinMaxCountData"/>
/// records of one key and emits the merged record in its string form.
/// </summary>
public class MinMaxCountReducer :
    JsonInReducerCombinerBase<MinMaxCountData>
{
    /// <summary>
    /// Merges all records for <paramref name="key"/> and emits the result of
    /// <c>ToString()</c> on the merged record.
    /// </summary>
    /// <param name="key">The key the records are grouped under.</param>
    /// <param name="values">The records to merge.</param>
    /// <param name="context">The context the final value is emitted to.</param>
    public override void Reduce(string key,
        IEnumerable<MinMaxCountData> values,
        ReducerCombinerContext context)
    {
        // Materialize once: the sequence is aggregated three times below.
        var data = values.ToList();
        context.EmitKeyValue(key, new MinMaxCountData
        {
            Min = data.Min(v => v.Min),
            // The overall maximum must be taken from each record's Max, not its Min;
            // otherwise the emitted Max is merely the largest of the minimums.
            Max = data.Max(v => v.Max),
            Count = data.Sum(v => v.Count),
        }.ToString());
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment