Skip to content

Instantly share code, notes, and snippets.

@SaschaDittmann
Created April 10, 2015 08:14
Show Gist options
  • Save SaschaDittmann/28e0159fdfac9db7f58d to your computer and use it in GitHub Desktop.
Save SaschaDittmann/28e0159fdfac9db7f58d to your computer and use it in GitHub Desktop.
MapReduce Entwurfsmuster – Numerische Aggregation (Standardabweichung 1/2)
/// <summary>
/// Mapper that emits, for every valid input row, the hour of the row's
/// "CreationDate" attribute as key and the character length of its "Text"
/// attribute as value.
/// </summary>
public class MedianStdDevMapper : MapperBase
{
    // Counter group shared by all counters this mapper reports.
    private const string CounterGroup = "Median / Std. Dev. Mapper";

    /// <summary>
    /// Parses one input line and emits (hour, text length) for it.
    /// Rows that cannot be parsed, or that lack "CreationDate" or "Text",
    /// increment the "Invalid Rows" counter and are skipped; rows whose
    /// creation date cannot be parsed increment "Invalid Creation Dates"
    /// and are skipped.
    /// </summary>
    /// <param name="inputLine">One raw input record, handed to <c>XmlUtils.ParseXml</c>.</param>
    /// <param name="context">Context used to report counters and emit the key/value pair.</param>
    public override void Map(string inputLine, MapperContext context)
    {
        var parsed = XmlUtils.ParseXml(inputLine);
        if (parsed == null
            || !parsed.ContainsKey("CreationDate")
            || !parsed.ContainsKey("Text"))
        {
            context.CoreContext.IncrementCounter(CounterGroup, "Invalid Rows", 1);
            return;
        }

        // Parse with the invariant culture so the result does not depend on
        // the regional settings of whichever node runs this task; the emitted
        // key and value below are formatted with the invariant culture too.
        DateTime creationDate;
        if (!DateTime.TryParse(
                parsed["CreationDate"],
                CultureInfo.InvariantCulture,
                DateTimeStyles.None,
                out creationDate))
        {
            context.CoreContext.IncrementCounter(CounterGroup, "Invalid Creation Dates", 1);
            return;
        }

        var text = parsed["Text"];
        context.EmitKeyValue(
            creationDate.Hour.ToString(CultureInfo.InvariantCulture),
            text.Length.ToString(CultureInfo.InvariantCulture));
    }
}
/// <summary>
/// Reducer that computes the median and the sample standard deviation of the
/// numeric values received for a single key.
/// </summary>
public class MedianStdDevReducer : ReducerCombinerBase
{
    /// <summary>
    /// Parses all values of <paramref name="key"/> as numbers and emits
    /// "median&lt;TAB&gt;standard deviation" for that key.
    /// </summary>
    /// <param name="key">The grouping key; passed through unchanged to the output.</param>
    /// <param name="values">Numeric values as invariant-culture strings.</param>
    /// <param name="context">Context used to emit the result.</param>
    /// <exception cref="FormatException">A value is not a valid number.</exception>
    public override void Reduce(string key,
        IEnumerable<string> values,
        ReducerCombinerContext context)
    {
        // Values are parsed with the invariant culture so the result does not
        // depend on the node's regional settings. double (rather than float)
        // keeps sums of many integer lengths exact well beyond 2^24.
        var lengths = values
            .Select(v => double.Parse(v, CultureInfo.InvariantCulture))
            .ToList();

        int count = lengths.Count;
        if (count == 0)
        {
            // Nothing to aggregate; emitting would require indexing an empty list.
            return;
        }

        lengths.Sort();

        // Median: average the two middle elements for an even count,
        // otherwise take the single middle element.
        double median = count % 2 == 0
            ? (lengths[count / 2 - 1] + lengths[count / 2]) / 2.0
            : lengths[count / 2];

        // Sample standard deviation (divisor n - 1).
        double avg = lengths.Sum() / count;
        double sumOfSquares = lengths.Sum(length => (length - avg) * (length - avg));

        // With a single value the n - 1 divisor is zero; report 0 instead of NaN.
        double stdDev = count > 1
            ? Math.Sqrt(sumOfSquares / (count - 1))
            : 0.0;

        context.EmitKeyValue(
            key,
            string.Format(CultureInfo.InvariantCulture, "{0}\t{1}", median, stdDev));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment