Skip to content

Instantly share code, notes, and snippets.

@randallwhitman
Last active December 17, 2015 17:29
Show Gist options
  • Save randallwhitman/5646184 to your computer and use it in GitHub Desktop.
Save randallwhitman/5646184 to your computer and use it in GitHub Desktop.
Trip Discovery - By Origin Cell, Count Trips to Common Destination Cell
/**
 * Counts, for one key (origin cell), how many of its values map to each
 * destination cell, and reports the most common destination.
 *
 * <p>Values are grouped by their {@code toString()} form (the bounds of the
 * destination cell). If the total number of values reaches the configured
 * threshold, one record is written for the key with four tab-separated
 * fields: total count, count for the most common destination, that count as
 * a percentage of the total, and the most common destination itself. When
 * several destinations tie, the first one to reach the winning count is
 * reported. Keys below the threshold produce no output.
 *
 * <p>The threshold is read from the configuration property
 * {@code com.esri.trip.threshold} (default 10); values below 1 are treated
 * as 1.
 *
 * @param key    the key being reduced (origin cell)
 * @param values the values grouped under {@code key}
 * @param ctx    supplies the configuration and receives the output record
 * @throws IOException          if writing the output record fails
 * @throws InterruptedException if writing the output record is interrupted
 */
public void reduce(Text key, Iterable<TripCorrWrit> values, Context ctx)
        throws IOException, InterruptedException {
    final int initialCapacity = 8000;
    HashMap<String, Long> tripsPerDest = new HashMap<String, Long>(initialCapacity);
    String maxDest = null;
    long totCount = 0;
    long maxCount = 0;

    for (TripCorrWrit entry : values) {
        // Bounds of the destination cell: value of the iterator, key of the map.
        String dest = entry.toString();
        // Single lookup instead of containsKey() followed by get().
        Long previous = tripsPerDest.get(dest);
        long newCount = (previous == null) ? 1L : previous.longValue() + 1L;
        tripsPerDest.put(dest, Long.valueOf(newCount));
        // Strict comparison: on a tie the earlier leader is kept.
        if (newCount > maxCount) {
            maxCount = newCount;
            maxDest = dest;
        }
        totCount++;
    }

    Configuration config = ctx.getConfiguration();
    // Minimum count per cell; never allow a threshold below 1.
    int minPoints = Math.max(1, config.getInt("com.esri.trip.threshold", 10));
    if (totCount < minPoints) {
        return;
    }

    // If only one trip goes to each destination cell, report zero correlation.
    double pct = 0.0;
    if (maxCount > 1) {
        pct = 100.0 * (double) maxCount / (double) totCount;
    }

    // Locale.ROOT keeps the numeric fields machine-readable (decimal point,
    // ASCII digits) regardless of the default locale of the JVM running the task.
    String summary = String.format(java.util.Locale.ROOT, "%d\t%d\t%f\t%s",
            totCount, maxCount, pct, // calculated numbers
            maxDest);                // most common destination cell (bounds)
    ctx.write(key, new Text(summary));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment