Last active
December 17, 2015 17:29
-
-
Save randallwhitman/5646184 to your computer and use it in GitHub Desktop.
Trip Discovery - By Origin Cell, Count Trips to Common Destination Cell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public void reduce(Text key, Iterable<TripCorrWrit> values, Context ctx) | |
throws IOException, InterruptedException { | |
final int INIT_SIZE = 8000; | |
HashMap<String,Long> records = new HashMap<String,Long>(INIT_SIZE); | |
String sval, maxDest = null; | |
long totCount = 0, maxCount = 0; | |
for (TripCorrWrit entry : values) { | |
sval = entry.toString(); // bounds of destination cell - value of iterator, key of hashmap | |
long newCount = records.containsKey(sval) ? 1 + records.get(sval) : 1; | |
records.put(sval, newCount); | |
if (newCount > maxCount) { | |
maxCount = newCount; | |
maxDest = sval; | |
} | |
totCount++; | |
} // /for | |
Configuration config = ctx.getConfiguration(); | |
int minPoints = config.getInt("com.esri.trip.threshold", 10); //minimum count per cell | |
minPoints = minPoints < 2 ? 1 : minPoints; | |
if (totCount >= minPoints) { | |
double pct = 0.; | |
if (maxCount > 1) // if only one trip going to each destination cell, report zero correlation. | |
pct = 100. * (double)maxCount / (double)totCount; | |
ctx.write(key, new Text(String.format("%d\t%d\t%f\t%s", | |
totCount, maxCount, pct, // calculated numbers | |
maxDest))); // most common destination cell (bounds) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment