Skip to content

Instantly share code, notes, and snippets.

@geofferyzh
Created October 5, 2012 14:12
Show Gist options
  • Save geofferyzh/3840001 to your computer and use it in GitHub Desktop.
Save geofferyzh/3840001 to your computer and use it in GitHub Desktop.
Hadoop 101 - Reducer Iterator to ArrayList
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.util.List;
import java.util.ArrayList;
/**
 * Reducer that turns the values grouped under one key into co-occurrence pairs.
 *
 * <p>Per the original author's note, a group is expected to look like
 * {@code User1 -> (App1,0.333  App2,0.444  App3,0.555)}. For such a group the
 * reducer emits one record per unordered pair of values, each with a count of 1:
 * <pre>
 *   App1,0.333:App2,0.444    1
 *   App1,0.333:App3,0.555    1
 *   App2,0.444:App3,0.555    1
 * </pre>
 * Inside an emitted key the two values are arranged in {@link String#compareTo}
 * order, so the pair (A, B) and the pair (B, A) always yield the same key.
 */
public class CosineReducer2 extends Reducer<Text, Text, Text, IntWritable> {

    /**
     * Emits {@code "<smaller>:<larger>" -> 1} for every pair of positions
     * {@code i < j} in the value list of {@code key}.
     *
     * @param key     the grouping key; it is not used when building the output
     * @param values  the values grouped under {@code key}
     * @param context sink that receives the generated pair records
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {

        // The pairing below needs random access, so the iterable is first
        // copied into an in-memory list of plain strings.
        List<String> entries = new ArrayList<String>();
        for (Text value : values) {
            entries.add(value.toString());
        }

        final int total = entries.size();
        for (int first = 0; first < total; first++) {
            final String left = entries.get(first);

            for (int second = first + 1; second < total; second++) {
                final String right = entries.get(second);

                // Put the lexicographically smaller entry on the left of the colon.
                // When both entries are equal either order gives the same key.
                final String pairKey = (left.compareTo(right) < 0)
                        ? left + ":" + right
                        : right + ":" + left;

                context.write(new Text(pairKey), new IntWritable(1));
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment