Skip to content

Instantly share code, notes, and snippets.

@jorisbontje
Created April 4, 2012 09:29
Show Gist options
  • Save jorisbontje/2299935 to your computer and use it in GitHub Desktop.
Save jorisbontje/2299935 to your computer and use it in GitHub Desktop.
@SuppressWarnings("serial")
@PlatformRunner.Platform({ LocalPlatform.class, HadoopPlatform.class})
public class SortTest extends PlatformTestCase {
private static final inputFileSort = "src/test/data/sort.txt";
public SortTest() {
super(false);
}
@Test
public void testGroupByXor() throws IOException {
getPlatform().copyFromLocal(inputFileSort);
Fields sumFields = new Fields("id", "sum1", "sum2");
Tap source = getPlatform().getDelimitedFile(sumFields, "\t",
new Class[] { Long.TYPE, Long.TYPE, Long.TYPE }, inputFileSort,
SinkMode.KEEP);
Tap sink = getPlatform().getDelimitedFile(sumFields,
getOutputPath("sort"), SinkMode.REPLACE);
Pipe pipe = new Pipe("pipe");
Fields sortFields = new Fields("sum1");
sortFields.setComparator("sum1", Collections.reverseOrder());
pipe = new GroupBy(pipe, sortFields);
Properties properties = new Properties();
properties.put("mapred.reduce.tasks", 1);
Flow flow = getPlatform().getFlowConnector(properties)
.connect(source, sink, pipe);
flow.complete();
Tuple[] expected = new Tuple[] { new Tuple("2", "56000", "5000"),
new Tuple("1", "46000", "6000"),
new Tuple("2", "33000", "4000"),
new Tuple("1", "30000", "5000"),
new Tuple("1", "25000", "7000"),
new Tuple("2", "22000", "4000"),
new Tuple("1", "20000", "6000"),
new Tuple("2", "20000", "3000"),
new Tuple("2", "12000", "2500"),
new Tuple("1", "10000", "4500"),
new Tuple("1", "9000", "4000"), };
validateLength(flow, expected.length);
Iterator<Tuple> iterator = getSinkAsList(flow).iterator();
int count = 0;
while (iterator.hasNext()) {
assertEquals(expected[count++], iterator.next());
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment