Created
February 4, 2012 21:56
-
-
Save rclayton-the-terrible/1740484 to your computer and use it in GitHub Desktop.
Cascading Demo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Word-count flow assembled with the Cascading API: read text lines, split each
// line into words with a regex, group identical words, and count each group.
// NOTE(review): inputPath, outputPath and Main are defined outside this snippet.

// Source tap: inputPath read through a TextLine scheme that declares the field "line".
Scheme sourceScheme = new TextLine( new Fields( "line" ) );
Tap source = new Hfs( sourceScheme, inputPath );

// Sink tap: outputPath written through a TextLine scheme declaring "word" and "count".
// SinkMode.REPLACE is requested — presumably so an existing output is overwritten; confirm
// against the Cascading SinkMode documentation.
Scheme sinkScheme = new TextLine( new Fields( "word", "count" ) );
Tap sink = new Hfs( sinkScheme, outputPath, SinkMode.REPLACE );

// Head of the pipe assembly; every following stage wraps the previous one.
Pipe assembly = new Pipe( "wordcount" );

// Matches a run of non-space characters that starts and ends on a Unicode letter (\pL)
// and is not directly preceded or followed by another letter.
String regex = "(?<!\\pL)(?=\\pL)[^ ]*(?<=\\pL)(?!\\pL)";

// Apply the regex to the "line" field, producing results under the field "word".
Function function = new RegexGenerator( new Fields( "word" ), regex );
assembly = new Each( assembly, new Fields( "line" ), function );

// Group the stream on the "word" field.
assembly = new GroupBy( assembly, new Fields( "word" ) );

// Run a Count aggregator over every group, storing its result in the field "count".
Aggregator count = new Count( new Fields( "count" ) );
assembly = new Every( assembly, count );

// Register Main.class as the application jar class in the connector properties.
Properties properties = new Properties();
FlowConnector.setApplicationJarClass( properties, Main.class );

// Connect source, sink and assembly into a flow named "word-count", then run it.
FlowConnector flowConnector = new FlowConnector( properties );
Flow flow = flowConnector.connect( "word-count", source, sink, assembly );
flow.complete();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment