Skip to content

Instantly share code, notes, and snippets.

Keybase proof

I hereby claim:

  • I am cwensel on github.
  • I am cwensel (https://keybase.io/cwensel) on keybase.
  • I have a public key ASBqoJBj_Oaqu3SaUaTWJxzehB-i1AtD1hGnd8HSKsu8hQo

To claim this, I am signing this object:

# Builds the OPTIONS string: the configure-hadoop bootstrap action followed by
# the YARN log-aggregation settings handed to it through --args.
# Before use, replace "your-bucket" in the yarn.nodemanager.remote-app-log-dir
# value below with the name of your own bucket.
# NOTE(review): the command that consumes OPTIONS is not shown here.
OPTIONS="--bootstrap-action s3://elasticmapreduce/bootstrap-actions/configure-hadoop"
# Append the -y key=value pairs: turn log aggregation on, set retain-seconds to -1,
# set the retain check interval to 3000 seconds, and point the remote application
# log directory at the S3 path.
OPTIONS="${OPTIONS} --args -y,yarn.log-aggregation-enable=true,-y,yarn.log-aggregation.retain-seconds=-1,-y,yarn.log-aggregation.retain-check-interval-seconds=3000,-y,yarn.nodemanager.remote-app-log-dir=s3://your-bucket/emr/yarn-logs"
@cwensel
cwensel / gist:9102c10d24823b6ba3b5
Last active August 29, 2015 14:04
A prototype fluent API for Cascading
// Factories for all Operations (Functions, Filters, Aggregators, and Buffers).
// This chain starts from the Function factory, selects RegexSplitter, declares
// the fields "num" and "char", and sets a single space as the pattern string.
// The value returned by end() is what gets assigned to splitter.
Function splitter = Fluid.function()
.RegexSplitter()
.fieldDeclaration( fields( "num", "char" ) )
.patternString( " " )
.end();
// An assembly builder chaining Pipes into complex assemblies.
// Only the starting point of the builder is obtained here; no pipes are added yet.
AssemblyBuilder.Start assembly = Fluid.assembly();
@Test
public void testTrapTapSourceSinkCopy() throws Exception
{
getPlatform().copyFromLocal( inputFileApache );
Scheme scheme = getPlatform().getTestFailScheme();
Tap source = getPlatform().getTap( scheme, inputFileApache, SinkMode.KEEP );
Pipe pipe = new Pipe( "map" );
// SQL text: select every column from "example"."sales_fact_1997" (alias s)
// joined to "example"."employee" (alias e) where e."EMPID" equals s."CUST_ID".
// Identifiers are double-quoted inside the SQL, hence the escaped quotes.
String statement = "select *\n"
+ "from \"example\".\"sales_fact_1997\" as s\n"
+ "join \"example\".\"employee\" as e\n"
+ "on e.\"EMPID\" = s.\"CUST_ID\"";
// Two input taps over DATA_EMPLOYEE and DATA_SALESFACT, both opened with SinkMode.KEEP.
// NOTE(review): the "," and "\"" arguments are presumably the field delimiter and the
// quote character — confirm against getDelimitedFile's signature.
Tap empTap = getPlatform().getDelimitedFile( ",", "\"", new SQLTypeResolver(), DATA_EMPLOYEE, SinkMode.KEEP );
Tap salesTap = getPlatform().getDelimitedFile( ",", "\"", new SQLTypeResolver(), DATA_SALESFACT, SinkMode.KEEP );
// Output tap at getOutputPath( "dynamic" ); unlike the inputs it uses SinkMode.REPLACE.
Tap resultsTap = getPlatform().getDelimitedFile( ",", "\"", new SQLTypeResolver(), getOutputPath( "dynamic" ), SinkMode.REPLACE );
#!/bin/bash
curl -s -XPUT localhost:9200/_template/template_tags -d '{
"template" : "tags*",
"settings" : {
"index.analysis.analyzer.csv.type" : "pattern",
"index.analysis.analyzer.csv.pattern" : ","
},
"mappings" : {
"_default_" : {
@Test
public void testSameSourceMergeThreeChainGroup() throws Exception
{
getPlatform().copyFromLocal( inputFileLower );
Tap sourceLower = getPlatform().getTextFile( inputFileLower );
Map sources = new HashMap();
sources.put( "split", sourceLower );
@cwensel
cwensel / gist:1182739
Created August 31, 2011 03:19
Properties builder
// Fetch the current properties, then pass them through the app-properties builder.
Map<Object, Object> properties = getProperties();
// The builder is given the application name and version plus three tags
// (stamp, appName, and the literal "app-tag"); whatever build( properties )
// returns replaces the previous value of the properties variable.
properties = appProps()
.setName( appName )
.setVersion( appVersion )
.addTag( stamp )
.addTag( appName )
.addTag( "app-tag" )
.build( properties ); // method name still undecided: build(), buildProperties(), or something else?
public void testComplexLogic() throws Exception
{
if( !new File( inputFileLhs ).exists() )
fail( "data file not found" );
copyFromLocal( inputFileLhs );
Tap source = new Hfs( new TextDelimited( new Fields( "num", "char" ), " " ), inputFileLhs );
Pipe pipe = new Pipe( "test" );
public class WritableSequenceFile extends SequenceFile
{
protected Class<? extends Writable> writableType;
/**
 * Creates a WritableSequenceFile for the given Writable type and fields.
 *
 * @param writableType the Writable class to record in the {@code writableType} field
 * @param fields the fields handed unchanged to the SequenceFile superclass constructor
 */
public WritableSequenceFile( Class<? extends Writable> writableType, Fields fields )
{
// The superclass is initialised with the fields only; the type is kept locally.
super( fields );
this.writableType = writableType;
}