EMR Steps
Hadoop Streaming step | |
{ | |
"jobFlowId": "j-1JKGNMYXT59DP", | |
"steps": [ | |
{ | |
"name": "EMRHadoopLogPushingTest", | |
"actionOnFailure": "CANCEL_AND_WAIT", | |
"hadoopJarStep": { | |
"jar": "command-runner.jar", | |
"args": [ | |
"/usr/bin/hadoop-streaming", | |
"-Dmapred.child.java.opts=-Xmx1024m", | |
"-files", | |
"<s3 location to py file containing mapper and reduer>", | |
"-input", | |
"<s3 location to input file>", | |
"-output", | |
"<s3 location for output>", | |
"-mapper", | |
"<py file containing the mapper>", | |
"-reducer", | |
"aggregate" | |
] | |
} | |
} | |
], | |
"progressListener": {}, | |
"requestClientOptions": { | |
"markers": {}, | |
"readLimit": 131073 | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment