;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Sample of a Jobdef for a Streaming job
;;;
;;; Example of common usage:
;;;   lemur run strm-jobdef.clj --bucket my-bucket-name
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Declares the extra command-line option this jobdef accepts: --bucket,
;; together with the help text shown for it.
;; NOTE(review): presumably the bucket is what ${data-uri} in the steps is
;; derived from -- confirm against lemur's default settings.
(catch-args
  [:bucket "An s3 bucket, e.g. 'com.myco.bucket1'"])
;; Cluster definition for the streaming sample: two m1.large instances
;; (same type for master and slave), debugging disabled.
(defcluster sample-cluster
  :master-instance-type "m1.large"
  :slave-instance-type "m1.large"
  :num-instances 2
  ;; NOTE(review): this key pair name looks site-specific -- replace it with
  ;; a key pair that exists in your own account before running.
  :keypair "tcc-integration"
  :enable-debugging? false
  ;; Path of the Hadoop streaming jar used as the runtime jar for the steps.
  :runtime-jar "/home/hadoop/contrib/streaming/hadoop-streaming.jar")
;; First streaming step: reads the public wordcount sample input, runs the
;; wordSplitter.py mapper with the "aggregate" reducer, and writes its
;; results to ${data-uri}/out1.
(defstep sample-strm-step
  :args.positional
  ["-input"   "s3://elasticmapreduce/samples/wordcount/input"
   "-output"  "${data-uri}/out1"
   "-mapper"  "s3://elasticmapreduce/samples/wordcount/wordSplitter.py"
   "-reducer" "aggregate"])
;; Second streaming step: takes ${data-uri}/out1 as its input, applies the
;; same wordSplitter.py mapper and "aggregate" reducer, and writes to
;; ${data-uri}/out2.
(defstep second-strm-step
  :args.positional
  ["-input"   "${data-uri}/out1"
   "-output"  "${data-uri}/out2"
   "-mapper"  "s3://elasticmapreduce/samples/wordcount/wordSplitter.py"
   "-reducer" "aggregate"])
;; Launch the job: sample-cluster with the two streaming steps, passed in the
;; order sample-strm-step, second-strm-step.
;; NOTE(review): second-strm-step reads ${data-uri}/out1, so the steps are
;; presumably expected to run in the order listed -- confirm.
(fire! sample-cluster sample-strm-step second-strm-step)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment