-
-
Save jblomo/1806296 to your computer and use it in GitHub Desktop.
backtype EMR configs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns backtype.lyoto.deploy | |
(:use clojure.tools.cli)) | |
(comment | |
;; ## Uberjar Deploy | |
(require '[clojure.string :as s] | |
'[pallet.stevedore :as stevedore] | |
'[pallet.execute :as execute] | |
'[backtype.lyoto.log :as log]) | |
(defmacro script-output
  "Expands to code that runs `forms` through `execute/local-script`, takes
  the value under the `:out` key of the result, and returns it with leading
  and trailing whitespace trimmed."
  [& forms]
  `(let [^String stdout# (-> (execute/local-script ~@forms) :out)]
     (.trim stdout#)))
(defn filename
  "Returns the final \"/\"-separated segment of `full-path`
  (e.g. \"/a/b/c.jar\" yields \"c.jar\")."
  [full-path]
  (let [segments (.split full-path "/")]
    (last segments)))
(defn scp-uberjar
  "Runs `scp` locally to copy the file at `standalone-filepath` to
  `dest-path` on the host `ip-or-dns`, connecting as the `hadoop` user with
  `key-path` passed to scp's `-i` option. Returns whatever
  `execute/local-script` returns."
  [standalone-filepath dest-path key-path ip-or-dns]
  (let [remote-target (format "hadoop@%s:%s" ip-or-dns dest-path)]
    (execute/local-script
     (scp "-i" ~key-path ~standalone-filepath ~remote-target))))
;; ## EMR Deploy and Kill | |
;; S3 location of the base Hadoop config file; always passed as the
;; `--core-config-file` argument built by `parse-emr-config`.
(def core-config-path "s3://hdfs2/hadoop/config.xml")

(defn parse-emr-config
  "Builds the double-quoted, comma-separated argument string handed to the
  configure-hadoop bootstrap action.

  The result always starts with `--core-config-file,<core-config-path>`,
  followed by one `-s,<key>=<value>` pair per entry of `conf-map`. Keys may
  be keywords, symbols or strings (anything `name` accepts).

  An empty or nil `conf-map` yields just the core-config-file argument, with
  no trailing comma."
  [conf-map]
  (let [overrides (for [[k v] conf-map]
                    (format "-s,%s=%s" (name k) v))
        ;; Join everything in one pass so that no dangling separator is
        ;; produced when there are no overrides.
        all-args (cons (str "--core-config-file," core-config-path)
                       overrides)]
    (format "\"%s\"" (s/join "," all-args))))
;; Cluster settings read by `boot-emr!`.
(def backtype-cluster
  (let [;; Hadoop property overrides; `boot-emr!` runs these through
        ;; `parse-emr-config` for the configure-hadoop bootstrap action.
        hadoop-overrides
        {:dfs.datanode.max.xcievers 5096
         :mapred.reduce.max.attempts 12
         :mapred.map.max.attempts 20
         :mapred.jobtracker.completeuserjobs.maximum 600
         :mapred.map.tasks.speculative.execution false
         :mapred.reduce.tasks.speculative.execution false
         :mapred.tasktracker.map.tasks.maximum 4
         :mapred.tasktracker.reduce.tasks.maximum 2
         :mapred.jobtracker.taskScheduler "org.apache.hadoop.mapred.FairScheduler"}]
    {;; Passed as `--instance-type` for the master, core and task groups.
     :hardware-id "m1.large"
     ;; Passed as `--bid-price`.
     :spot-price 4.00
     ;; Passed as `--args` to the add-swap bootstrap action.
     :swap-size 16384
     :config hadoop-overrides}))
(defn describe-emr
  "Runs `elastic-mapreduce --list` locally and returns a lazy seq of maps
  with the keys :id, :status, :dns and :name, one per line of output.

  The last line of the output is dropped. Each remaining line is split on
  runs of five or more whitespace characters and the first four fields are
  used, in that order; missing fields come back as nil."
  []
  (let [listing (script-output (elastic-mapreduce --list))
        rows (butlast (s/split-lines listing))]
    (map (fn [row]
           (let [[id status dns name] (s/split row #"\s{5,}")]
             {:id id :status status :dns dns :name name}))
         rows)))
(defn name->key
  "Looks through the job maps returned by `describe-emr` for one whose :name
  equals `job-name` and whose :status is not \"FAILED\", and returns that
  map's value for `key`. Returns the first such truthy value, or nil when
  there is none."
  [key job-name]
  (letfn [(lookup [job]
            (when (and (not= "FAILED" (:status job))
                       (= job-name (:name job)))
              (get job key)))]
    (some lookup (describe-emr))))

;; Convenience lookup: the :id of the non-failed job with the given name.
(def name->id (partial name->key :id))
(defn boot-emr!
  "Creates an EMR cluster called `name` by running
  `elastic-mapreduce --create` locally, unless `name->id` already finds a
  cluster with that name, in which case a message is logged instead.

  The cluster gets one master instance, `core-count` core instances and
  `task-count` task instances. Instance type, bid price, swap size and the
  Hadoop configuration all come from `backtype-cluster`.

  TODO: modify to take group, type and count."
  [name core-count task-count]
  (if-not (name->id name)
    (let [instance-type (:hardware-id backtype-cluster)
          bid (:spot-price backtype-cluster)
          swap (:swap-size backtype-cluster)
          hadoop-args (parse-emr-config (:config backtype-cluster))]
      (execute/local-script
       (elastic-mapreduce --create
                          --name ~name
                          --alive
                          --instance-group master
                          --instance-type ~instance-type
                          --instance-count 1
                          --instance-group core
                          --instance-type ~instance-type
                          --instance-count ~core-count
                          --instance-group task
                          --instance-type ~instance-type
                          --instance-count ~task-count
                          --bid-price ~bid
                          --enable-debugging
                          --bootstrap-action s3://hdfs2/hadoop/install-packages
                          --bootstrap-action
                          s3://elasticmapreduce/bootstrap-actions/configurations/latest/memory-intensive
                          --bootstrap-action
                          s3://elasticmapreduce/bootstrap-actions/add-swap
                          --args ~swap
                          --bootstrap-action
                          s3://elasticmapreduce/bootstrap-actions/configure-hadoop
                          --args ~hadoop-args)))
    (log/log-message "Cluster named " name " already exists!")))
(defn kill-emr!
  "Destroys the EMR cluster called `name`, if `name->id` can find one, by
  running `elastic-mapreduce --destroy` locally with the cluster's id.
  Logs a message before and after; when no such cluster exists, logs an
  apology instead and runs nothing."
  [name]
  ;; Resolve the id exactly once: each `name->id` call shells out to list
  ;; the clusters, and a second lookup could come back nil if the cluster
  ;; vanished between the existence check and the destroy command.
  (if-let [cluster-id (name->id name)]
    (do (log/log-message "Destroying EMR cluster...")
        (execute/local-script
         (elastic-mapreduce --destroy --name ~cluster-id))
        (log/log-message "Cluster destroyed."))
    (log/log-message "Sorry, EMR cluster named " name " doesn't exist.")))
(defn jobtracker-dns
  "Returns the :dns value that `name->key` finds for the cluster called
  `name`. When there is none, logs a message and returns whatever
  `log/log-message` returns."
  [name]
  (if-let [dns (name->key :dns name)]
    dns
    (log/log-message "Sorry, no cluster named " name " exists.")))
(defn try-parse-int
  "Parses `i` with `Integer/parseInt` and returns the resulting int, or nil
  when `i` cannot be parsed (this includes a nil `i`)."
  [i]
  (try
    (Integer/parseInt i)
    ;; Catch Exception rather than Throwable so that JVM Errors
    ;; (OutOfMemoryError, StackOverflowError, ...) are not silently
    ;; swallowed; every parse failure is still turned into nil.
    (catch Exception _
      nil)))
;; TODO: convert the option parsing in -main over to clojure.tools.cli.
(defn -main
  "Command-line entry point for provisioning EMR clusters.

  Recognised options (parsed by `with-command-line`):
    start?     boot a cluster; needs both core-size and task-size
    stop?      destroy the cluster
    get-dns?   look up the cluster's DNS name via `jobtracker-dns`
    core-size  core node count
    task-size  task node count
    name       job name, default \"dev\"; always prefixed with \"lyoto-\"

  When several of get-dns?/start?/stop? are given, the first in that order
  wins. With none of them, a usage hint is printed.

  TODO: Add help script for this, and open source; have it loop until
  job's finished booting."
  [& args]
  ;; NOTE(review): `with-command-line` is not referred by any require visible
  ;; in this file -- confirm where it is expected to come from.
  (with-command-line args
    "Provisioning tool for EMR clusters."
    [[start? "Start Cluster?"]
     [stop? "Stop Cluster?"]
     [get-dns? "Get jobtracker DNS?"]
     [core-size "Core node count."]
     [task-size "Task node count."]
     [name "Job name" "dev"]]
    (let [[core-count task-count] (map try-parse-int [core-size task-size])
          cluster-name (str "lyoto-" name)]
      (cond
        ;; Test the raw option, not the prefixed name: the prefixed string
        ;; is always truthy, so checking it could never report a missing
        ;; name.
        get-dns? (if (seq name)
                   ;; NOTE(review): the DNS value is returned, not printed.
                   (jobtracker-dns cluster-name)
                   (println "Please provide a cluster name."))
        start? (if (and core-count task-count)
                 (boot-emr! cluster-name core-count task-count)
                 (println "Please define a cluster size."))
        stop? (kill-emr! cluster-name)
        ;; Print the hint; a bare string in call position would be invoked
        ;; as a function and throw ClassCastException.
        :else (println "Please provide some options!")))))
) ; closes the enclosing (comment ...) form
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment