Skip to content

Instantly share code, notes, and snippets.

@sritchie
Created November 12, 2011 14:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sritchie/1360555 to your computer and use it in GitHub Desktop.
Save sritchie/1360555 to your computer and use it in GitHub Desktop.
(ns backtype.lyoto.deploy
(:use clojure.tools.cli))
(comment
;; ## Uberjar Deploy
(require '[clojure.string :as s]
'[pallet.stevedore :as stevedore]
'[pallet.execute :as execute]
'[backtype.lyoto.log :as log])
(defmacro script-output
  "Expands to code that hands `forms` to `execute/local-script`, takes the
  `:out` entry of the result, and returns that string after `String.trim`."
  [& forms]
  `(let [^String output# (-> (execute/local-script ~@forms) :out)]
     (.trim output#)))
(defn filename
  "Returns the last `/`-separated segment of `full-path`.

  Splitting is done with Java's `String.split`, which drops trailing empty
  segments, so a path ending in `/` yields the segment before that slash and
  a path made only of slashes yields nil."
  [full-path]
  (-> full-path
      (.split "/")
      last))
(defn scp-uberjar
  "Runs a local `scp -i key-path` that copies `standalone-filepath` to
  `dest-path` on host `ip-or-dns`, logging in as the `hadoop` user.
  Returns whatever `execute/local-script` returns."
  [standalone-filepath dest-path key-path ip-or-dns]
  ;; Build the user@host:path target up front so the script form stays flat.
  (let [remote-target (format "hadoop@%s:%s" ip-or-dns dest-path)]
    (execute/local-script
     (scp "-i" ~key-path ~standalone-filepath ~remote-target))))
;; ## EMR Deploy and Kill
;; S3 location of the Hadoop config file. `parse-emr-config` emits it as the
;; value that follows `--core-config-file`.
(def core-config-path "s3://hdfs2/hadoop/config.xml")
(defn parse-emr-config
  "Renders `conf-map` as a single double-quoted, comma-separated argument
  string: first `--core-config-file` and `core-config-path`, then one
  `-s,key=value` pair per map entry. Keys are rendered with `name`, so
  keywords lose their leading colon."
  [conf-map]
  (let [overrides (map (fn [[k v]] (format "-s,%s=%s" (name k) v))
                       conf-map)]
    (format "\"--core-config-file,%s,%s\""
            core-config-path
            (s/join "," overrides))))
;; Cluster settings consumed by `boot-emr!`:
;;   :hardware-id -> `--instance-type` for the master, core and task groups
;;   :spot-price  -> `--bid-price`
;;   :swap-size   -> `--args` of the add-swap bootstrap action
;;   :config      -> Hadoop overrides, rendered by `parse-emr-config` for the
;;                   configure-hadoop bootstrap action
(def backtype-cluster
  {:swap-size   16384
   :spot-price  4.00
   :hardware-id "m1.large"
   :config
   {:dfs.datanode.max.xcievers                  5096
    :mapred.reduce.max.attempts                 12
    :mapred.map.max.attempts                    20
    :mapred.jobtracker.completeuserjobs.maximum 600
    :mapred.map.tasks.speculative.execution     false
    :mapred.reduce.tasks.speculative.execution  false
    :mapred.tasktracker.map.tasks.maximum       4
    :mapred.tasktracker.reduce.tasks.maximum    2
    :mapred.jobtracker.taskScheduler
    "org.apache.hadoop.mapred.FairScheduler"}})
(defn describe-emr
  "Runs `elastic-mapreduce --list` locally and returns a lazy seq holding one
  {:id :status :dns :name} map per line of its output. The final output line
  is discarded, and every remaining line is split into fields on runs of five
  or more whitespace characters; fields a line does not have come back nil."
  []
  (let [listing (script-output (elastic-mapreduce --list))
        rows    (butlast (s/split-lines listing))]
    (map (fn [row]
           (let [[id status dns name] (s/split row #"\s{5,}")]
             {:id id :status status :dns dns :name name}))
         rows)))
(defn name->key
  "Scans `(describe-emr)` for entries whose :name equals `job-name` and whose
  :status is not \"FAILED\", and returns the first truthy value stored under
  `key` among them, or nil when there is none."
  [key job-name]
  (let [live-match? (fn [job]
                      (and (= (:name job) job-name)
                           (not= (:status job) "FAILED")))]
    (->> (describe-emr)
         (filter live-match?)
         (some #(get % key)))))
;; `name->key` with the key fixed to :id: given a job name, returns the :id of
;; the first matching non-FAILED entry from `describe-emr`, or nil.
(def name->id (partial name->key :id))
(defn boot-emr!
  "Creates an `--alive` EMR job flow called `name` with one master instance,
  `core-count` core instances and `task-count` task instances, all using the
  instance type from `backtype-cluster`; the task group is followed by the
  configured `--bid-price`. If `name->id` already finds a job flow with this
  name, nothing is created and a message is logged instead.

  TODO: Modify code to take group, type, count."
  [name core-count task-count]
  (if (name->id name)
    (log/log-message "Cluster named " name " already exists!")
    (let [{instance-type :hardware-id
           bid-price     :spot-price
           swap-size     :swap-size
           hadoop-conf   :config} backtype-cluster
          ;; Rendered once here so the script form below only splices values.
          hadoop-args (parse-emr-config hadoop-conf)]
      (execute/local-script
       (elastic-mapreduce
        --create --name ~name --alive
        --instance-group master --instance-type ~instance-type --instance-count 1
        --instance-group core --instance-type ~instance-type --instance-count ~core-count
        --instance-group task --instance-type ~instance-type --instance-count ~task-count
        --bid-price ~bid-price
        --enable-debugging
        --bootstrap-action s3://hdfs2/hadoop/install-packages
        --bootstrap-action s3://elasticmapreduce/bootstrap-actions/configurations/latest/memory-intensive
        --bootstrap-action s3://elasticmapreduce/bootstrap-actions/add-swap --args ~swap-size
        --bootstrap-action s3://elasticmapreduce/bootstrap-actions/configure-hadoop --args ~hadoop-args)))))
(defn kill-emr!
  "Destroys the EMR job flow called `name`, logging before and after the
  destroy command runs. When `name->id` finds no job flow with that name, only
  a \"doesn't exist\" message is logged."
  [name]
  ;; Resolve the id exactly once. Each `name->id` call shells out to
  ;; `elastic-mapreduce --list`, so looking it up a second time inside the
  ;; script both doubles the work and could splice nil into the command if the
  ;; listing changed between the two calls.
  (if-let [job-id (name->id name)]
    (do (log/log-message "Destroying EMR cluster...")
        (execute/local-script
         (elastic-mapreduce --destroy --name ~job-id))
        (log/log-message "Cluster destroyed."))
    (log/log-message "Sorry, EMR cluster named " name " doesn't exist.")))
(defn jobtracker-dns
  "Returns the :dns value that `name->key` finds for the cluster called
  `name`. When there is none, logs a message and returns the result of that
  logging call instead."
  [name]
  (if-let [dns (name->key :dns name)]
    dns
    (log/log-message "Sorry, no cluster named " name " exists.")))
(defn try-parse-int
  "Parses `i` with `Integer/parseInt` and returns the number, or nil when `i`
  cannot be parsed (nil, empty, non-numeric, or not a string at all)."
  [i]
  (try
    (Integer/parseInt i)
    ;; Catch Exception rather than Throwable: bad input should yield nil, but
    ;; JVM Errors (OutOfMemoryError, StackOverflowError, ...) must propagate.
    (catch Exception _
      nil)))
;; TODO: convert -main from `with-command-line` over to clojure.tools.cli,
;; which the ns form already pulls in.
(defn -main
  "Command-line entry point for provisioning EMR clusters.

  Exactly one action is taken, checked in this order: `get-dns?` looks up the
  jobtracker DNS, `start?` boots a cluster (both `core-size` and `task-size`
  must parse as integers), `stop?` destroys the cluster. With none of them
  set, a usage hint is printed. The cluster name is always the `name` option
  (default \"dev\") prefixed with \"lyoto-\".

  TODO: Add help script for this, and open source; have it loop until
  job's finished booting."
  [& args]
  ;; NOTE(review): `with-command-line` is not referred by this file's ns form;
  ;; presumably it comes from clojure.contrib.command-line -- confirm.
  (with-command-line args
    "Provisioning tool for EMR clusters."
    [[start? "Start Cluster?"]
     [stop? "Stop Cluster?"]
     [get-dns? "Get jobtracker DNS?"]
     [core-size "Core node count."]
     [task-size "Task node count."]
     [name "Job name" "dev"]]
    (let [[core-count task-count] (map try-parse-int [core-size task-size])
          ;; `str` never returns nil, so no separate \"missing name\" check is
          ;; needed (the old one could never fire).
          cluster-name (str "lyoto-" name)]
      (cond
        get-dns? (jobtracker-dns cluster-name)
        start?   (if (and core-count task-count)
                   (boot-emr! cluster-name core-count task-count)
                   (println "Please define a cluster size."))
        stop?    (kill-emr! cluster-name)
        ;; Previously this branch invoked the string as a function, which
        ;; throws ClassCastException instead of showing the message.
        :else    (println "Please provide some options!")))))
  ) ;; closes the enclosing (comment ...) form
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment