mmm/gist:1375100

## gistfile1.txt
# install juju

    $ sudo apt-get install juju charm-tools

configure your EC2 account...

    $ juju
    $ vi .juju/environments.yaml

add EC2 access/secret keys


# install charms

    $ mkdir -p ~/charms/oneiric
    $ cd ~/charms/oneiric
    $ charm get hadoop-master
    $ charm get hadoop-slave
    $ charm get ganglia


# set up stack script

Save the following as ~/run-my-stack.sh

    #!/bin/bash
    #
    # Deploy the Hadoop demo stack with juju.
    #
    # Deploys three services from the local charm repository under
    # $HOME/charms -- hadoop-master as "namenode", ganglia as "jobmonitor"
    # and hadoop-slave as "datacluster" -- relates datacluster to the other
    # two, adds two more datacluster units and exposes jobmonitor and namenode.

    # Abort on the first failing command (or unset variable) so that "done"
    # is only printed when every juju call succeeded.
    set -euo pipefail
    trap 'echo "failed: $BASH_COMMAND" >&2' ERR

    juju_root=$HOME

    ###

    echo "deploying stack"

    juju deploy --repository "$juju_root/charms" local:hadoop-master namenode
    juju deploy --repository "$juju_root/charms" local:ganglia jobmonitor
    juju deploy --repository "$juju_root/charms" local:hadoop-slave datacluster
    juju add-relation namenode datacluster
    juju add-relation jobmonitor datacluster
    # Add two more datacluster units.
    for _ in {1..2}; do
      juju add-unit datacluster
    done
    juju expose jobmonitor
    juju expose namenode

    echo "done"

and make it executable

    $ chmod +x ~/run-my-stack.sh


# run

    $ juju bootstrap

wait a bit until

    $ juju status

returns something meaningful.


Spin up the stack

    $ ~/run-my-stack.sh


Watch `juju status`... once everything is up and happy,

    $ juju ssh namenode/0

then start up the job

    ubuntu@<ec2-url>$ sudo -su hdfs /usr/lib/hadoop/teragen.sh


# watch it run

You can see ganglia...
Grab the jobmonitor url from `juju status` and open a browser to:

    http://<jobmonitor-url>/ganglia

You can watch the namenode progress...
Grab the namenode url from `juju status` and open a browser to:

    http://<namenode-url>:50070/

and

    http://<namenode-url>:50030/


# add nodes

You can add nodes in the middle of a run:

    $ juju add-unit datacluster
    $ juju add-unit datacluster
    $ juju add-unit datacluster

and Hadoop will pick them up fine. It takes a bit for the new nodes to show up in Ganglia.

# optionally configure the job

You can run the whole thing using defaults as above or pass in specific job config.
(might wanna do this if you want the demo to run for a while and/or add a bunch of nodes)

Save the following as ~/demo-namenode.yaml

    namenode:
      job_size: 10000000
      job_maps: 400
      job_reduces: 400
      job_data_dir: in_one
      job_output_dir: out_one


change the namenode line in run-my-stack.sh from

    juju deploy --repository "$juju_root/charms" local:hadoop-master namenode

to

    juju deploy --repository "$juju_root/charms" --config "$juju_root/demo-namenode.yaml" local:hadoop-master namenode
	# install juju

	$ sudo apt-get install juju charm-tools

	configure your EC2 account...

	$ juju
	$ vi .juju/environments.yaml

	add EC2 access/secret keys


	# install charms

	$ mkdir -p ~/charms/oneiric
	$ cd ~/charms/oneiric
	$ charm get hadoop-master
	$ charm get hadoop-slave
	$ charm get ganglia


	# set up stack script

	Save the following as ~/run-my-stack.sh

	#!/bin/bash
	#
	# Deploy the Hadoop demo stack with juju.
	#
	# Deploys three services from the local charm repository under
	# $HOME/charms -- hadoop-master as "namenode", ganglia as "jobmonitor"
	# and hadoop-slave as "datacluster" -- relates datacluster to the other
	# two, adds two more datacluster units and exposes jobmonitor and namenode.

	# Abort on the first failing command (or unset variable) so that "done"
	# is only printed when every juju call succeeded.
	set -euo pipefail
	trap 'echo "failed: $BASH_COMMAND" >&2' ERR

	juju_root=$HOME

	###

	echo "deploying stack"

	juju deploy --repository "$juju_root/charms" local:hadoop-master namenode
	juju deploy --repository "$juju_root/charms" local:ganglia jobmonitor
	juju deploy --repository "$juju_root/charms" local:hadoop-slave datacluster
	juju add-relation namenode datacluster
	juju add-relation jobmonitor datacluster
	# Add two more datacluster units.
	for _ in {1..2}; do
	  juju add-unit datacluster
	done
	juju expose jobmonitor
	juju expose namenode

	echo "done"

	and make it executable

	$ chmod +x ~/run-my-stack.sh


	# run

	$ juju bootstrap

	wait a bit until

	$ juju status

	returns something meaningful.


	Spin up the stack

	$ ~/run-my-stack.sh


	Watch `juju status`... once everything is up and happy,

	$ juju ssh namenode/0

	then start up the job

	ubuntu@<ec2-url>$ sudo -su hdfs /usr/lib/hadoop/teragen.sh


	# watch it run

	You can see ganglia...
	Grab the jobmonitor url from `juju status` and open a browser to:

	http://<jobmonitor-url>/ganglia

	You can watch the namenode progress...
	Grab the namenode url from `juju status` and open a browser to:

	http://<namenode-url>:50070/

	and

	http://<namenode-url>:50030/




	# add nodes

	You can add nodes in the middle of a run:

	$ juju add-unit datacluster
	$ juju add-unit datacluster
	$ juju add-unit datacluster

	and Hadoop will pick them up fine. It takes a bit for the new nodes to show up in Ganglia.

	# optionally configure the job

	You can run the whole thing using defaults as above or pass in specific job config.
	(might wanna do this if you want the demo to run for a while and/or add a bunch of nodes)

	Save the following as ~/demo-namenode.yaml

	namenode:
	  job_size: 10000000
	  job_maps: 400
	  job_reduces: 400
	  job_data_dir: in_one
	  job_output_dir: out_one


	change the namenode line in run-my-stack.sh from

	juju deploy --repository "$juju_root/charms" local:hadoop-master namenode

	to

	juju deploy --repository "$juju_root/charms" --config "$juju_root/demo-namenode.yaml" local:hadoop-master namenode