@dgadiraju
Last active January 28, 2019 00:01
# TeraGen – generates random data that can be used as input for TeraSort.
# It takes the number of 100-byte rows and an output directory as arguments.
# Generate a 325 MB file (3,407,872 rows x 100 bytes)
hadoop jar \
/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar \
teragen 3407872 /user/itversity/teragen
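# The row count above can be sanity-checked with shell arithmetic:
# each TeraGen row is exactly 100 bytes, so 325 MB works out to
# 325 * 1024 * 1024 / 100 = 3407872 rows.
rows=$(( 325 * 1024 * 1024 / 100 ))
echo "$rows"   # 3407872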
# Generate a 325 MB file with a block size of 64 MB
hadoop jar \
/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar \
teragen \
-D dfs.blocksize=67108864 3407872 \
/user/itversity/teragen
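# The -D dfs.blocksize value is just 64 MB expressed in bytes.
blocksize=$(( 64 * 1024 * 1024 ))
echo "$blocksize"   # 67108864
# After the job finishes, the block size of a part file can be confirmed
# with the %o stat format specifier (the part-file name below is illustrative):
# hdfs dfs -stat %o /user/itversity/teragen/part-m-00000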
# TeraSort – runs a MapReduce job that sorts the data.
# It takes the source and destination paths as arguments, as shown below.
hadoop jar \
/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar \
terasort /user/itversity/teragen /user/itversity/terasort
# TeraValidate – validates the sorted output, ensuring that the output of TeraSort
# is globally sorted. It takes two arguments: the source directory (the output
# directory of TeraSort) and a destination directory for the validation report.
hadoop jar \
/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar \
teravalidate /user/itversity/terasort /user/itversity/teravalidate
# For HDFS stress testing and to discover performance bottlenecks in the network,
# we use the TestDFSIO benchmark, a distributed read/write test for HDFS.
# The default output directory is /benchmarks/TestDFSIO.
# Become a superuser
sudo su - hdfs
# Write test – runs a write test that generates 3 output files of 100 MB each
hadoop jar \
/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-client-jobclient-tests.jar \
TestDFSIO -write -nrFiles 3 -fileSize 100MB
# Read test – runs the corresponding read test against the 3 files of 100 MB each
hadoop jar \
/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-mapreduce-client-jobclient-tests.jar \
TestDFSIO -read -nrFiles 3 -fileSize 100MB
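# TestDFSIO appends a summary of each run to TestDFSIO_results.log in the
# current working directory; the headline numbers can be pulled out with grep.
# (The log line wording below is assumed from a typical run and can vary by
# Hadoop version.)
grep -E 'Throughput|Average IO rate' TestDFSIO_results.log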