@shiumachi
Created November 28, 2018 06:25
#!/bin/bash
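#
# Re-initialize a local Hadoop test environment: move the old Hadoop tmp dir
# aside, reformat the NameNode, restart HDFS, create the standard directories,
# and load the test data sets. Assumes util.sh (sourced below) provides
# wait_seconds.
#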
source util.sh
TMP_DIR=/tmp
HADOOP_BIN_DIR=${HADOOP_HOME}/bin
HADOOP_COMMAND=${HADOOP_BIN_DIR}/hadoop
HADOOP_START_HDFS_COMMAND=$(which start-dfs.sh)
HADOOP_STOP_HDFS_COMMAND=$(which stop-dfs.sh)
HADOOP_NAMENODE_FORMAT_COMMAND="${HADOOP_COMMAND} namenode -format"
HADOOP_HDFS_COMMAND="${HADOOP_COMMAND} fs"
HADOOP_TMP_DIR=${TMP_DIR}/hadoop
# local test data path
DATA_DIR=${HOME}/data
# hdfs path
HDFS_TMP_DIR=/tmp
HDFS_TEST_DIR=/test
HBASE_DIR=/hbase
# yarn
YARN_LOG_HDFS_DIR=/var/log/hadoop-yarn
# mrv2
MAPREDUCE_V2_HISTORY_DIR=/user/history
# basic mapreduce
TESTDATA_BASENAME=sherlock
TESTDATA_COMPRESSED=${DATA_DIR}/${TESTDATA_BASENAME}.tar.gz
TESTDATA=${TMP_DIR}/${TESTDATA_BASENAME}
HDFS_TESTDATA_DIR=${HDFS_TEST_DIR}/${TESTDATA_BASENAME}
# streaming script
STREAMING_SCRIPTS_BASENAME=streaming
STREAMING_SCRIPTS_COMPRESSED=${DATA_DIR}/${STREAMING_SCRIPTS_BASENAME}.tar.gz
STREAMING_SCRIPTS=${TMP_DIR}/${STREAMING_SCRIPTS_BASENAME}
HDFS_STREAMING_SCRIPTS_DIR=${HDFS_TEST_DIR}/${STREAMING_SCRIPTS_BASENAME}
# hive
TESTDATA_HIVE_BASENAME=hivedata
TESTDATA_HIVE_COMPRESSED=${DATA_DIR}/${TESTDATA_HIVE_BASENAME}.tar.gz
TESTDATA_HIVE=${TMP_DIR}/${TESTDATA_HIVE_BASENAME}
HDFS_TESTDATA_HIVE_DIR=${HDFS_TEST_DIR}/${TESTDATA_HIVE_BASENAME}
# random 8-character suffix, used to rename old data aside instead of deleting it
HASH=$(uuidgen | cut -c 1-8)
usage(){
printf "usage: %s [-prod]\n" "$(basename "$0")" 1>&2
printf "  with no arguments, print the commands (dry run); with -prod, execute them\n" 1>&2
}
initialize(){
echo "initialize()"
mv ${HADOOP_TMP_DIR} ${HADOOP_TMP_DIR}.${HASH}
}
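# each step below has a *_dryrun twin that echoes its commands instead of running them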
initialize_dryrun(){
echo "initialize()"
echo "mv ${HADOOP_TMP_DIR} ${HADOOP_TMP_DIR}.${HASH}"
}
namenode_format(){
echo "namenode_format()"
${HADOOP_STOP_HDFS_COMMAND}
${HADOOP_NAMENODE_FORMAT_COMMAND}
}
namenode_format_dryrun(){
echo "namenode_format()"
echo ${HADOOP_STOP_HDFS_COMMAND}
echo ${HADOOP_NAMENODE_FORMAT_COMMAND}
}
decompress_data(){
echo "decompress_data()"
tar xf ${TESTDATA_COMPRESSED} -C ${TMP_DIR}
tar xf ${STREAMING_SCRIPTS_COMPRESSED} -C ${TMP_DIR}
tar xf ${TESTDATA_HIVE_COMPRESSED} -C ${TMP_DIR}
}
decompress_data_dryrun(){
echo "decompress_data()"
echo "tar xf ${TESTDATA_COMPRESSED} -C ${TMP_DIR}"
echo "tar xf ${STREAMING_SCRIPTS_COMPRESSED} -C ${TMP_DIR}"
echo "tar xf ${TESTDATA_HIVE_COMPRESSED} -C ${TMP_DIR}"
}
start_dfs(){
echo "start_dfs()"
${HADOOP_START_HDFS_COMMAND}
}
start_dfs_dryrun(){
echo "start_dfs()"
echo "${HADOOP_START_HDFS_COMMAND}"
}
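# note on modes: 777 keeps the shared test dirs world-writable; the MRv2 history
# dir gets 1777 (sticky bit) so users can write but not remove each other's files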
initialize_hdfs(){
echo "initialize_hdfs()"
${HADOOP_HDFS_COMMAND} -mkdir ${HDFS_TMP_DIR}
${HADOOP_HDFS_COMMAND} -chmod 777 ${HDFS_TMP_DIR}
${HADOOP_HDFS_COMMAND} -mkdir ${HDFS_TEST_DIR}
${HADOOP_HDFS_COMMAND} -chmod 777 ${HDFS_TEST_DIR}
${HADOOP_HDFS_COMMAND} -mkdir ${HBASE_DIR}
${HADOOP_HDFS_COMMAND} -chmod 777 ${HBASE_DIR}
${HADOOP_HDFS_COMMAND} -mkdir -p ${YARN_LOG_HDFS_DIR}     # -p: parents like /var/log do not exist on a fresh HDFS
${HADOOP_HDFS_COMMAND} -chmod 777 ${YARN_LOG_HDFS_DIR}
${HADOOP_HDFS_COMMAND} -mkdir -p ${MAPREDUCE_V2_HISTORY_DIR} # -p: /user does not exist yet
${HADOOP_HDFS_COMMAND} -chmod 1777 ${MAPREDUCE_V2_HISTORY_DIR}
}
initialize_hdfs_dryrun(){
echo "initialize_hdfs()"
echo "${HADOOP_HDFS_COMMAND} -mkdir ${HDFS_TMP_DIR}"
echo "${HADOOP_HDFS_COMMAND} -chmod 777 ${HDFS_TMP_DIR}"
echo "${HADOOP_HDFS_COMMAND} -mkdir ${HDFS_TEST_DIR}"
echo "${HADOOP_HDFS_COMMAND} -chmod 777 ${HDFS_TEST_DIR}"
echo "${HADOOP_HDFS_COMMAND} -mkdir ${HBASE_DIR}"
echo "${HADOOP_HDFS_COMMAND} -chmod 777 ${HBASE_DIR}"
echo "${HADOOP_HDFS_COMMAND} -mkdir ${YARN_LOG_HDFS_DIR}"
echo "${HADOOP_HDFS_COMMAND} -chmod 777 ${YARN_LOG_HDFS_DIR}"
echo "${HADOOP_HDFS_COMMAND} -mkdir ${MAPREDUCE_V2_HISTORY_DIR}"
echo "${HADOOP_HDFS_COMMAND} -chmod 1777 ${MAPREDUCE_V2_HISTORY_DIR}"
}
put_testdata(){
echo "put_testdata()"
${HADOOP_HDFS_COMMAND} -put ${TESTDATA} ${HDFS_TESTDATA_DIR}
${HADOOP_HDFS_COMMAND} -put ${STREAMING_SCRIPTS} ${HDFS_STREAMING_SCRIPTS_DIR}
${HADOOP_HDFS_COMMAND} -put ${TESTDATA_HIVE} ${HDFS_TESTDATA_HIVE_DIR}
}
put_testdata_dryrun(){
echo "put_testdata()"
echo "${HADOOP_HDFS_COMMAND} -put ${TESTDATA} ${HDFS_TESTDATA_DIR}"
echo "${HADOOP_HDFS_COMMAND} -put ${STREAMING_SCRIPTS} ${HDFS_STREAMING_SCRIPTS_DIR}"
echo "${HADOOP_HDFS_COMMAND} -put ${TESTDATA_HIVE} ${HDFS_TESTDATA_HIVE_DIR}"
}
finalize(){
echo "finalize()"
chmod 777 ${HADOOP_TMP_DIR}
}
finalize_dryrun(){
echo "finalize()"
echo "chmod 777 ${HADOOP_TMP_DIR}"
}
cleanup(){
echo "cleanup()"
mv ${TESTDATA} ${TESTDATA}.${HASH}
mv ${TESTDATA_HIVE} ${TESTDATA_HIVE}.${HASH}
}
cleanup_dryrun(){
echo "cleanup()"
echo "mv ${TESTDATA} ${TESTDATA}.${HASH}"
echo "mv ${TESTDATA_HIVE} ${TESTDATA_HIVE}.${HASH}"
}
execute(){
if [ "${DRYRUN}" == 1 ]; then
initialize_dryrun
namenode_format_dryrun
start_dfs_dryrun
initialize_hdfs_dryrun
decompress_data_dryrun
put_testdata_dryrun
finalize_dryrun
cleanup_dryrun
else
initialize
namenode_format
start_dfs
initialize_hdfs
decompress_data
wait_seconds 5 # from util.sh: give the NameNode time to leave safe mode
put_testdata
finalize
cleanup
fi
}
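# argument handling: any single argument (conventionally -prod) triggers a real
# run; no arguments prints the dry run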
if [ $# -eq 1 ]; then
DRYRUN=0
elif [ $# -eq 0 ]; then
DRYRUN=1
else
usage
exit 1
fi
execute
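#
# Example (the filename init_hadoop.sh is illustrative):
#   ./init_hadoop.sh        # dry run: only print the commands
#   ./init_hadoop.sh -prod  # re-initialize HDFS and load the test data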