core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://namenode.data.net:9011</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/tmp/hadoop_test_fullydistributed_${user.name}</value>
    <final>true</final>
    <description>A base for other temporary directories.</description>
  </property>
</configuration>
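With fs.default.name set as above, any client that loads this core-site.xml resolves HDFS paths against the NameNode at namenode.data.net:9011, and hadoop.tmp.dir anchors the other temporary directories under /tmp. A quick sanity check, as a sketch assuming the Hadoop 1.x hadoop command is on the PATH and HADOOP_CONF_DIR points at this configuration directory:

# list the HDFS root via the configured default filesystem
hadoop fs -ls /
# the same listing with the NameNode URI spelled out explicitly
hadoop fs -ls hdfs://namenode.data.net:9011/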
-------------------------------------------------------------------------------------------
hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>dfs.hosts.exclude</name>
    <value>/SHARE/USERFS/els7/users/surfer/exclude_hosts</value>
  </property>
  <property>
    <!-- Path on the local filesystem where the NameNode stores
         the namespace and transaction logs persistently. If
         this is a comma-delimited list of directories then the
         name table is replicated in all of the directories, for
         redundancy. -->
    <name>dfs.name.dir</name>
    <value>/scratch/hadoop_test_surfer/dfs/name/</value>
    <final>true</final>
  </property>
  <property>
    <!-- Comma-separated list of paths on the local filesystem of
         a DataNode where it should store its blocks. If this is
         a comma-delimited list of directories, then data will be
         stored in all named directories, typically on different
         devices. -->
    <name>dfs.data.dir</name>
    <value>/scratch/hadoop_test_surfer/dfs/data/</value> <!-- we only have one local disk -->
    <final>true</final>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <!-- for the secondarynamenode -->
  <property>
    <name>dfs.http.address</name>
    <value>namenode.data.net:50070</value>
  </property>
  <!-- added for HBase support -->
  <property>
    <name>dfs.support.append</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.datanode.max.xcievers</name>
    <value>4096</value>
  </property>
</configuration>
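dfs.hosts.exclude points at a plain-text file listing DataNodes to decommission, one hostname per line; the NameNode only re-reads it when asked to refresh. A rough sketch of the decommissioning workflow with this configuration, assuming the Hadoop 1.x commands are on the PATH and are run as the HDFS superuser (the DataNode hostname below is only a hypothetical example):

# add the node to be retired to the exclude file named in dfs.hosts.exclude
echo datanode03.data.net >> /SHARE/USERFS/els7/users/surfer/exclude_hosts
# tell the NameNode to re-read the exclude file and start decommissioning
hadoop dfsadmin -refreshNodes
# watch progress until the node reports "Decommissioned"
hadoop dfsadmin -report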
--------------------------------------------------------------------------------
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>namenode.data.net:9012</value>
  </property>
  <property>
    <name>mapred.system.dir</name>
    <value>/hadoop/mapred/system</value>
    <final>true</final>
  </property>
  <property>
    <name>mapred.local.dir</name>
    <value>/scratch/hadoop_test_surfer/tmp</value>
  </property>
  <property>
    <name>mapred.tasktracker.map.tasks.maximum</name>
    <value>3</value>
  </property>
  <property>
    <name>mapred.tasktracker.reduce.tasks.maximum</name>
    <value>3</value>
  </property>
  <property>
    <name>mapred.task.timeout</name>
    <value>1800000</value> <!-- 30 minutes -->
  </property>
</configuration>
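With both mapred.tasktracker.*.tasks.maximum values set to 3, each TaskTracker runs at most three map and three reduce tasks at once, and mapred.task.timeout (1800000 ms) kills any task that reports no progress for 30 minutes. A sketch of exercising the JobTracker at namenode.data.net:9012 once HDFS is running, assuming the Hadoop 1.x scripts are on the PATH and the examples jar shipped with the release is available (its exact filename varies by version):

# start the JobTracker and the TaskTrackers listed in the slaves file
start-mapred.sh
# submit a small test job (pi estimator: 4 maps, 1000 samples per map)
hadoop jar $HADOOP_HOME/hadoop-examples-*.jar pi 4 1000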
---------------------------------------------------------------------------------------------
hadoop-env.sh
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
# The java implementation to use. Required.
# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
export JAVA_HOME=/Archive/Software/Java/Sun/jdk1.6.0_22/
# Extra Java CLASSPATH elements. Optional.
# export HADOOP_CLASSPATH=
export HADOOP_CLASSPATH=/SHARE/USERFS/els7/users/surfer/hbase-0.92.1/hbase-0.92.1.jar:/SHARE/USERFS/els7/users/surfer/hbase-0.92.1/lib/zookeeper-3.4.3.jar
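# (presumably the HBase 0.92.1 and ZooKeeper 3.4.3 jars are added here so that
#  MapReduce jobs launched through these daemons can load the HBase client classes)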
# The maximum amount of heap to use, in MB. Default is 1000.
# export HADOOP_HEAPSIZE=2000
# Extra Java runtime options. Empty by default.
# export HADOOP_OPTS=-server
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
# export HADOOP_TASKTRACKER_OPTS=
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
# export HADOOP_CLIENT_OPTS
# Extra ssh options. Empty by default.
# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
# Where log files are stored. $HADOOP_HOME/logs by default.
# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
#export HADOOP_LOG_DIR=/scratch/hadoop_test_surfer/hadoop-logs
#export HADOOP_LOG_DIR=${MyPath}/logs
export HADOOP_LOG_DIR=/SHARE/USERFS/els7/users/surfer/hadoop_conf/logs
# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
#export HADOOP_SLAVES=${MyPath}/slaves
export HADOOP_SLAVES=/SHARE/USERFS/els7/users/surfer/hadoop_conf/slaves
# host:path where hadoop code should be rsync'd from. Unset by default.
# export HADOOP_MASTER=master:/home/$USER/src/hadoop
# Seconds to sleep between slave commands. Unset by default. This
# can be useful in large clusters, where, e.g., slave rsyncs can
# otherwise arrive faster than the master can service them.
# export HADOOP_SLAVE_SLEEP=0.1
# The directory where pid files are stored. /tmp by default.
# export HADOOP_PID_DIR=/var/hadoop/pids
# A string representing this instance of hadoop. $USER by default.
# export HADOOP_IDENT_STRING=$USER
# The scheduling priority for daemon processes. See 'man nice'.
# export HADOOP_NICENESS=10
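The ${MyPath} variable in the commented-out alternatives above is never defined in this snippet; the active exports point HADOOP_LOG_DIR and HADOOP_SLAVES under /SHARE/USERFS/els7/users/surfer/hadoop_conf. A hedged sketch of bringing the cluster up with these files, assuming that same directory is the configuration directory (adjust HADOOP_CONF_DIR to wherever the files actually live):

# assumption: this gist's *-site.xml, hadoop-env.sh and slaves live here
export HADOOP_CONF_DIR=/SHARE/USERFS/els7/users/surfer/hadoop_conf
# first run only: format the NameNode metadata directory (dfs.name.dir)
hadoop namenode -format
# start HDFS: NameNode, SecondaryNameNode, and the DataNodes in the slaves file
start-dfs.sh
# start MapReduce: JobTracker and TaskTrackers
start-mapred.sh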