core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://namenode.data.net:9011</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/tmp/hadoop_test_fullydistributed_${user.name}</value>
    <final>true</final>
    <description>A base for other temporary directories.</description>
  </property>
</configuration>
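With fs.default.name set as above, any client that loads this core-site.xml resolves HDFS paths against the NameNode at namenode.data.net:9011, and hadoop.tmp.dir anchors the other temporary directories under /tmp. A quick sanity check, as a sketch assuming the Hadoop 1.x hadoop command is on the PATH and HADOOP_CONF_DIR points at this configuration directory:

# list the HDFS root via the configured default filesystem
hadoop fs -ls /
# the same listing with the NameNode URI spelled out explicitly
hadoop fs -ls hdfs://namenode.data.net:9011/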
-------------------------------------------------------------------------------------------
hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>dfs.hosts.exclude</name>
    <value>/SHARE/USERFS/els7/users/surfer/exclude_hosts</value>
  </property>
  <property>
    <!-- Path on the local filesystem where the NameNode stores
         the namespace and transaction logs persistently. If
         this is a comma-delimited list of directories then the
         name table is replicated in all of the directories, for
         redundancy. -->
    <name>dfs.name.dir</name>
    <value>/scratch/hadoop_test_surfer/dfs/name/</value>
    <final>true</final>
  </property>
  <property>
    <!-- Comma-separated list of paths on the local filesystem of
         a DataNode where it should store its blocks. If this is
         a comma-delimited list of directories, then data will be
         stored in all named directories, typically on different
         devices. -->
    <name>dfs.data.dir</name>
    <value>/scratch/hadoop_test_surfer/dfs/data/</value> <!-- we only have one local disk -->
    <final>true</final>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <!-- for the secondarynamenode -->
  <property>
    <name>dfs.http.address</name>
    <value>namenode.data.net:50070</value>
  </property>
  <!-- added for HBase support -->
  <property>
    <name>dfs.support.append</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.datanode.max.xcievers</name>
    <value>4096</value>
  </property>
</configuration>
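dfs.hosts.exclude points at a plain-text file listing DataNodes to decommission, one hostname per line; the NameNode only re-reads it when asked to refresh. A rough sketch of the decommissioning workflow with this configuration, assuming the Hadoop 1.x commands are on the PATH and are run as the HDFS superuser (the DataNode hostname below is only a hypothetical example):

# add the node to be retired to the exclude file named in dfs.hosts.exclude
echo datanode03.data.net >> /SHARE/USERFS/els7/users/surfer/exclude_hosts
# tell the NameNode to re-read the exclude file and start decommissioning
hadoop dfsadmin -refreshNodes
# watch progress until the node reports "Decommissioned"
hadoop dfsadmin -report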
--------------------------------------------------------------------------------
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>namenode.data.net:9012</value>
  </property>
  <property>
    <name>mapred.system.dir</name>
    <value>/hadoop/mapred/system</value>
    <final>true</final>
  </property>
  <property>
    <name>mapred.local.dir</name>
    <value>/scratch/hadoop_test_surfer/tmp</value>
  </property>
  <property>
    <name>mapred.tasktracker.map.tasks.maximum</name>
    <value>3</value>
  </property>
  <property>
    <name>mapred.tasktracker.reduce.tasks.maximum</name>
    <value>3</value>
  </property>
  <property>
    <name>mapred.task.timeout</name>
    <value>1800000</value> <!-- 30 minutes -->
  </property>
</configuration>
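With both mapred.tasktracker.*.tasks.maximum values set to 3, each TaskTracker runs at most three map and three reduce tasks at once, and mapred.task.timeout (1800000 ms) kills any task that reports no progress for 30 minutes. A sketch of exercising the JobTracker at namenode.data.net:9012 once HDFS is running, assuming the Hadoop 1.x scripts are on the PATH and the examples jar shipped with the release is available (its exact filename varies by version):

# start the JobTracker and the TaskTrackers listed in the slaves file
start-mapred.sh
# submit a small test job (pi estimator: 4 maps, 1000 samples per map)
hadoop jar $HADOOP_HOME/hadoop-examples-*.jar pi 4 1000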
---------------------------------------------------------------------------------------------
hadoop-env.sh
# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.
# The java implementation to use. Required.
# export JAVA_HOME=/usr/lib/j2sdk1.5-sun
export JAVA_HOME=/Archive/Software/Java/Sun/jdk1.6.0_22/
# Extra Java CLASSPATH elements. Optional.
# export HADOOP_CLASSPATH=
export HADOOP_CLASSPATH=/SHARE/USERFS/els7/users/surfer/hbase-0.92.1/hbase-0.92.1.jar:/SHARE/USERFS/els7/users/surfer/hbase-0.92.1/lib/zookeeper-3.4.3.jar
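# (presumably the HBase 0.92.1 and ZooKeeper 3.4.3 jars are added here so that
#  MapReduce jobs launched through these daemons can load the HBase client classes)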
# The maximum amount of heap to use, in MB. Default is 1000.
# export HADOOP_HEAPSIZE=2000
# Extra Java runtime options. Empty by default.
# export HADOOP_OPTS=-server
# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
# export HADOOP_TASKTRACKER_OPTS=
# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
# export HADOOP_CLIENT_OPTS
# Extra ssh options. Empty by default.
# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
# Where log files are stored. $HADOOP_HOME/logs by default.
# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
#export HADOOP_LOG_DIR=/scratch/hadoop_test_surfer/hadoop-logs
#export HADOOP_LOG_DIR=${MyPath}/logs
export HADOOP_LOG_DIR=/SHARE/USERFS/els7/users/surfer/hadoop_conf/logs
# File naming remote slave hosts. $HADOOP_HOME/conf/slaves by default.
# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
#export HADOOP_SLAVES=${MyPath}/slaves
export HADOOP_SLAVES=/SHARE/USERFS/els7/users/surfer/hadoop_conf/slaves
# host:path where hadoop code should be rsync'd from. Unset by default.
# export HADOOP_MASTER=master:/home/$USER/src/hadoop
# Seconds to sleep between slave commands. Unset by default. This
# can be useful in large clusters, where, e.g., slave rsyncs can
# otherwise arrive faster than the master can service them.
# export HADOOP_SLAVE_SLEEP=0.1
# The directory where pid files are stored. /tmp by default.
# export HADOOP_PID_DIR=/var/hadoop/pids
# A string representing this instance of hadoop. $USER by default.
# export HADOOP_IDENT_STRING=$USER
# The scheduling priority for daemon processes. See 'man nice'.
# export HADOOP_NICENESS=10
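The ${MyPath} variable in the commented-out alternatives above is never defined in this snippet; the active exports point HADOOP_LOG_DIR and HADOOP_SLAVES under /SHARE/USERFS/els7/users/surfer/hadoop_conf. A hedged sketch of bringing the cluster up with these files, assuming that same directory is the configuration directory (adjust HADOOP_CONF_DIR to wherever the files actually live):

# assumption: this gist's *-site.xml, hadoop-env.sh and slaves live here
export HADOOP_CONF_DIR=/SHARE/USERFS/els7/users/surfer/hadoop_conf
# first run only: format the NameNode metadata directory (dfs.name.dir)
hadoop namenode -format
# start HDFS: NameNode, SecondaryNameNode, and the DataNodes in the slaves file
start-dfs.sh
# start MapReduce: JobTracker and TaskTrackers
start-mapred.sh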