Benchmarking of writing directly to HDFS, appending to an existing file in HDFS, and writing the file locally and then uploading it to HDFS.
package org.shiftehfar.reza.benchmark;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.google.common.base.Joiner;
/**
 * Benchmarking of writing directly to HDFS, appending to an existing file in
 * HDFS, and writing the file locally and then uploading it to HDFS.
 *
 * Surprisingly, directly writing to HDFS using FSDataOutputStream is faster
 * than writing locally and uploading.
 *
 * For a ~165 MB file (a 58-byte message written 3,000,000 times, i.e.
 * 174,000,000 bytes), it takes less than 1 second to write the content
 * directly into HDFS. In contrast, it takes around 4 seconds to write the
 * same file locally and copy it into HDFS.
 *
 * Our Hadoop cluster does not allow appends (dfs.support.append is set to
 * false), so appending was not tested.
 */
public class HadoopBenchmark
{
    static final int totalRecords = 3000000; // 3 mil
    static final String message = "Most folks are as happy as they make up their minds to be.";

    public static void main(String[] args)
    {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://nameservice1");
        // this means we are talking to an HA NameNode setup
        String nameService = "nameservice1";
        conf.set("dfs.nameservices", nameService);
        // client-side setting only; the cluster must also allow appends
        conf.setBoolean("dfs.support.append", true);
        // each entry is of the form namenodeN#hostN.domain:port
        List<String> names = new ArrayList<String>();
        names.add("namenode16#hadoopmaster02-sjc1.prod.uber.internal:8020");
        names.add("namenode15#hadoopmaster03-sjc1.prod.uber.internal:8020");
        List<String> nameNodeList = new ArrayList<String>();
        for (String nameNodeTuple : names) {
            String name = nameNodeTuple.split("#")[0];
            String host = nameNodeTuple.split("#")[1];
            nameNodeList.add(name);
            conf.set("dfs.namenode.rpc-address." + nameService + "." + name, host);
        }
        conf.set("dfs.ha.namenodes." + nameService, Joiner.on(",").join(nameNodeList));
        // in a typical deployment these HA settings come from hdfs-site.xml /
        // core-site.xml on the classpath; they are hard-coded here so the
        // benchmark is self-contained
        conf.set("dfs.client.failover.proxy.provider." + nameService,
                "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
        FileSystem fs = null;

        /*
         * Experiments:
         */
        byte[] messageBytes = message.getBytes();

        System.out.println("Experiment 1: Directly writing to an HDFS file...");
        long startMs = System.currentTimeMillis();
        String inputfile = "hdfs:/user/reza/experiment_1";
        Path filenamePath = new Path(inputfile);
        try {
            fs = FileSystem.get(conf);
            startMs = System.currentTimeMillis();
            // create(path, true) overwrites the file if it already exists
            FSDataOutputStream writer = fs.create(filenamePath, true);
            for (int i = 0; i < totalRecords; i++) {
                writer.write(messageBytes);
            }
            writer.close();
            System.out.println("Experiment 1:: Required time (milli sec): " + (System.currentTimeMillis() - startMs));
        } catch (IOException e) {
            e.printStackTrace();
        }
System.out.println(" Experiment 2: Appending to an existing HDFS file...");
// get if file append functionality is enabled
if (fs.getConf().get("dfs.support.append").equalsIgnoreCase("true")) {
try {
startMs = System.currentTimeMillis();
FSDataOutputStream writer = fs.append(filenamePath);
for (int i = 0; i < totalRecords; i++) {
writer.write(messageBytes);
}
System.out.println("Write succeeded");
writer.close();
System.out.println("Experiment 2:: Required time (milli sec): "
+ (System.currentTimeMillis() - startMs));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
System.out.println(" Experiment 3: Writing locally and uploading file to hdfs...");
String localfile = "experiment_local";
try {
startMs = System.currentTimeMillis();
File localFile = new File(localfile);
FileOutputStream writer = new FileOutputStream(localFile);
for (int i = 0; i < totalRecords; i++) {
writer.write(messageBytes);
}
writer.close();
Path outputFilePath = new Path("hdfs:/user/reza/experiment_3");
Path localFilePath = new Path(localfile);
fs.copyFromLocalFile(false, true, localFilePath, outputFilePath);
System.out.println("Experiment 3:: Required time (milli sec): " + (System.currentTimeMillis() - startMs));
System.out.println(" Output file size is around " + localFile.length() / (1024 * 1024) + " MB");
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println("All experiments successfully finished!");
}
}
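
As a sanity check on the ~165 MB figure in the class comment, here is a minimal sketch (not part of the original gist) that reads back the size of the experiment 1 output. The class name VerifyBenchmarkOutput is hypothetical; it assumes the HA settings for nameservice1 are supplied by hdfs-site.xml / core-site.xml on the classpath rather than hard-coded, and it uses only the standard FileSystem.getFileStatus() / FileStatus.getLen() APIs. Expected size: 58 bytes per record x 3,000,000 records = 174,000,000 bytes, about 165 MB.

package org.shiftehfar.reza.benchmark;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class VerifyBenchmarkOutput
{
    public static void main(String[] args) throws IOException
    {
        // assumes fs.defaultFS and the HA NameNode settings for nameservice1
        // come from hdfs-site.xml / core-site.xml on the classpath
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // same path that experiment 1 writes to
        Path path = new Path("/user/reza/experiment_1");
        long bytes = fs.getFileStatus(path).getLen();
        // 58-byte message x 3,000,000 records = 174,000,000 bytes (~165 MB)
        System.out.println("experiment_1 size: " + bytes / (1024 * 1024) + " MB");
    }
}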