
@sakthiinfotec
Last active April 7, 2017 03:53
Backs up a single HBase table to a CSV file on the local filesystem. The list of columns to export from a single column family must be pre-defined. The code uses the necessary Hadoop and HBase jars to connect to the table, along with the OpenCSV jar to write the CSV records. It assumes all column values are strings, and it can be extended further.
package com.sakthiinfotec;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
import au.com.bytecode.opencsv.CSVWriter;
/**
 * This class takes a backup of HBase table data for a given set of columns.
 * It depends on the OpenCSV, Hadoop & HBase jars.
 */
public class BackupHBaseTable2CSV {

    private static final Logger LOGGER = Logger.getLogger(BackupHBaseTable2CSV.class);
    private static final int WRITE_BATCH_SIZE = 5000;
    private static final String ZK_HOST = "127.0.0.1";
    private static final String ZK_PORT = "2181";
    private Configuration config = null;
    private static final byte[] CF = "CF".getBytes();
    private static final char CSV_FIELD_SEPARATOR = ',';
    private static final String BACKUP_CSV_FILE = "./backup/bigtable.csv";

    /**
     * Constructor sets configuration options
     */
    public BackupHBaseTable2CSV() {
        config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", ZK_HOST);
        config.set("hbase.zookeeper.property.clientPort", ZK_PORT);
    }
    public HTableInterface getHTable(final String tableName) {
        HTableInterface table = null;
        try {
            table = new HTable(config, tableName);
        } catch (IOException e) {
            LOGGER.error("Unable to get HTable instance for table \""
                    + tableName + "\". Error: " + e.getMessage()
                    + ". Please check if the table exists and its status", e);
        }
        return table;
    }
    private void backup(final String tableName, final String[] columns) throws IOException {
        CSVWriter writer = new CSVWriter(new FileWriter(BACKUP_CSV_FILE), CSV_FIELD_SEPARATOR);

        // Pre-convert column names to byte[] qualifiers once, outside the scan loop
        byte[][] bColumns = new byte[columns.length][];
        int i = 0;
        for (String column : columns) {
            bColumns[i++] = Bytes.toBytes(column);
        }

        List<String[]> rowList = new ArrayList<String[]>();
        HConnection connection = HConnectionManager.createConnection(config);
        HTableInterface table = connection.getTable(tableName);
        Scan scan = new Scan();
        ResultScanner scanner = null;
        try {
            // Header row first, then data rows flushed in batches of WRITE_BATCH_SIZE
            writer.writeNext(columns);
            scanner = table.getScanner(scan);
            String[] row;
            int rowCount = 0;
            for (Result result : scanner) {
                i = 0;
                row = new String[columns.length];
                for (byte[] column : bColumns) {
                    row[i++] = getColumnValue(result, column);
                }
                rowList.add(row);
                if (++rowCount % WRITE_BATCH_SIZE == 0) {
                    LOGGER.info(rowCount + " rows backed up ...");
                    writer.writeAll(rowList);
                    rowList.clear();
                }
            }
            // Write any rows remaining from the last partial batch
            if (rowList.size() > 0)
                writer.writeAll(rowList);
            LOGGER.info("Backup of " + rowCount + " rows completed ...");
        } finally {
            if (scanner != null)
                scanner.close();
            table.close();
            connection.close();
            writer.close();
        }
    }
    /**
     * Returns the value of CF:qualifier as a String, or an empty string if the
     * cell is missing. Assumes values are string-encoded.
     */
    private String getColumnValue(Result result, byte[] qualifier) {
        byte[] value = result.getValue(CF, qualifier);
        return (value == null) ? "" : Bytes.toString(value);
    }
    /**
     * @param args
     * @throws IOException
     */
    public static void main(String[] args) throws IOException {
        String tableName = "bigtable";
        final String[] columns = { "rowKey", "id", "lat", "lng",
                "time", "speed", "head" };
        new BackupHBaseTable2CSV().backup(tableName, columns);
        System.out.println("Backup completed...");
    }
}
@sakthiinfotec (Author)

To back up columns with different data types, please visit my GitHub repository hbase-csv-backup.
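A minimal sketch of what that generalization could look like, assuming the caller supplies a per-column type string; the method name and the handled types below are illustrative only, not the implementation in hbase-csv-backup:

    // Hypothetical variant of getColumnValue() that decodes non-string cells.
    // The "type" argument and its values ("long", "double", "int") are assumptions for illustration.
    private String getTypedColumnValue(Result result, byte[] qualifier, String type) {
        byte[] value = result.getValue(CF, qualifier);
        if (value == null)
            return "";
        if ("long".equals(type))
            return String.valueOf(Bytes.toLong(value));
        if ("double".equals(type))
            return String.valueOf(Bytes.toDouble(value));
        if ("int".equals(type))
            return String.valueOf(Bytes.toInt(value));
        return Bytes.toString(value); // default: string-encoded value
    }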

@shaileshguptafk

@sakthiinfotec, it should be extended to export only specific columns from selected column families.
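
With the same client API used above, one way to do that is to add explicit family/qualifier pairs to the Scan before opening the scanner; the family and column names below are placeholders, not part of the original gist:

    // Sketch: restrict the scan to chosen columns of chosen column families.
    // "CF1"/"CF2" and the qualifiers are placeholder names; adapt to your schema.
    Scan scan = new Scan();
    scan.addColumn(Bytes.toBytes("CF1"), Bytes.toBytes("lat"));
    scan.addColumn(Bytes.toBytes("CF1"), Bytes.toBytes("lng"));
    scan.addColumn(Bytes.toBytes("CF2"), Bytes.toBytes("speed"));
    ResultScanner scanner = table.getScanner(scan);

The getColumnValue() helper would then also need the column family passed in, since values could come from more than one family.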
