Last active
April 7, 2017 03:53
-
-
Save sakthiinfotec/102fca54c91b411f626a to your computer and use it in GitHub Desktop.
Backs up a single HBase table to a CSV file in the local filesystem. The list of columns to export — all from a single column family — must be pre-defined. The code uses the Hadoop/HBase client jars to connect to the table and the OpenCSV jar to write the CSV records. It assumes every column value is a string; the code can be extended further.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.sakthiinfotec; | |
import java.io.FileWriter; | |
import java.io.IOException; | |
import java.util.ArrayList; | |
import java.util.List; | |
import org.apache.hadoop.conf.Configuration; | |
import org.apache.hadoop.hbase.HBaseConfiguration; | |
import org.apache.hadoop.hbase.client.HConnection; | |
import org.apache.hadoop.hbase.client.HConnectionManager; | |
import org.apache.hadoop.hbase.client.HTable; | |
import org.apache.hadoop.hbase.client.HTableInterface; | |
import org.apache.hadoop.hbase.client.Result; | |
import org.apache.hadoop.hbase.client.ResultScanner; | |
import org.apache.hadoop.hbase.client.Scan; | |
import org.apache.hadoop.hbase.util.Bytes; | |
import org.apache.log4j.Logger; | |
import au.com.bytecode.opencsv.CSVWriter; | |
/** | |
* This class take a backup of HBase table data of a given columns. | |
* It depends on OpenCSV, Hadoop & HBase jars. | |
*/ | |
public class BackupHBaseTable2CSV { | |
private static final Logger LOGGER = Logger.getLogger(BackupHBaseTable2CSV.class); | |
private static final int WRITE_BATCH_SIZE = 5000; | |
private static final String ZK_HOST = "127.0.0.1"; | |
private static final String ZK_PORT = "2181"; | |
private Configuration config = null; | |
private static final byte[] CF = "CF".getBytes(); | |
private static final char CSV_FIELD_SEPARATOR = ','; | |
private static final String BACKUP_CSV_FILE = "./backup/bigtable.csv"; | |
/** | |
* Constructor sets configuration options | |
*/ | |
public BackupHBaseTable2CSV() { | |
config = HBaseConfiguration.create(); | |
config.set("hbase.zookeeper.quorum", ZK_HOST); | |
config.set("hbase.zookeeper.property.clientPort", ZK_PORT); | |
} | |
public HTableInterface getHTable(final String tableName) { | |
HTableInterface table = null; | |
try { | |
table = new HTable(config, tableName); | |
} catch (IOException e) { | |
LOGGER.error("Unable to get HTable instance for a table \"" | |
+ tableName + "\". Error:" + e.getMessage() | |
+ ". Please check if the table exists and it's status", e); | |
} | |
return table; | |
} | |
private void backup(final String tableName, final String columns[]) throws IOException { | |
CSVWriter writer = new CSVWriter(new FileWriter(BACKUP_CSV_FILE), CSV_FIELD_SEPARATOR); | |
byte[][] bColumns = new byte[columns.length][]; | |
int i = 0; | |
for (String column : columns) { | |
bColumns[i++] = Bytes.toBytes(column); | |
} | |
List<String[]> rowList = new ArrayList<String[]>(); | |
HConnection connection = HConnectionManager.createConnection(config); | |
HTableInterface table = connection.getTable(tableName); | |
Scan scan = new Scan(); | |
try { | |
writer.writeNext(columns); | |
ResultScanner scanner = table.getScanner(scan); | |
String[] row; | |
int rowCount = 0; | |
for (Result result : scanner) { | |
i = 0; | |
row = new String[columns.length]; | |
for (byte[] column : bColumns) { | |
row[i++] = getColumnValue(result, column); | |
} | |
rowList.add(row); | |
if (++rowCount % WRITE_BATCH_SIZE == 0) { | |
LOGGER.info(rowCount + " rows backuped ..."); | |
writer.writeAll(rowList); | |
rowList.clear(); | |
} | |
} | |
if (rowList.size() > 0) | |
writer.writeAll(rowList); | |
LOGGER.info("Totally " + rowCount + " rows backup completed ..."); | |
} catch (IOException e) { | |
throw e; | |
} finally { | |
writer.close(); | |
} | |
} | |
private String getColumnValue(Result result, byte[] qualifier) { | |
byte[] value = result.getValue(CF, qualifier); | |
return (value == null) ? "" : Bytes.toString(value); | |
} | |
/** | |
* @param args | |
* @throws IOException | |
*/ | |
public static void main(String[] args) throws IOException { | |
String tableName = "bigtable"; | |
final String[] columns = { "rowKey", "id", "lat", "lng", | |
"time", "speed", "head"}; | |
new BackupHBaseTable2CSV().backup(tableName, columns); | |
System.out.println("Backup completed..."); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To back up columns with different data types, please visit my GitHub repository hbase-csv-backup