Skip to content

Instantly share code, notes, and snippets.

@ndimiduk
Created November 18, 2014 16:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ndimiduk/bcf33f09cc7e4408f684 to your computer and use it in GitHub Desktop.
Save ndimiduk/bcf33f09cc7e4408f684 to your computer and use it in GitHub Desktop.
Before and after examples of using the HBase DataType API
/*
* Before and after examples based on extracts from the UsersDAO in HBase In Action
* https://github.com/hbaseinaction/twitbase/blob/master/src/main/java/HBaseIA/TwitBase/hbase/UsersDAO.java
*/
//
// Using raw byte[]'s directly.
//
/**
 * Builds the {@code Put} for a user by converting each field to bytes by hand.
 *
 * The user id serves as the rowkey and is also stored in the user column; the
 * name, email and password fields are written to their own columns of the
 * info family.
 *
 * @param u the user whose fields are written
 * @return a Put keyed by the user id and holding the four info columns
 */
private static Put mkPut(User u) {
  final byte[] rowKey = Bytes.toBytes(u.user);
  final Put put = new Put(rowKey);

  // Each value is converted immediately before it is added, one column at a time.
  final byte[] userValue = Bytes.toBytes(u.user);
  put.add(INFO_FAM, USER_COL, userValue);

  final byte[] nameValue = Bytes.toBytes(u.name);
  put.add(INFO_FAM, NAME_COL, nameValue);

  final byte[] emailValue = Bytes.toBytes(u.email);
  put.add(INFO_FAM, EMAIL_COL, emailValue);

  final byte[] passwordValue = Bytes.toBytes(u.password);
  put.add(INFO_FAM, PASS_COL, passwordValue);

  return put;
}
//
// Using the DataType APIs
//
/**
 * Shared instance of the DataType encoder for Strings: the {@code ASCENDING}
 * instance of {@code RawString}. {@code mkPut} uses it to serialize String
 * values into a {@code PositionedByteRange}.
 */
static final RawString ENC_STR = RawString.ASCENDING;
/**
 * Builds the {@code Put} for a user by serializing values through the DataType API.
 *
 * A single {@code PositionedByteRange} is reused for every value: the user id is
 * encoded first and used as both the rowkey and the user column value, then the
 * position is reset and the name is encoded into the same buffer.
 *
 * @param u the user whose fields are written
 * @return a Put keyed by the encoded user id
 */
private static Put mkPut(User u) {
  // Size the shared buffer by ENCODED length. String.length() counts chars, not
  // bytes, so it under-allocates whenever the encoded form is longer than the
  // char count (e.g. non-ASCII text) and the encode call would overrun the buffer.
  int maxLen = Math.max(ENC_STR.encodedLength(u.user), ENC_STR.encodedLength(u.name));
  PositionedByteRange pbr = new SimplePositionedByteRange(maxLen);

  ENC_STR.encode(pbr, u.user);
  // use the encoded value as the rowkey; after encoding, the position marks the
  // number of bytes written from the range's offset
  Put p = new Put(pbr.getBytes(), pbr.getOffset(), pbr.getPosition());
  // copy the bytes out because the range's backing array is reused below
  p.add(INFO_FAM, USER_COL, Bytes.copy(pbr.getBytes(), pbr.getOffset(), pbr.getPosition()));

  // reset the position marker to reuse the ByteRange
  pbr.setPosition(0);
  ENC_STR.encode(pbr, u.name);
  p.add(INFO_FAM, NAME_COL, Bytes.copy(pbr.getBytes(), pbr.getOffset(), pbr.getPosition()));
  // ...
  return p;
}
/*
* Before and after examples based on extracts from the TwitsDAO in HBase In Action
* https://github.com/hbaseinaction/twitbase/blob/master/src/main/java/HBaseIA/TwitBase/hbase/TwitsDAO.java
*/
//
// Using raw byte[]'s directly.
//
/**
 * Assembles a rowkey by hand: the MD5 sum of the user name followed by the
 * negated timestamp in milliseconds.
 *
 * @param user the user name to hash into the key prefix
 * @param dt   the timestamp whose negated millisecond value forms the key suffix
 * @return a new array of {@code Md5Utils.MD5_LENGTH + longLength} bytes
 */
private static byte[] mkRowKey(String user, DateTime dt) {
  final byte[] hashedUser = Md5Utils.md5sum(user);
  final byte[] negatedMillis = Bytes.toBytes(-dt.getMillis());

  final byte[] key = new byte[Md5Utils.MD5_LENGTH + longLength];
  // putBytes returns the offset just past the bytes it wrote, which is where
  // the timestamp portion starts.
  final int timestampStart = Bytes.putBytes(key, 0, hashedUser, 0, hashedUser.length);
  Bytes.putBytes(key, timestampStart, negatedMillis, 0, negatedMillis.length);
  return key;
}
//
// Using the DataType APIs
//
/**
 * A custom type for encoding Strings as MD5 sums.
 *
 * The encoding is one-way: {@code encode} writes {@code Md5Utils.md5sum(val)} to
 * the destination, and {@code decode} always throws because the original String
 * cannot be recovered. Every value is reported as {@code Md5Utils.MD5_LENGTH}
 * bytes long, which is what makes {@code skip} possible.
 */
private static final DataType<String> MD5_TYPE = new DataType<String>() {
  // boiler plate
  @Override public boolean isOrderPreserving() { return false; }
  @Override public Order getOrder() { return null; }
  @Override public boolean isNullable() { return false; }
  @Override public boolean isSkippable() { return true; }
  @Override public Class<String> encodedClass() { return String.class; }
  @Override public int encodedLength(String val) { return Md5Utils.MD5_LENGTH; }

  // interesting bits

  /** Advances {@code src} past one encoded value and returns the bytes skipped. */
  @Override public int skip(PositionedByteRange src) {
    src.setPosition(src.getPosition() + Md5Utils.MD5_LENGTH);
    return Md5Utils.MD5_LENGTH;
  }

  /**
   * Always fails: an MD5 sum cannot be turned back into its input.
   *
   * @throws UnsupportedOperationException always
   */
  @Override public String decode(PositionedByteRange src) {
    // UnsupportedOperationException is the standard signal for an operation an
    // implementation does not provide; it is still a RuntimeException, so
    // existing handlers keep working.
    throw new UnsupportedOperationException("Cannot reverse MD5.");
  }

  /** Writes the MD5 sum of {@code val} to {@code dst} and returns the reported length. */
  @Override public int encode(PositionedByteRange dst, String val) {
    dst.put(Md5Utils.md5sum(val));
    return Md5Utils.MD5_LENGTH;
  }
};
/*
 * A Struct instance for encoding the compound rowkey comprised of md5(username)
 * followed by a reverse timestamp. Fields are added in the same order in which
 * mkTypedRowKey supplies the values: the user name first, the timestamp second.
 */
private static final Struct ENC_ROWKEY = new StructBuilder()
.add(MD5_TYPE)
.add(OrderedInt64.DESCENDING)
.toStruct();
/**
 * Builds the compound rowkey through the DataType API by encoding the user name
 * and timestamp with {@code ENC_ROWKEY}.
 *
 * @param user the user name, supplied as the first struct field
 * @param dt   the timestamp whose millisecond value is the second struct field
 * @return a copy of the encoded bytes
 */
private static byte[] mkTypedRowKey(String user, DateTime dt) {
  // Build the values once and use them for BOTH sizing and encoding. Asking the
  // struct for the encoded length of null fails, because the length is computed
  // from the values array itself.
  Object[] values = new Object[] { user, dt.getMillis() };
  PositionedByteRange pbr = new SimplePositionedByteRange(ENC_ROWKEY.encodedLength(values));
  ENC_ROWKEY.encode(pbr, values);
  // after encoding, the position is the number of bytes written from the offset
  return Bytes.copy(pbr.getBytes(), pbr.getOffset(), pbr.getPosition());
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment