Created
November 18, 2014 16:43
-
-
Save ndimiduk/bcf33f09cc7e4408f684 to your computer and use it in GitHub Desktop.
Before and after examples of using the HBase DataType API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Before and after examples based on extracts from the UsersDAO in HBase In Action
 * https://github.com/hbaseinaction/twitbase/blob/master/src/main/java/HBaseIA/TwitBase/hbase/UsersDAO.java
 */
// | |
// Using raw byte[]'s directly. | |
// | |
private static Put mkPut(User u) { | |
Put p = new Put(Bytes.toBytes(u.user)); | |
p.add(INFO_FAM, USER_COL, Bytes.toBytes(u.user)); | |
p.add(INFO_FAM, NAME_COL, Bytes.toBytes(u.name)); | |
p.add(INFO_FAM, EMAIL_COL, Bytes.toBytes(u.email)); | |
p.add(INFO_FAM, PASS_COL, Bytes.toBytes(u.password)); | |
return p; | |
} | |
// | |
// Using the DataType APIs | |
// | |
/** Singleton instance of the DataType encoder. */ | |
static final RawString ENC_STR = RawString.ASCENDING; | |
private static Put mkPut(User u) { | |
// create a PositionedByteRange into which values are serialized | |
int maxLen = Math.max(u.user.length(), u.name.length()); | |
PositionedByteRange pbr = new SimplePositionedByteRange(maxLen); | |
ENC_STR.encode(pbr, u.user); | |
// use the encoded value as the rowkey | |
Put p = new Put(pbr.getBytes(), pbr.getOffset(), pbr.getPosition()); | |
p.add(INFO_FAM, USER_COL, Bytes.copy(pbr.getBytes(), pbr.getOffset(), pbr.getPosition())); | |
// reset the position marker to reuse the ByteRange | |
pbr.setPosition(0); | |
ENC_STR.encode(pbr, u.name); | |
p.add(INFO_FAM, NAME_COL, Bytes.copy(pbr.getBytes(), pbr.getOffset(), pbr.getPosition())); | |
// ... | |
return p; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Before and after examples based on extracts from the TwitsDAO in HBase In Action | |
* https://github.com/hbaseinaction/twitbase/blob/master/src/main/java/HBaseIA/TwitBase/hbase/TwitsDAO.java | |
*/ | |
// | |
// Using raw byte[]'s directly. | |
// | |
private static byte[] mkRowKey(String user, DateTime dt) { | |
byte[] userHash = Md5Utils.md5sum(user); | |
byte[] timestamp = Bytes.toBytes(-1 * dt.getMillis()); | |
byte[] rowKey = new byte[Md5Utils.MD5_LENGTH + longLength]; | |
int offset = 0; | |
offset = Bytes.putBytes(rowKey, offset, userHash, 0, userHash.length); | |
Bytes.putBytes(rowKey, offset, timestamp, 0, timestamp.length); | |
return rowKey; | |
} | |
// | |
// Using the DataType APIs | |
// | |
/* A custom type for encoding Strings as MD5 sums. */ | |
private static final DataType<String> MD5_TYPE = new DataType<String>() { | |
// boiler plate | |
@Override public boolean isOrderPreserving() { return false; } | |
@Override public Order getOrder() { return null; } | |
@Override public boolean isNullable() { return false; } | |
@Override public boolean isSkippable() { return true; } | |
@Override public Class<String> encodedClass() { return String.class; } | |
@Override public int encodedLength(String val) { return Md5Utils.MD5_LENGTH; } | |
// interesting bits | |
@Override public int skip(PositionedByteRange src) { | |
src.setPosition(src.getPosition() + Md5Utils.MD5_LENGTH); | |
return Md5Utils.MD5_LENGTH; | |
} | |
@Override public String decode(PositionedByteRange src) { | |
throw new RuntimeException("Cannot reverse MD5."); | |
} | |
@Override public int encode(PositionedByteRange dst, String val) { | |
dst.put(Md5Utils.md5sum(val)); | |
return Md5Utils.MD5_LENGTH; | |
} | |
}; | |
/* A Struct instance for encoding compound rowkey comprised of md5(username), reverse timestamp. */ | |
private static final Struct ENC_ROWKEY = new StructBuilder() | |
.add(MD5_TYPE) | |
.add(OrderedInt64.DESCENDING) | |
.toStruct(); | |
private static byte[] mkTypedRowKey(String user, DateTime dt) { | |
PositionedByteRange pbr = new SimplePositionedByteRange(ENC_ROWKEY.encodedLength(null)); | |
ENC_ROWKEY.encode(pbr, new Object[] { user, dt.getMillis() }); | |
return Bytes.copy(pbr.getBytes(), pbr.getOffset(), pbr.getPosition()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment