Skip to content

Instantly share code, notes, and snippets.

@balvinder294
Created March 31, 2020 13:02
Show Gist options
  • Save balvinder294/a851cf5204a1fa7b2e56f01ff4b49210 to your computer and use it in GitHub Desktop.
Save balvinder294/a851cf5204a1fa7b2e56f01ff4b49210 to your computer and use it in GitHub Desktop.
Sample code for AWS Textract key-value extraction using the AWS SDK for Java --- Tekraze
/**
 * Extracts form key/value pairs from a list of Textract blocks.
 *
 * <p>The blocks are first indexed by {@code getKeyValueBlockMap}; the
 * resulting lookup tables are then passed to {@code getKeyValueRelationShip},
 * which resolves each key block and its linked value block to text.
 *
 * @param blocks the blocks to process
 * @return a map from key text to value text
 */
public Map<String, String> getKeyValueRelationShipByBlocks(List<Block> blocks) {
    Map<String, Map<String, Block>> keyValueBlockMap = getKeyValueBlockMap(blocks);
    // Hand the computed pairs back to the caller; previously the result was
    // dropped and null was returned unconditionally.
    return getKeyValueRelationShip(keyValueBlockMap);
}
/**
 * Finds the value block that a key block points to.
 *
 * <p>Walks the key block's relationships, looks only at those whose type is
 * {@code "VALUE"} (case-insensitive), and returns the first referenced id
 * that is present in {@code valueMap}.
 *
 * @param keyBlock the key block whose value is wanted
 * @param valueMap value blocks indexed by block id
 * @return the first matching value block, or {@code null} when the key block
 *         has no relationships, no VALUE relationship, or none of the
 *         referenced ids is present in {@code valueMap}
 */
Block findValueBlock(Block keyBlock, Map<String, Block> valueMap) {
    log.debug("Find value Block ");
    List<Relationship> relationships = keyBlock.getRelationships();
    // Mirror the null guard used in getBlockText: a block may carry no relationships.
    if (relationships == null) {
        return null;
    }
    for (Relationship relationship : relationships) {
        // Constant-first comparison also tolerates a null relationship type.
        if (!"VALUE".equalsIgnoreCase(relationship.getType())) {
            continue;
        }
        for (String id : relationship.getIds()) {
            Block valueBlock = valueMap.get(id);
            // Skip ids that are not in the value map instead of giving up on the first miss.
            if (valueBlock != null) {
                return valueBlock;
            }
        }
    }
    return null;
}
/**
 * Builds the key-text to value-text map from the indexed blocks.
 *
 * <p>Reads the {@code "keyMap"}, {@code "valueMap"} and {@code "blockMap"}
 * entries of {@code keyValueBlockMap}. For every key block it looks up the
 * linked value block with {@code findValueBlock} and converts both blocks to
 * text with {@code getBlockText}.
 *
 * @param keyValueBlockMap lookup tables keyed by {@code "keyMap"},
 *                         {@code "valueMap"} and {@code "blockMap"}
 * @return a map from key text to value text; a key whose value block cannot
 *         be found is mapped to the empty string
 */
Map<String, String> getKeyValueRelationShip(Map<String, Map<String, Block>> keyValueBlockMap) {
    log.debug("Get Key Value Relationship {}", keyValueBlockMap);
    Map<String, String> keyValueRelationShip = new HashMap<>();
    Map<String, Block> keyMap = keyValueBlockMap.get("keyMap");
    Map<String, Block> valueMap = keyValueBlockMap.get("valueMap");
    Map<String, Block> blocksMap = keyValueBlockMap.get("blockMap");
    for (Block keyBlock : keyMap.values()) {
        Block valueBlock = findValueBlock(keyBlock, valueMap);
        String key = getBlockText(keyBlock, blocksMap);
        // findValueBlock may return null; getBlockText would dereference it,
        // so treat a missing value block as empty text.
        String val = (valueBlock == null) ? "" : getBlockText(valueBlock, blocksMap);
        keyValueRelationShip.put(key, val);
    }
    return keyValueRelationShip;
}
/**
 * Concatenates the text of a block's children.
 *
 * <p>For every {@code "CHILD"} relationship of {@code resultBlock}, each
 * referenced child is looked up in {@code blocksMap}. A {@code "WORD"} child
 * contributes its text followed by a space; a {@code "SELECTION_ELEMENT"}
 * child whose selection status is {@code "SELECTED"} contributes
 * {@code "X "}. Other children contribute nothing.
 *
 * @param resultBlock the block whose text is wanted
 * @param blocksMap   all blocks indexed by block id
 * @return the assembled text (each piece is followed by a single space), or
 *         the empty string when the block is {@code null} or has no
 *         relationships
 */
String getBlockText(Block resultBlock, Map<String, Block> blocksMap) {
    log.debug("getBlockText");
    if (resultBlock == null) {
        return "";
    }
    List<Relationship> relationShips = resultBlock.getRelationships();
    if (relationShips == null) {
        return "";
    }
    StringBuilder blockText = new StringBuilder();
    for (Relationship relationship : relationShips) {
        if (!"CHILD".equals(relationship.getType())) {
            continue;
        }
        for (String childId : relationship.getIds()) {
            Block wordBlock = blocksMap.get(childId);
            // A child id that is not in the index has nothing to contribute.
            if (wordBlock == null) {
                continue;
            }
            String blockType = wordBlock.getBlockType();
            if ("WORD".equals(blockType)) {
                // Append (not assign) so that multi-word text keeps every word.
                blockText.append(wordBlock.getText()).append(' ');
            } else if ("SELECTION_ELEMENT".equals(blockType)
                    && "SELECTED".equals(wordBlock.getSelectionStatus())) {
                blockText.append("X ");
            }
        }
    }
    return blockText.toString();
}
/**
 * Indexes a list of Textract blocks into three id-keyed lookup tables.
 *
 * <p>The returned map holds:
 * <ul>
 *   <li>{@code "keyMap"} - {@code KEY_VALUE_SET} blocks whose entity types contain {@code "KEY"};</li>
 *   <li>{@code "valueMap"} - all other {@code KEY_VALUE_SET} blocks;</li>
 *   <li>{@code "blockMap"} - every block, regardless of type.</li>
 * </ul>
 *
 * @param blocks the blocks to index
 * @return the three lookup tables, keyed by the names listed above
 */
Map<String, Map<String, Block>> getKeyValueBlockMap(List<Block> blocks) {
    // The placeholder was missing, so the blocks argument was never rendered.
    log.debug("Create Key Value Map from A Textract Blocks List {}", blocks);
    Map<String, Block> keyMap = new HashMap<>();
    Map<String, Block> valueMap = new HashMap<>();
    Map<String, Block> blockMap = new HashMap<>();
    for (Block block : blocks) {
        String blockId = block.getId();
        blockMap.put(blockId, block);
        if (!"KEY_VALUE_SET".equals(block.getBlockType())) {
            continue;
        }
        // Within a key/value set, anything not tagged KEY is treated as a value.
        if (block.getEntityTypes().contains("KEY")) {
            keyMap.put(blockId, block);
        } else {
            valueMap.put(blockId, block);
        }
    }
    Map<String, Map<String, Block>> keyValueBlockMap = new HashMap<>();
    keyValueBlockMap.put("keyMap", keyMap);
    keyValueBlockMap.put("valueMap", valueMap);
    keyValueBlockMap.put("blockMap", blockMap);
    return keyValueBlockMap;
}
@robinsinghBisht
Copy link

This works for me, and the code is very understandable.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment