Created
March 31, 2020 13:02
-
-
Save balvinder294/a851cf5204a1fa7b2e56f01ff4b49210 to your computer and use it in GitHub Desktop.
Sample Code for AWS Textract Key-Value Extraction JAVA AWS SDK --- Tekraze
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public Map<String, String> getKeyValueRelationShipByBlocks(List<Block> blocks) { | |
Map<String, Map<String, Block>> keyValueBlockMap = getKeyValueBlockMap(blocks); | |
getKeyValueRelationShip(keyValueBlockMap); | |
return null; | |
} | |
Block findValueBlock(Block keyBlock, Map<String, Block> valueMap) { | |
log.debug("Find value Block "); | |
Block valueBlock = null; | |
for (Relationship relationShip : keyBlock.getRelationships()) { | |
if (relationShip.getType().equalsIgnoreCase("VALUE")) { | |
for (String id : relationShip.getIds()) { | |
valueBlock = valueMap.get(id); | |
return valueBlock; | |
} | |
} | |
} | |
return valueBlock; | |
} | |
Map<String, String> getKeyValueRelationShip(Map<String, Map<String, Block>> keyValueBlockMap) { | |
log.debug("Get Key Value Relationship {}", keyValueBlockMap); | |
Map<String, String> keyValueRelationShip = new HashMap<>(); | |
Map<String, Block> keyMap = keyValueBlockMap.get("keyMap"); | |
Map<String, Block> valueMap = keyValueBlockMap.get("valueMap"); | |
Map<String, Block> blocksMap = keyValueBlockMap.get("blockMap"); | |
keyMap.forEach((blockId, keyBlock) -> { | |
Block valueBlock = findValueBlock(keyBlock, valueMap); | |
String key = getBlockText(keyBlock, blocksMap); | |
String val = getBlockText(valueBlock, blocksMap); | |
keyValueRelationShip.put(key, val); | |
}); | |
return keyValueRelationShip; | |
} | |
String getBlockText(Block resultBlock, Map<String, Block> blocksMap) { | |
log.debug("getBlockText"); | |
String blockText = ""; | |
List<Relationship> relationShips = resultBlock.getRelationships(); | |
if (relationShips != null) { | |
for (Relationship relationship : relationShips) { | |
if (relationship.getType().equals("CHILD")) { | |
// relationship.getIds().forEach(childId -> | |
for (String childId : relationship.getIds()) { | |
Block wordBlock = blocksMap.get(childId); | |
if (wordBlock.getBlockType().equals("WORD")) { | |
blockText = wordBlock.getText() + " "; | |
} | |
if (wordBlock.getBlockType().equals("SELECTION_ELEMENT")) { | |
if (wordBlock.getSelectionStatus().equals("SELECTED")) { | |
blockText += "X "; | |
} | |
} | |
} | |
; | |
} | |
} | |
} | |
return blockText; | |
} | |
Map<String, Map<String, Block>> getKeyValueBlockMap(List<Block> blocks) { | |
log.debug("Create Key Value Map from A Textract Blocks List", blocks); | |
// # get key and value maps | |
Map<String, Map<String, Block>> keyValueBlockMap = new HashMap<String, Map<String, Block>>(); | |
Map<String, Block> keyMap = new HashMap<>(); | |
Map<String, Block> valueMap = new HashMap<>(); | |
Map<String, Block> blockMap = new HashMap<>(); | |
// for block in blocks: | |
for (Block block : blocks) { | |
String blockId = block.getId(); | |
blockMap.put(blockId, block); | |
if (block.getBlockType().equals("KEY_VALUE_SET")) { | |
if (block.getEntityTypes().contains("KEY")) { | |
keyMap.put(blockId, block); | |
} else { | |
valueMap.put(blockId, block); | |
} | |
} | |
} | |
keyValueBlockMap.put("keyMap", keyMap); | |
keyValueBlockMap.put("valueMap", valueMap); | |
keyValueBlockMap.put("blockMap", blockMap); | |
return keyValueBlockMap; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This works for me, and the code is very understandable.