Sample code to use Amazon Comprehend Service for NLP - Tekraze.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/***** Step 1 Imports *********/ | |
import com.amazonaws.SdkClientException; | |
import com.amazonaws.auth.AWSStaticCredentialsProvider; | |
import com.amazonaws.auth.BasicAWSCredentials; | |
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; | |
import com.amazonaws.services.comprehend.AmazonComprehend; | |
import com.amazonaws.services.comprehend.AmazonComprehendClientBuilder; | |
import com.amazonaws.services.comprehend.model.DetectEntitiesRequest; | |
import com.amazonaws.services.comprehend.model.DetectEntitiesResult; | |
import com.amazonaws.services.comprehend.model.Entity; | |
/************* Step 2 *******************
** Initialize the Amazon Comprehend client
**************************************/
AmazonComprehend comprehendClient() { | |
log.debug("Intialize Comprehend Client"); | |
BasicAWSCredentials awsCreds = new BasicAWSCredentials(awsAccessKey, awsSecretKey); | |
AWSStaticCredentialsProvider awsStaticCredentialsProvider = new AWSStaticCredentialsProvider(awsCreds); | |
return AmazonComprehendClientBuilder.standard().withCredentials(awsStaticCredentialsProvider) | |
.withRegion(awsRegion).build(); | |
} | |
/***************** Step 3 **************
** Call the detect-entities method **********/
public List<Entity> detectEntitiesWithComprehend(String text) { | |
log.debug("Method to Detect Entities With Amazon Comprehend {}", blocksList.get(0)); | |
text = trimByBytes(textToAnalyze, 5000); | |
DetectEntitiesRequest detectEntitiesRequest = new DetectEntitiesRequest().withText(text) | |
.withLanguageCode("en"); | |
DetectEntitiesResult detectEntitiesResult = comprehendClient().detectEntities(detectEntitiesRequest); | |
entitiesList = detectEntitiesResult.getEntities(); | |
return entitiesList; | |
} | |
// Trims text to a maximum number of UTF-8 bytes (called with 5000 for the Comprehend sync API limit)
String trimByBytes(String str, int lengthOfBytes) { | |
byte[] bytes = str.getBytes(StandardCharsets.UTF_8); | |
ByteBuffer buffer = ByteBuffer.wrap(bytes); | |
if (lengthOfBytes < buffer.limit()) { | |
buffer.limit(lengthOfBytes); | |
} | |
CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder(); | |
decoder.onMalformedInput(CodingErrorAction.IGNORE); | |
try { | |
return decoder.decode(buffer).toString(); | |
} catch (CharacterCodingException e) { | |
// We will never get here. | |
throw new RuntimeException(e); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment