Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Sample code to use Amazon Comprehend Service for NLP - Tekraze.com
/***** Step 1 Imports *********/
import com.amazonaws.SdkClientException;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
import com.amazonaws.services.comprehend.AmazonComprehend;
import com.amazonaws.services.comprehend.AmazonComprehendClientBuilder;
import com.amazonaws.services.comprehend.model.DetectEntitiesRequest;
import com.amazonaws.services.comprehend.model.DetectEntitiesResult;
import com.amazonaws.services.comprehend.model.Entity;
/************* Step 2 *******************
** Initialize Amazon Comprehend Client
**************************************/
/**
 * Builds an Amazon Comprehend client from static credentials.
 *
 * <p>The credentials are created from {@code awsAccessKey} and {@code awsSecretKey}, and the
 * client is bound to {@code awsRegion}; all three names are defined outside this method.
 * A new client is built on every call.
 *
 * @return a newly built {@link AmazonComprehend} client
 */
AmazonComprehend comprehendClient() {
    log.debug("Intialize Comprehend Client");
    AWSStaticCredentialsProvider credentialsProvider =
            new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey));
    return AmazonComprehendClientBuilder.standard()
            .withCredentials(credentialsProvider)
            .withRegion(awsRegion)
            .build();
}
/*****************Step 3**************
** Call Detect entities method **********/
/**
 * Detects entities in the given text with Amazon Comprehend.
 *
 * <p>The text is trimmed to at most 5000 UTF-8 bytes with {@code trimByBytes} before it is
 * sent, and the request is made with the language code {@code "en"}.
 *
 * @param text the text to analyze; must not be {@code null}
 * @return the entities returned by Comprehend for the (possibly trimmed) text
 * @throws IllegalArgumentException if {@code text} is {@code null}
 */
public List<Entity> detectEntitiesWithComprehend(String text) {
    if (text == null) {
        throw new IllegalArgumentException("text must not be null");
    }
    // Log only the size of the input: the previous message read an unrelated list element
    // (which fails on an empty list) and the full text may be large.
    log.debug("Method to Detect Entities With Amazon Comprehend, input length {}", text.length());

    // Analyze the caller-supplied text; the parameter was previously overwritten by an
    // unrelated variable, so the argument passed in was never used.
    final int maxRequestBytes = 5000;
    String trimmedText = trimByBytes(text, maxRequestBytes);

    DetectEntitiesRequest detectEntitiesRequest = new DetectEntitiesRequest()
            .withText(trimmedText)
            .withLanguageCode("en");
    DetectEntitiesResult detectEntitiesResult = comprehendClient().detectEntities(detectEntitiesRequest);

    // NOTE(review): entitiesList is not declared in this method - presumably a field of the
    // enclosing class; the assignment is kept so any reader of that field still sees the result.
    entitiesList = detectEntitiesResult.getEntities();
    return entitiesList;
}
// Method to trim text to at most the given number of UTF-8 bytes (the caller passes 5000, the Comprehend sync API limit)
/**
 * Truncates a string so that its UTF-8 encoding is at most {@code lengthOfBytes} bytes long.
 *
 * <p>The string is encoded as UTF-8, the byte view is cut at the requested length, and the
 * remaining bytes are decoded again. Malformed input - such as a multi-byte character that
 * was split by the cut - is ignored by the decoder rather than reported.
 *
 * @param str the string to truncate
 * @param lengthOfBytes the maximum number of UTF-8 bytes to keep
 * @return the decoded text of the retained bytes
 */
String trimByBytes(String str, int lengthOfBytes) {
    ByteBuffer utf8View = ByteBuffer.wrap(str.getBytes(StandardCharsets.UTF_8));
    if (utf8View.limit() > lengthOfBytes) {
        // Shrink the readable window; the backing array itself is left untouched.
        utf8View.limit(lengthOfBytes);
    }
    CharsetDecoder lenientDecoder = StandardCharsets.UTF_8.newDecoder()
            .onMalformedInput(CodingErrorAction.IGNORE);
    try {
        return lenientDecoder.decode(utf8View).toString();
    } catch (CharacterCodingException e) {
        // We will never get here.
        throw new RuntimeException(e);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment