Created
April 6, 2020 16:44
-
-
Save balvinder294/5ebf981ba62561c0d6490418b7c61567 to your computer and use it in GitHub Desktop.
Sample code to use Amazon Comprehend Service for NLP - Tekraze.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/***** Step 1: Imports *********/
import com.amazonaws.SdkClientException;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
import com.amazonaws.services.comprehend.AmazonComprehend;
import com.amazonaws.services.comprehend.AmazonComprehendClientBuilder;
import com.amazonaws.services.comprehend.model.DetectEntitiesRequest;
import com.amazonaws.services.comprehend.model.DetectEntitiesResult;
import com.amazonaws.services.comprehend.model.Entity;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
/************* Step 2 *******************
** Initialize Amazon Comprehend Client
**************************************/
AmazonComprehend comprehendClient() { | |
log.debug("Intialize Comprehend Client"); | |
BasicAWSCredentials awsCreds = new BasicAWSCredentials(awsAccessKey, awsSecretKey); | |
AWSStaticCredentialsProvider awsStaticCredentialsProvider = new AWSStaticCredentialsProvider(awsCreds); | |
return AmazonComprehendClientBuilder.standard().withCredentials(awsStaticCredentialsProvider) | |
.withRegion(awsRegion).build(); | |
} | |
/***************** Step 3 **************
** Call Detect entities method **********/
public List<Entity> detectEntitiesWithComprehend(String text) { | |
log.debug("Method to Detect Entities With Amazon Comprehend {}", blocksList.get(0)); | |
text = trimByBytes(textToAnalyze, 5000); | |
DetectEntitiesRequest detectEntitiesRequest = new DetectEntitiesRequest().withText(text) | |
.withLanguageCode("en"); | |
DetectEntitiesResult detectEntitiesResult = comprehendClient().detectEntities(detectEntitiesRequest); | |
entitiesList = detectEntitiesResult.getEntities(); | |
return entitiesList; | |
} | |
// Method to trim text to 5000 bytes as Comprehend Sync Api Limit
String trimByBytes(String str, int lengthOfBytes) { | |
byte[] bytes = str.getBytes(StandardCharsets.UTF_8); | |
ByteBuffer buffer = ByteBuffer.wrap(bytes); | |
if (lengthOfBytes < buffer.limit()) { | |
buffer.limit(lengthOfBytes); | |
} | |
CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder(); | |
decoder.onMalformedInput(CodingErrorAction.IGNORE); | |
try { | |
return decoder.decode(buffer).toString(); | |
} catch (CharacterCodingException e) { | |
// We will never get here. | |
throw new RuntimeException(e); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment