@itasyurt
Created May 23, 2023 19:34
OCRController after file check and spell correction
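The controller below assumes the Spring Web and validation starters plus the Cloud Vision and Cloud Natural Language client libraries are on the classpath. A minimal Gradle (Kotlin DSL) sketch of those dependencies; the BOM version is an assumption, pin whatever your project uses:

// build.gradle.kts (illustrative sketch, not part of the original gist)
dependencies {
    implementation("org.springframework.boot:spring-boot-starter-web")
    implementation("org.springframework.boot:spring-boot-starter-validation")
    implementation(platform("com.google.cloud:libraries-bom:26.30.0")) // assumed version
    implementation("com.google.cloud:google-cloud-vision")
    implementation("com.google.cloud:google-cloud-language")
}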
import com.google.api.gax.core.FixedCredentialsProvider
import com.google.auth.oauth2.ServiceAccountCredentials
import com.google.cloud.language.v1.AnalyzeSyntaxRequest
import com.google.cloud.language.v1.Document
import com.google.cloud.language.v1.EncodingType
import com.google.cloud.language.v1.LanguageServiceClient
import com.google.cloud.language.v1.PartOfSpeech
import com.google.cloud.vision.v1.AnnotateImageRequest
import com.google.cloud.vision.v1.Feature
import com.google.cloud.vision.v1.Image
import com.google.cloud.vision.v1.ImageAnnotatorClient
import com.google.cloud.vision.v1.ImageAnnotatorSettings
import com.google.protobuf.ByteString
import org.springframework.beans.factory.annotation.Value
import org.springframework.http.HttpStatus
import org.springframework.http.MediaType
import org.springframework.http.ResponseEntity
import org.springframework.validation.annotation.Validated
import org.springframework.web.bind.annotation.PostMapping
import org.springframework.web.bind.annotation.RequestMapping
import org.springframework.web.bind.annotation.RequestPart
import org.springframework.web.bind.annotation.RestController
import org.springframework.web.multipart.MultipartFile
import java.io.FileInputStream
import javax.validation.constraints.NotNull

@RestController
@RequestMapping("/api/ocr")
@Validated
class OCRController {

    @Value("\${google.cloud.credentials.path}")
    private lateinit var credentialsPath: String

    @Value("\${google.cloud.project-id}")
    private lateinit var projectId: String

    @PostMapping(consumes = [MediaType.MULTIPART_FORM_DATA_VALUE])
    fun ocr(@NotNull @RequestPart("file") file: MultipartFile): ResponseEntity<String> {
        if (!isSupportedImageType(file)) {
            return ResponseEntity.status(HttpStatus.BAD_REQUEST)
                .body("Unsupported image type. Only PNG and JPEG images are allowed.")
        }

        val imageBytes = ByteString.copyFrom(file.bytes)
        val image = Image.newBuilder().setContent(imageBytes).build()

        // Authenticate the Vision client with the configured service-account key.
        val settings = ImageAnnotatorSettings.newBuilder()
            .setCredentialsProvider(
                FixedCredentialsProvider.create(
                    ServiceAccountCredentials.fromStream(FileInputStream(credentialsPath))
                )
            )
            .build()

        ImageAnnotatorClient.create(settings).use { imageAnnotatorClient ->
            val imageRequest = AnnotateImageRequest.newBuilder()
                .addFeatures(Feature.newBuilder().setType(Feature.Type.TEXT_DETECTION))
                .setImage(image)
                .build()
            val response = imageAnnotatorClient.batchAnnotateImages(listOf(imageRequest))
            // The first text annotation holds the full detected text; the rest are per-word entries.
            if (response.responsesCount > 0 && response.responsesList[0].textAnnotationsCount > 0) {
                val annotation = response.responsesList[0].textAnnotationsList[0]
                val correctedText = correctSpelling(annotation.description)
                return ResponseEntity.ok(correctedText)
            }
        }
        return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("OCR failed: No response received")
    }

    private fun isSupportedImageType(file: MultipartFile): Boolean {
        val contentType = file.contentType
        return contentType == MediaType.IMAGE_PNG_VALUE || contentType == MediaType.IMAGE_JPEG_VALUE
    }

    private fun correctSpelling(text: String): String {
        // The Natural Language client uses Application Default Credentials here.
        LanguageServiceClient.create().use { languageServiceClient ->
            val document = Document.newBuilder()
                .setContent(text)
                .setType(Document.Type.PLAIN_TEXT)
                .build()
            val syntaxRequest = AnalyzeSyntaxRequest.newBuilder()
                .setDocument(document)
                .setEncodingType(EncodingType.UTF16)
                .build()
            val response = languageServiceClient.analyzeSyntax(syntaxRequest)

            val correctedText = StringBuilder(text)
            // Replace each verb with its lemma. Iterating in reverse keeps the UTF-16
            // offsets of earlier tokens valid while later tokens are rewritten.
            response.tokensList.reversed().forEach { token ->
                if (token.partOfSpeech.tag == PartOfSpeech.Tag.VERB && token.lemma.isNotBlank()) {
                    val startIndex = token.text.beginOffset
                    val endIndex = startIndex + token.text.content.length
                    correctedText.replace(startIndex, endIndex, token.lemma)
                }
            }
            return correctedText.toString()
        }
    }
}
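
For reference, a sketch of the configuration and a sample call this controller expects. The property names come from the @Value annotations above; the key path, project id, file name, and port are placeholders, not values from the original gist:

// application.properties (placeholder values):
//   google.cloud.credentials.path=/path/to/service-account.json
//   google.cloud.project-id=my-gcp-project
//
// Example request (assuming the default port 8080):
//   curl -F "file=@scan.png" http://localhost:8080/api/ocr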