Created
May 23, 2023 19:34
-
-
Save itasyurt/c03b321ce3c707b44a84068b61a540e7 to your computer and use it in GitHub Desktop.
OCRController after file check and spell correction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.google.api.gax.core.FixedCredentialsProvider
import com.google.auth.oauth2.ServiceAccountCredentials
import com.google.cloud.language.v1.AnalyzeSyntaxRequest
import com.google.cloud.language.v1.Document
import com.google.cloud.language.v1.EncodingType
import com.google.cloud.language.v1.LanguageServiceClient
import com.google.cloud.language.v1.PartOfSpeech
import com.google.cloud.vision.v1.AnnotateImageRequest
import com.google.cloud.vision.v1.Feature
import com.google.cloud.vision.v1.Image
import com.google.cloud.vision.v1.ImageAnnotatorClient
import com.google.cloud.vision.v1.ImageAnnotatorSettings
import com.google.cloud.vision.v1.TextAnnotation
import com.google.protobuf.ByteString
import org.springframework.beans.factory.annotation.Value
import org.springframework.http.HttpStatus
import org.springframework.http.MediaType
import org.springframework.http.ResponseEntity
import org.springframework.validation.annotation.Validated
import org.springframework.web.bind.annotation.PostMapping
import org.springframework.web.bind.annotation.RequestMapping
import org.springframework.web.bind.annotation.RequestPart
import org.springframework.web.bind.annotation.RestController
import org.springframework.web.multipart.MultipartFile
import java.io.FileInputStream
import javax.validation.constraints.NotNull
/**
 * REST controller exposing `POST /api/ocr`.
 *
 * The endpoint accepts a multipart upload (part name `file`), runs Google Cloud Vision
 * text detection on it and post-processes the detected text with the Cloud Natural
 * Language syntax analysis (see [correctSpelling]).
 */
@RestController
@RequestMapping("/api/ocr")
@Validated
class OCRController {

    /** Path of the service-account key file used to authenticate the Vision client. */
    @Value("\${google.cloud.credentials.path}")
    private lateinit var credentialsPath: String

    /** Injected project id; not read by any code in this class. */
    @Value("\${google.cloud.project-id}")
    private lateinit var projectId: String

    /**
     * Runs text detection on the uploaded image and returns the post-processed text.
     *
     * @param file uploaded image; only `image/png` and `image/jpeg` content types are accepted.
     * @return `200` with the processed text, or `400` with a plain-text reason when the
     *   content type is unsupported, the Vision API returned no usable response, reported
     *   a per-image error, or detected no text.
     */
    @PostMapping(consumes = [MediaType.MULTIPART_FORM_DATA_VALUE])
    fun ocr(@NotNull @RequestPart("file") file: MultipartFile): ResponseEntity<String> {
        if (!isSupportedImageType(file)) {
            return ResponseEntity.status(HttpStatus.BAD_REQUEST)
                .body("Unsupported image type. Only PNG and JPEG images are allowed.")
        }

        val image = Image.newBuilder()
            .setContent(ByteString.copyFrom(file.bytes))
            .build()

        // Load the key eagerly and close the stream right away; the original left it open.
        val credentials = FileInputStream(credentialsPath).use { keyStream ->
            ServiceAccountCredentials.fromStream(keyStream)
        }
        val settings = ImageAnnotatorSettings.newBuilder()
            .setCredentialsProvider(FixedCredentialsProvider.create(credentials))
            .build()

        ImageAnnotatorClient.create(settings).use { imageAnnotatorClient ->
            val imageRequest = AnnotateImageRequest.newBuilder()
                .addFeatures(Feature.newBuilder().setType(Feature.Type.TEXT_DETECTION))
                .setImage(image)
                .build()

            val imageResponse = imageAnnotatorClient
                .batchAnnotateImages(listOf(imageRequest))
                .responsesList
                .firstOrNull()

            if (imageResponse != null) {
                if (imageResponse.hasError()) {
                    return ResponseEntity.status(HttpStatus.BAD_REQUEST)
                        .body("OCR failed: ${imageResponse.error.message}")
                }
                // The annotation list is empty when the image contains no text, so the
                // first entry must not be indexed unconditionally.
                val annotation = imageResponse.textAnnotationsList.firstOrNull()
                    ?: return ResponseEntity.status(HttpStatus.BAD_REQUEST)
                        .body("OCR failed: No text detected")
                return ResponseEntity.ok(correctSpelling(annotation.description))
            }
        }
        return ResponseEntity.status(HttpStatus.BAD_REQUEST).body("OCR failed: No response received")
    }

    /** Returns `true` when the declared content type of [file] is PNG or JPEG. */
    private fun isSupportedImageType(file: MultipartFile): Boolean {
        val contentType = file.contentType
        return contentType == MediaType.IMAGE_PNG_VALUE || contentType == MediaType.IMAGE_JPEG_VALUE
    }

    /**
     * Replaces every verb token in [text] with its lemma as reported by the Natural
     * Language syntax analysis.
     *
     * NOTE(review): despite the name this is lemmatisation of verbs, not general spell
     * checking. The language client is created with default credentials, not with
     * [credentialsPath] - confirm this is intended.
     *
     * @param text plain text to analyse.
     * @return [text] with verb tokens substituted by their lemmas.
     */
    private fun correctSpelling(text: String): String =
        LanguageServiceClient.create().use { languageServiceClient ->
            val document = Document.newBuilder()
                .setContent(text)
                .setType(Document.Type.PLAIN_TEXT)
                .build()
            // UTF16 is requested so that token offsets are expressed in the same units
            // as Kotlin String / StringBuilder indices.
            val syntaxRequest = AnalyzeSyntaxRequest.newBuilder()
                .setDocument(document)
                .setEncodingType(EncodingType.UTF16)
                .build()
            val response = languageServiceClient.analyzeSyntax(syntaxRequest)

            // Apply replacements from the end of the text towards the start: a lemma may
            // differ in length from its token, and editing back-to-front keeps the
            // offsets of the not-yet-processed (earlier) tokens valid.
            val verbTokens = response.tokensList
                .filter { it.partOfSpeech.tag == PartOfSpeech.Tag.VERB && it.lemma.isNotBlank() }
                .sortedByDescending { it.text.beginOffset }

            val correctedText = StringBuilder(text)
            for (token in verbTokens) {
                val startIndex = token.text.beginOffset
                val endIndex = startIndex + token.text.content.length
                // Skip tokens whose reported span does not fit the original text.
                if (startIndex < 0 || endIndex > text.length) continue
                correctedText.replace(startIndex, endIndex, token.lemma)
            }
            correctedText.toString()
        }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment