# Created
# March 24, 2023 07:57
# -
# -
# Save raphael0202/7bc50ee50e86fdc9ba10cac2c145ebf8 to your computer and use it in GitHub Desktop.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
from pprint import pprint
from typing import Optional

from PIL import Image

from robotoff.prediction.category.neural.keras_category_classifier_3_0.preprocessing import (
    get_ingredient_processor,
)
from robotoff.prediction.ocr.core import get_ocr_result
from robotoff.prediction.ocr.dataclass import (
    OCRFullTextAnnotation,
    compute_intersection_bounding_box,
)
from robotoff.utils import get_image_from_url, get_logger, http_session

# Demo script: detect ingredient mentions in the OCR text of a product image,
# map the matched character spans back to OCR words, compute a bounding box
# around those words and print a Robotoff crop URL for that box.

logger = get_logger()

# Ingredient list start from ingredient of index 5 for this image.
# This step is done manually here, but has to be done using a custom
# algorithm/ML technique
INGREDIENT_LIST_START_INDEX = 5

source_image = "/23151192/2.jpg"
source_ocr = source_image.replace(".jpg", ".json")
ocr_url = f"https://static.openfoodfacts.org/images/products{source_ocr}"
# Built once: used both to download the image and as the crop API parameter.
image_url = f"https://images.openfoodfacts.org/images/products{source_image}"

# Get OCR result from URL
ocr_result = get_ocr_result(ocr_url, http_session)

# processor is the flashtext processor used to detect ingredients in a string
processor = get_ingredient_processor()

# NOTE(review): the original silenced the type checker here with
# `# type: ignore`, which suggests either value may be None - confirm against
# the robotoff API. Fail with a clear message instead of an AttributeError.
full_text_annotation: Optional[OCRFullTextAnnotation] = (
    None if ocr_result is None else ocr_result.full_text_annotation
)
if full_text_annotation is None:
    raise SystemExit(f"No OCR full text annotation available for {ocr_url}")

text = full_text_annotation.continuous_text

# Detect all ingredient mentions; with span_info=True each match is unpacked
# below as (keyword, start_idx, end_idx).
matches = processor.extract_keywords(text, span_info=True)
if not matches:
    raise SystemExit("No ingredient mention detected in the OCR text")

print(f"=== text ===\n{text}\n")
print("=== matches ===")
pprint(matches)
print("-----\n")

ingredient_matches = matches[INGREDIENT_LIST_START_INDEX:]
if not ingredient_matches:
    # Without this guard the bounding box would be computed from no words.
    raise SystemExit(
        f"Fewer than {INGREDIENT_LIST_START_INDEX + 1} matches found: "
        "no ingredient list to locate"
    )

words = []
for _, start_idx, end_idx in ingredient_matches:
    # Get `Word`s associated with the match, it's useful to get the word
    # coordinates on the photo
    words.extend(
        full_text_annotation.get_words_from_indices(start_idx, end_idx, raises=True)
    )

y_min, x_min, y_max, x_max = compute_intersection_bounding_box(words)

# NOTE(review): presumably the download helper can return None on failure -
# confirm; the check is harmless otherwise.
image: Optional[Image.Image] = get_image_from_url(image_url)
if image is None:
    raise SystemExit(f"Could not download image {image_url}")

# Divide by the image dimensions to express the box relative to the image
# size (presumably what the crop endpoint expects - confirm against the
# Robotoff API documentation).
x_min /= image.width
x_max /= image.width
y_min /= image.height
y_max /= image.height

crop_url = f"https://robotoff.openfoodfacts.org/api/v1/images/crop?image_url={image_url}&y_min={y_min}&x_min={x_min}&y_max={y_max}&x_max={x_max}"
print(f"words: {words}")
print(f"Crop image URL: {crop_url}")
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment