# Gist raphael0202/7bc50ee50e86fdc9ba10cac2c145ebf8 by @raphael0202, created March 24, 2023 07:57.
from pprint import pprint
from PIL import Image
from robotoff.prediction.category.neural.keras_category_classifier_3_0.preprocessing import (
get_ingredient_processor,
)
from robotoff.prediction.ocr.core import get_ocr_result
from robotoff.prediction.ocr.dataclass import (
OCRFullTextAnnotation,
compute_intersection_bounding_box,
)
from robotoff.utils import get_image_from_url, get_logger, http_session
# Demo script: detect ingredient mentions in the OCR text of one product image,
# compute the bounding box covering the matched words, and print a Robotoff
# crop URL for that region.

logger = get_logger()

# Path of the product image, relative to the Open Food Facts image root.
# The OCR JSON is fetched from the same path with a `.json` extension.
source_image = "/23151192/2.jpg"
source_ocr = source_image.replace(".jpg", ".json")
ocr_url = f"https://static.openfoodfacts.org/images/products{source_ocr}"
image_url = f"https://images.openfoodfacts.org/images/products{source_image}"

# The ingredient list starts at the match of index 5 for this image.
# This index was determined manually here; it has to be found using a custom
# algorithm/ML technique.
first_ingredient_match_idx = 5

# Get OCR result from URL
ocr_result = get_ocr_result(ocr_url, http_session)

# NOTE(review): the original attribute access carried a `# type: ignore`,
# which suggests the OCR result and/or its full text annotation may be None.
# Fail with an explicit message instead of an AttributeError further down.
annotation = ocr_result.full_text_annotation if ocr_result is not None else None
if annotation is None:
    raise SystemExit(f"No full text annotation available for {ocr_url}")
full_text_annotation: OCRFullTextAnnotation = annotation
text = full_text_annotation.continuous_text

# processor is the flashtext processor used to detect ingredients in a string
processor = get_ingredient_processor()

# Detect all ingredient mentions; with `span_info=True` each match is unpacked
# below as (keyword, start_idx, end_idx).
matches = processor.extract_keywords(text, span_info=True)

if matches:
    # Debug output: the OCR text and every match found in it.
    print(f"=== text ===\n{text}\n")
    print("=== matches ===")
    pprint(matches)
    print("-----\n")

ingredient_matches = matches[first_ingredient_match_idx:]

words = []
for _, start_idx, end_idx in ingredient_matches:
    # Get `Word`s associated with the match, it's useful to get the word
    # coordinates on the photo
    words.extend(
        full_text_annotation.get_words_from_indices(start_idx, end_idx, raises=True)
    )

# Without any word there is no region to crop: stop before asking for a
# bounding box and unpacking it.
if not words:
    raise SystemExit(
        f"No ingredient words found from match index {first_ingredient_match_idx} "
        f"({len(matches)} match(es) detected)"
    )

y_min, x_min, y_max, x_max = compute_intersection_bounding_box(words)

image: Image.Image = get_image_from_url(image_url)

# Convert the bounding box to fractions of the image size
# (presumably what the crop endpoint expects — TODO confirm).
x_min /= image.width
x_max /= image.width
y_min /= image.height
y_max /= image.height

crop_url = (
    "https://robotoff.openfoodfacts.org/api/v1/images/crop"
    f"?image_url={image_url}"
    f"&y_min={y_min}&x_min={x_min}&y_max={y_max}&x_max={x_max}"
)
print(f"words: {words}")
print(f"Crop image URL: {crop_url}")
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment