# raphael0202/detect_ingredients.py

## detect_ingredients.py
from pprint import pprint

from PIL import Image

from robotoff.prediction.category.neural.keras_category_classifier_3_0.preprocessing import (
    get_ingredient_processor,
)
from robotoff.prediction.ocr.core import get_ocr_result
from robotoff.prediction.ocr.dataclass import (
    OCRFullTextAnnotation,
    compute_intersection_bounding_box,
)
from robotoff.utils import get_image_from_url, get_logger, http_session

# Demo script: detect ingredient mentions in the OCR text of one product image
# and print a Robotoff crop URL covering the words that were matched.
logger = get_logger()

# Image to analyse; the path is appended to the image server base URLs below.
source_image = "/23151192/2.jpg"
# The OCR JSON uses the same path as the image, with a `.json` extension.
source_ocr = source_image.replace(".jpg", ".json")
ocr_url = f"https://static.openfoodfacts.org/images/products{source_ocr}"
# Get OCR result from URL
ocr_result = get_ocr_result(ocr_url, http_session)
# processor is the flashtext processor used to detect ingredients in a string
processor = get_ingredient_processor()

full_text_annotation: OCRFullTextAnnotation = ocr_result.full_text_annotation  # type: ignore
text = full_text_annotation.continuous_text

# Detect all ingredient mentions. Each match is unpacked below as a 3-tuple
# whose last two items are the start and end offsets of the match in `text`.
matches = processor.extract_keywords(text, span_info=True)

if matches:
    print(f"=== text ===\n{text}\n")
    print("=== matches ===")
    pprint(matches)
    print("-----\n")
    # Ingredient list start from ingredient of index 5 for this image.
    # This step is done manually here, but has to be done using a custom
    # algorithm/ML technique
    ingredient_matches = matches[5:]
    words = []
    for _, start_idx, end_idx in ingredient_matches:
        # Get `Word`s associated with the match, it's useful to get the word
        # coordinates on the photo
        words.extend(
            full_text_annotation.get_words_from_indices(
                start_idx, end_idx, raises=True
            )
        )

    if words:
        y_min, x_min, y_max, x_max = compute_intersection_bounding_box(words)
        image_url = f"https://images.openfoodfacts.org/images/products{source_image}"
        # The image is only fetched to read its dimensions.
        image: Image.Image = get_image_from_url(image_url)
        # Convert the box from pixel values to fractions of the image size.
        # NOTE(review): presumably the crop endpoint expects relative
        # coordinates -- confirm against the Robotoff API.
        x_min /= image.width
        x_max /= image.width
        y_min /= image.height
        y_max /= image.height
        crop_url = (
            "https://robotoff.openfoodfacts.org/api/v1/images/crop"
            f"?image_url={image_url}"
            f"&y_min={y_min}&x_min={x_min}&y_max={y_max}&x_max={x_max}"
        )
        print(f"words: {words}")
        print(f"Crop image URL: {crop_url}")
    else:
        # `matches` may hold five entries or fewer, in which case the manual
        # slice above selects nothing and there is no region to crop.
        # NOTE(review): assumes the bounding-box helper cannot produce a
        # usable box from an empty word list -- confirm.
        print("No ingredient words found after the manual start index.")