amitu/ocr.py

## ocr.py
#!/usr/bin/env python

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Outlines document text given an image.
Example:
    python doctext.py resources/text_menu.jpg
"""

"""
$ export GOOGLE_APPLICATION_CREDENTIALS=tmp/Karmator-05dd6a510e87.json
$ python src/scripts/users/amitu/ocr.py tmp/p.png
"""

# [START full_tutorial]
# [START imports]
import argparse
from enum import Enum
import io

from google.cloud import vision
from google.cloud.vision import types
from PIL import Image, ImageDraw
# [END imports]


class FeatureType(Enum):
    """Levels of document structure whose bounding boxes can be collected.

    The members follow the nesting walked by ``get_document_bounds``:
    pages contain blocks, blocks contain paragraphs, paragraphs contain
    words, and words contain symbols.
    """

    PAGE = 1    # outermost level
    BLOCK = 2   # child of a page
    PARA = 3    # child of a block
    WORD = 4    # child of a paragraph
    SYMBOL = 5  # child of a word (innermost level)


def draw_boxes(image, bounds, color):
    """Outline every bounding box in ``bounds`` on ``image``.

    Each bound is read through its first four ``vertices`` (their ``x`` and
    ``y`` attributes) and drawn as an unfilled polygon whose outline is
    ``color``. The same ``image`` object that was passed in is returned.
    """
    # [START draw_blocks]
    canvas = ImageDraw.Draw(image)

    for box in bounds:
        # Flatten the four corners into [x0, y0, x1, y1, x2, y2, x3, y3].
        outline_points = []
        for corner_index in range(4):
            corner = box.vertices[corner_index]
            outline_points.extend((corner.x, corner.y))
        canvas.polygon(outline_points, fill=None, outline=color)
    return image
    # [END draw_blocks]


def get_document_bounds(image_file, feature):
    # [START detect_bounds]
    """Return the bounding boxes of one feature level detected in an image.

    Reads the image, runs the Vision client's document text detection on
    it, prints every detected word to stdout, and collects the bounding
    boxes that belong to the requested feature level.

    Args:
        image_file: Path of the image file to analyse.
        feature: The ``FeatureType`` member selecting which boxes to collect.

    Returns:
        A list of bounding boxes in document order. For
        ``FeatureType.PAGE`` the box of each page's last block is used, and
        a page without blocks contributes nothing.
    """
    client = vision.ImageAnnotatorClient()

    # Use a separate name for the handle so the path parameter is not
    # shadowed.
    with io.open(image_file, 'rb') as image_handle:
        content = image_handle.read()

    image = types.Image(content=content)

    response = client.document_text_detection(image=image)
    document = response.full_text_annotation

    bounds = []

    # Collect specified feature bounds by enumerating all document features
    for page in document.pages:
        # Tracked per page: relying on the inner loop variable after the
        # loop would fail on a page with no blocks, or silently reuse a
        # block left over from an earlier page.
        last_block = None

        for block in page.blocks:
            last_block = block

            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    print("".join(s.text for s in word.symbols))

                    if feature == FeatureType.SYMBOL:
                        bounds.extend(s.bounding_box for s in word.symbols)

                    if feature == FeatureType.WORD:
                        bounds.append(word.bounding_box)

                if feature == FeatureType.PARA:
                    bounds.append(paragraph.bounding_box)

            if feature == FeatureType.BLOCK:
                bounds.append(block.bounding_box)

        # NOTE(review): PAGE is represented by the last block's box, as
        # before; confirm whether a true page outline is wanted instead.
        if feature == FeatureType.PAGE and last_block is not None:
            bounds.append(last_block.bounding_box)

    # The list `bounds` contains the coordinates of the bounding boxes.
    # [END detect_bounds]
    return bounds


def render_doc_text(filein, fileout):
    """Outline detected text regions on an image, then save or show it.

    Boxes for ``FeatureType.PAGE`` are drawn in blue, ``FeatureType.PARA``
    in red and ``FeatureType.WORD`` in yellow, in that order, so later
    colors are painted over earlier ones.

    Args:
        filein: Path of the image to annotate.
        fileout: Where to save the annotated image. A falsy value (such as
            the command-line default ``0``) shows the image instead.
    """
    # [START render_doc_text]
    image = Image.open(filein)

    # Each level is fetched with its own get_document_bounds call.
    for feature, color in (
            (FeatureType.PAGE, 'blue'),
            (FeatureType.PARA, 'red'),
            (FeatureType.WORD, 'yellow')):
        draw_boxes(image, get_document_bounds(filein, feature), color)

    # Test the value, not object identity: `is not 0` depends on integer
    # caching and triggers a SyntaxWarning on Python 3.8+.
    if fileout:
        image.save(fileout)
    else:
        image.show()
    # [END render_doc_text]


if __name__ == '__main__':
    # [START run_doc_text]
    # Usage: <detect_file> [-out_file OUT_FILE]
    parser = argparse.ArgumentParser()
    parser.add_argument('detect_file', help='The image for text detection.')
    # The default 0 is the "no output file" value render_doc_text checks for.
    parser.add_argument('-out_file', help='Optional output file', default=0)
    args = parser.parse_args()

    render_doc_text(args.detect_file, args.out_file)
    # [END run_doc_text]
# [END full_tutorial]
	#!/usr/bin/env python

	# Copyright 2017 Google Inc. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""Outlines document text given an image.
	Example:
	python doctext.py resources/text_menu.jpg
	"""

	"""
	$ export GOOGLE_APPLICATION_CREDENTIALS=tmp/Karmator-05dd6a510e87.json
	$ python src/scripts/users/amitu/ocr.py tmp/p.png
	"""

	# [START full_tutorial]
	# [START imports]
	import argparse
	from enum import Enum
	import io

	from google.cloud import vision
	from google.cloud.vision import types
	from PIL import Image, ImageDraw
	# [END imports]


	class FeatureType(Enum):
		"""Levels of document structure whose bounding boxes can be collected."""

		PAGE = 1
		BLOCK = 2
		PARA = 3
		WORD = 4
		SYMBOL = 5


	def draw_boxes(image, bounds, color):
		"""Outline every bounding box in ``bounds`` on ``image``.

		Each bound is read through its first four ``vertices`` (their ``x``
		and ``y`` attributes) and drawn as an unfilled polygon whose outline
		is ``color``. The same ``image`` object that was passed in is
		returned.
		"""
		# [START draw_blocks]
		draw = ImageDraw.Draw(image)

		for bound in bounds:
			draw.polygon([
				bound.vertices[0].x, bound.vertices[0].y,
				bound.vertices[1].x, bound.vertices[1].y,
				bound.vertices[2].x, bound.vertices[2].y,
				bound.vertices[3].x, bound.vertices[3].y], None, color)
		return image
		# [END draw_blocks]


	def get_document_bounds(image_file, feature):
		# [START detect_bounds]
		"""Return the bounding boxes of one feature level detected in an image.

		Reads the image, runs the Vision client's document text detection on
		it, prints every detected word to stdout, and collects the bounding
		boxes that belong to the requested feature level.

		Args:
			image_file: Path of the image file to analyse.
			feature: The ``FeatureType`` member selecting which boxes to
				collect.

		Returns:
			A list of bounding boxes in document order. For
			``FeatureType.PAGE`` the box of each page's last block is used,
			and a page without blocks contributes nothing.
		"""
		client = vision.ImageAnnotatorClient()

		# Use a separate name for the handle so the path parameter is not
		# shadowed.
		with io.open(image_file, 'rb') as image_handle:
			content = image_handle.read()

		image = types.Image(content=content)

		response = client.document_text_detection(image=image)
		document = response.full_text_annotation

		bounds = []

		# Collect specified feature bounds by enumerating all document features
		for page in document.pages:
			# Tracked per page: relying on the inner loop variable after the
			# loop would fail on a page with no blocks, or silently reuse a
			# block left over from an earlier page.
			last_block = None

			for block in page.blocks:
				last_block = block

				for paragraph in block.paragraphs:
					for word in paragraph.words:
						print("".join(s.text for s in word.symbols))

						if feature == FeatureType.SYMBOL:
							bounds.extend(s.bounding_box for s in word.symbols)

						if feature == FeatureType.WORD:
							bounds.append(word.bounding_box)

					if feature == FeatureType.PARA:
						bounds.append(paragraph.bounding_box)

				if feature == FeatureType.BLOCK:
					bounds.append(block.bounding_box)

			# NOTE(review): PAGE is represented by the last block's box;
			# confirm whether a true page outline is wanted instead.
			if feature == FeatureType.PAGE and last_block is not None:
				bounds.append(last_block.bounding_box)

		# The list `bounds` contains the coordinates of the bounding boxes.
		# [END detect_bounds]
		return bounds


	def render_doc_text(filein, fileout):
		"""Outline detected text regions on an image, then save or show it.

		Boxes for ``FeatureType.PAGE`` are drawn in blue, ``FeatureType.PARA``
		in red and ``FeatureType.WORD`` in yellow, in that order, so later
		colors are painted over earlier ones.

		Args:
			filein: Path of the image to annotate.
			fileout: Where to save the annotated image. A falsy value (such
				as the command-line default ``0``) shows the image instead.
		"""
		# [START render_doc_text]
		image = Image.open(filein)
		bounds = get_document_bounds(filein, FeatureType.PAGE)
		draw_boxes(image, bounds, 'blue')
		bounds = get_document_bounds(filein, FeatureType.PARA)
		draw_boxes(image, bounds, 'red')
		bounds = get_document_bounds(filein, FeatureType.WORD)
		draw_boxes(image, bounds, 'yellow')

		# Test the value, not object identity: `is not 0` depends on integer
		# caching and triggers a SyntaxWarning on Python 3.8+.
		if fileout:
			image.save(fileout)
		else:
			image.show()
		# [END render_doc_text]


	if __name__ == '__main__':
		# [START run_doc_text]
		# Usage: <detect_file> [-out_file OUT_FILE]
		parser = argparse.ArgumentParser()
		parser.add_argument('detect_file', help='The image for text detection.')
		# The default 0 is the "no output file" value render_doc_text checks for.
		parser.add_argument('-out_file', help='Optional output file', default=0)
		args = parser.parse_args()

		render_doc_text(args.detect_file, args.out_file)
		# [END run_doc_text]
	# [END full_tutorial]