achikin/Dockerfile

## Dockerfile
# Installs doc2text together with the OCR/imaging system packages it is used
# with here, then runs /my/dtt.py as the container's default command.
FROM ubuntu:16.04

WORKDIR /my/

# Refresh the package index and install in the SAME layer: a cached
# `apt-get update` layer must never be reused with a changed package list.
# The index is deleted in that layer too, since it is not needed at runtime.
# NOTE(review): --no-install-recommends is intentionally not used; the
# recommended packages of python-pip may be needed by the pip install below.
RUN apt-get -qq -y update \
    && apt-get -qq -y install \
        libopencv-dev \
        python \
        python-opencv \
        python-pip \
        python-pythonmagick \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir doc2text

# Plain local files: COPY is sufficient, ADD's extra behaviours are not needed.
COPY dtt.py image.png /my/

CMD ["/usr/bin/python","/my/dtt.py"]

## dtt.py
import doc2text

# Run doc2text on /my/image.png and print the text it extracts.

# Initialize the class.
doc = doc2text.Document()

# Read the file in. Currently accepts pdf, png, jpg, bmp, tiff.
# If reading a PDF, doc2text will split the PDF into its component pages.
doc.read('/my/image.png')

# Crop the pages down to estimated text regions, deskew, and optimize for OCR.
doc.process()

# Extract text from the pages.
doc.extract_text()
text = doc.get_text()

# Call-style print with a single argument prints the same output under
# Python 2 and is also valid Python 3 syntax (the statement form is not).
print(text)
	# Installs doc2text together with the OCR/imaging system packages it is
	# used with here, then runs /my/dtt.py as the container's default command.
	FROM ubuntu:16.04

	WORKDIR /my/

	# Refresh the package index and install in the SAME layer: a cached
	# `apt-get update` layer must never be reused with a changed package list.
	# The index is deleted in that layer too, since it is not needed at runtime.
	# NOTE(review): --no-install-recommends is intentionally not used; the
	# recommended packages of python-pip may be needed by the pip install below.
	RUN apt-get -qq -y update \
	    && apt-get -qq -y install \
	        libopencv-dev \
	        python \
	        python-opencv \
	        python-pip \
	        python-pythonmagick \
	        tesseract-ocr \
	    && rm -rf /var/lib/apt/lists/*

	# --no-cache-dir keeps pip's download cache out of the image layer.
	RUN pip install --no-cache-dir doc2text

	# Plain local files: COPY is sufficient, ADD's extra behaviours are not needed.
	COPY dtt.py image.png /my/

	CMD ["/usr/bin/python","/my/dtt.py"]
	import doc2text

	# Run doc2text on /my/image.png and print the text it extracts.

	# Initialize the class.
	doc = doc2text.Document()

	# Read the file in. Currently accepts pdf, png, jpg, bmp, tiff.
	# If reading a PDF, doc2text will split the PDF into its component pages.
	doc.read('/my/image.png')

	# Crop the pages down to estimated text regions, deskew, and optimize for OCR.
	doc.process()

	# Extract text from the pages.
	doc.extract_text()
	text = doc.get_text()

	# Call-style print with a single argument prints the same output under
	# Python 2 and is also valid Python 3 syntax (the statement form is not).
	print(text)