Skip to content

Instantly share code, notes, and snippets.

@OterLabb
Created January 31, 2019 10:41
Show Gist options
  • Save OterLabb/69f34baa3daffa18da8f6b684b11f47d to your computer and use it in GitHub Desktop.
Save OterLabb/69f34baa3daffa18da8f6b684b11f47d to your computer and use it in GitHub Desktop.
"""
1. Install python 3.x from python.org
2. Install the Python Imaging Library (PIL) through its maintained fork, Pillow. Open CMD and type "pip install Pillow"
3. Install pytesseract. Open CMD and type "pip install pytesseract"
4. Install tesseract from https://github.com/UB-Mannheim/tesseract/wiki
5. Add tesseract install folder to path in windows
6. Add digits database from https://github.com/Shreeshrii/tessdata_shreetest to tesseract install folder !! NOT WORKING 100% !!
7. Do your own training http://pretius.com/how-to-prepare-training-files-for-tesseract-ocr-and-improve-characters-recognition/
8. Install ExifRead. Open CMD and type "pip install exifread"
"""
# Import all the tools needed
from PIL import Image
import pytesseract
import glob
import exifread
import csv
import time
# Mark edges to crop for recognition. Borders are: left, up, right, bottom
"""
Example how to cut out the X's. Each # is one pixel.
1234567891011
1 ###########
2 ###########
3 ###########
4 ###########
5 ###########
6 ###XXX#####
7 ###XXX#####
8 ###XXX#####
9 ###########
10 ###########
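Left: 4 up: 6 right: 6 bottom: 8
(These are 1-based, inclusive pixel positions as counted in the drawing above.
PIL's crop box is 0-based with exclusive right/bottom edges, so the same
region would be passed to crop() as (3, 5, 6, 8).)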
"""
# Crop box in pixels, listed in the order it is later packed into the
# (left, up, right, bottom) tuple handed to Image.crop().
left, up, right, bottom = 290, 1852, 357, 1915
# Collect every '.jpg' in the current folder. Sorted, because glob returns
# files in an OS-dependent order and both the numbered debug crops and the
# result rows should come out the same way on every run.
image_list = sorted(glob.glob("*.jpg"))
# Open up a text file to write the results to (CSV: image name, prediction)
with open('results.txt', mode='w', newline='') as csv_file:
    fieldnames = ['Image_Name', 'Temperature']  # Column headers
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    # Process every image; num numbers the cropped debug images on disk
    for num, image_path in enumerate(image_list):
        start_time = time.perf_counter()  # Start timing this image
        area = (left, up, right, bottom)  # Area defined at the start
        name = 'bilde' + str(num) + '.png'  # Name for the cropped image
        # Crop the region of interest and keep it on disk for debugging.
        # The context manager closes the source file even if cropping fails.
        with Image.open(image_path) as img:
            img.crop(area).save(name)
        # Run the OCR on the saved crop (same input as before), but close the
        # file afterwards instead of leaking one open handle per image.
        with Image.open(name) as cropped_file:
            prediction = pytesseract.image_to_string(
                cropped_file, lang='dig', config='--psm 8')  # Actual prediction
        # Depending on the pytesseract version the text can end in
        # newline/form-feed characters; strip them so each CSV row stays on
        # one line and the console message is not broken up.
        prediction = prediction.strip()
        elapsed = time.perf_counter() - start_time  # Seconds spent on this image
        print('Predicted ' + image_path + ' to ' + name + ' ' + prediction
              + ' degrees celsius in ' + '{:.2f}'.format(elapsed) + ' seconds')  # Print to console
        writer.writerow({'Image_Name': image_path, 'Temperature': prediction})  # Write predictions to file
        # for Exif extraction (disabled; image_path is the current image file)
        # exif = open(image_path, 'rb')
        # tags = exifread.process_file(exif)
        # print(tags)
        # dateTaken = tags['EXIF DateTimeOriginal']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment