# /ThreadOCR.py

## ThreadOCR.py
from itertools import takewhile
from PIL import Image


def load_from_user():
    """Prompt for an image path until a file can be opened and decoded.

    Returns:
        The image object produced by ``Image.open`` after ``load()`` has
        read its pixel data.
    """
    img = None
    while img is None:
        f = raw_input('Image file? ')
        try:
            # Binary mode: image data is not text, and text mode may apply
            # newline translation or decoding to it.  The ``with`` block
            # closes the handle; ``load()`` has read the pixel data by then.
            with open(f, 'rb') as fh:
                img = Image.open(fh)
                img.load()
        except Exception as e:
            # Format both values into one string; passing them as two print
            # arguments shows up as a tuple under Python 2.
            print('Unable to load image: %s. Please try again (%s)' % (f, e))
            img = None
    return img

def input_int(prompt):
    """Prompt repeatedly until the user types something ``int()`` accepts.

    Args:
        prompt: Text shown to the user on every attempt.

    Returns:
        The entered value converted to ``int``.
    """
    while True:
        val = raw_input(prompt)
        try:
            return int(val)
        except ValueError:
            # Not an integer literal; ask again.  Only ValueError is caught
            # so that unrelated errors are not silently swallowed.
            continue

def load_pixel_stream(img, x, y):
    """Return band-1 (green) values of row ``y`` from column ``x`` rightwards.

    Args:
        img: Image to read from; it must have a band with index 1.
        x: Left-most column of the strip.
        y: Row to extract.

    Returns:
        The result of ``getdata(band=1)`` on the one-pixel-high strip that
        runs from ``x`` to the right edge of the image.
    """
    # Use the real image width.  getbbox() reports the bounding box of the
    # non-zero pixels only (and None for an all-zero image), so it is not a
    # reliable substitute for the image dimensions.
    width = img.size[0]
    # Crop down to a single line of pixels
    row = img.crop((x, y, width, y + 1))
    return row.getdata(band=1)  # Get green band only

def threshold(iterable, thresh):
    """Return a list of booleans: True for each item greater than ``thresh``."""
    flags = []
    for value in iterable:
        flags.append(value > thresh)
    return flags

def runlength(iterable):
    """Yield the length of each run of consecutive equal items.

    Every run is reported, including the first one at its full length and
    the final one that ends when the input is exhausted.

    Args:
        iterable: Items to scan; neighbours are compared with ``==``.

    Yields:
        One positive integer per run, in input order.
    """
    curr = None
    curr_count = 0
    for x in iterable:
        # ``curr_count`` doubles as the "have we seen anything yet" flag, so
        # an input that really starts with None is still counted correctly.
        if curr_count and x == curr:
            curr_count += 1
        else:
            if curr_count:
                yield curr_count
            curr = x
            # The item that opens a run counts towards that run.
            curr_count = 1
    # Flush the run that was still open when the input ended.
    if curr_count:
        yield curr_count

def ocr(raw_data):
    """Turn a stream of pixel values into run lengths of bright/dark pixels.

    Reading stops at the first value that is not greater than 0.  The values
    before it are flagged as above/below 210 by ``threshold`` and the flags
    are handed to ``runlength``, whose generator is returned.
    """
    def is_positive(value):
        return value > 0

    leading = takewhile(is_positive, raw_data)
    flags = threshold(leading, 210)
    return runlength(flags)

def avg(px):
    """Return the total of the values in ``px`` divided by their count."""
    total = sum(px)
    count = len(px)
    return total / count

def sum(px):
    """Fold the items of ``px`` together with ``+`` from left to right."""
    def add(left, right):
        return left + right

    return reduce(add, px)

def seek_next_block(img, x, y):
    """Scan down column ``x`` from row ``y`` to where the next block starts.

    Rows whose pixel average is below 230 are skipped first, then the rows
    whose average is 230 or more that follow them.

    Args:
        img: Image to inspect via ``getpixel``.
        x: Column to scan.
        y: Row to start scanning from.

    Returns:
        The row at which the second scan stopped, or the image height if
        the bottom of the image was reached first.
    """
    # Use the real image height.  getbbox() only bounds the non-zero pixels
    # and returns None for an all-zero image.
    max_y = img.size[1]
    # Find the white
    while y < max_y and avg(img.getpixel((x, y))) < 230:
        y += 1
    # Now find the colour
    while y < max_y and avg(img.getpixel((x, y))) >= 230:
        y += 1
    return y

def print_row(iter):
    """Print the items of ``iter`` on one line, separated by commas."""
    cells = ['{}'.format(item) for item in iter]
    print(','.join(cells))

def print_as_table(results):
    """Print ``results`` as comma-separated columns, one column per key.

    The sorted keys form the header row.  Row ``i`` below it holds the
    ``i``-th entry of every column, with an empty cell wherever a column has
    fewer than ``i + 1`` entries.

    Args:
        results: Mapping from key to a sequence of values.
    """
    keys = sorted(results.keys())
    print_row(keys)
    # max() raises ValueError on an empty sequence, so guard the case where
    # there are no columns at all.
    max_len = max(len(results[k]) for k in keys) if keys else 0
    for i in range(max_len):
        row = [results[k][i] if i < len(results[k]) else '' for k in keys]
        print_row(row)

if __name__ == '__main__':
    # Script entry point: load an image, collect run lengths for one pixel
    # row per block found down column ``x``, and print them as a table.
    img = load_from_user()
    x = 500 # input_int('X offset? ')
    y = 285 # input_int('Starting Y offset? ')
    # The height never changes, so read it once.  img.size is used rather
    # than getbbox(), which only bounds the non-zero pixels, returns None for
    # an all-zero image, and rescans the image on every call.
    height = img.size[1]
    results = dict()
    while y < height:
        raw_data = load_pixel_stream(img, x, y)
        acc = 0
        lengths = []
        for length in ocr(raw_data):
            # Keep only the runs that begin before 3200 accumulated pixels.
            if acc < 3200:
                lengths.append(length)
            acc += length
        # Rows that produced no runs get no column in the table.
        if lengths:
            results[y] = lengths
        y = seek_next_block(img, x, y) + 5
    print_as_table(results)
	from itertools import takewhile
	from PIL import Image


	def load_from_user():
	    """Prompt for an image path until a file can be opened and decoded.

	    Returns:
	        The image object produced by ``Image.open`` after ``load()`` has
	        read its pixel data.
	    """
	    img = None
	    while img is None:
	        f = raw_input('Image file? ')
	        try:
	            # Binary mode: image data is not text, and text mode may apply
	            # newline translation or decoding to it.  The ``with`` block
	            # closes the handle; ``load()`` has read the pixel data by then.
	            with open(f, 'rb') as fh:
	                img = Image.open(fh)
	                img.load()
	        except Exception as e:
	            # Format both values into one string; passing them as two print
	            # arguments shows up as a tuple under Python 2.
	            print('Unable to load image: %s. Please try again (%s)' % (f, e))
	            img = None
	    return img

	def input_int(prompt):
	    """Prompt repeatedly until the user types something ``int()`` accepts.

	    Args:
	        prompt: Text shown to the user on every attempt.

	    Returns:
	        The entered value converted to ``int``.
	    """
	    while True:
	        val = raw_input(prompt)
	        try:
	            return int(val)
	        except ValueError:
	            # Not an integer literal; ask again.  Only ValueError is caught
	            # so that unrelated errors are not silently swallowed.
	            continue

	def load_pixel_stream(img, x, y):
	    """Return band-1 (green) values of row ``y`` from column ``x`` rightwards.

	    Args:
	        img: Image to read from; it must have a band with index 1.
	        x: Left-most column of the strip.
	        y: Row to extract.

	    Returns:
	        The result of ``getdata(band=1)`` on the one-pixel-high strip that
	        runs from ``x`` to the right edge of the image.
	    """
	    # Use the real image width.  getbbox() reports the bounding box of the
	    # non-zero pixels only (and None for an all-zero image), so it is not a
	    # reliable substitute for the image dimensions.
	    width = img.size[0]
	    # Crop down to a single line of pixels
	    row = img.crop((x, y, width, y + 1))
	    return row.getdata(band=1)  # Get green band only

	def threshold(iterable, thresh):
	    """Return a list of booleans: True for each item greater than ``thresh``."""
	    return [x > thresh for x in iterable]

	def runlength(iterable):
	    """Yield the length of each run of consecutive equal items.

	    Every run is reported, including the first one at its full length and
	    the final one that ends when the input is exhausted.

	    Args:
	        iterable: Items to scan; neighbours are compared with ``==``.

	    Yields:
	        One positive integer per run, in input order.
	    """
	    curr = None
	    curr_count = 0
	    for x in iterable:
	        # ``curr_count`` doubles as the "have we seen anything yet" flag, so
	        # an input that really starts with None is still counted correctly.
	        if curr_count and x == curr:
	            curr_count += 1
	        else:
	            if curr_count:
	                yield curr_count
	            curr = x
	            # The item that opens a run counts towards that run.
	            curr_count = 1
	    # Flush the run that was still open when the input ended.
	    if curr_count:
	        yield curr_count

	def ocr(raw_data):
	    """Turn a stream of pixel values into run lengths of bright/dark pixels.

	    Reading stops at the first value that is not greater than 0.  The values
	    before it are flagged as above/below 210 by ``threshold`` and the flags
	    are handed to ``runlength``, whose generator is returned.
	    """
	    cutoff = takewhile(lambda x: x > 0, raw_data)
	    return runlength(threshold(cutoff, 210))

	def avg(px):
	    """Return the total of the values in ``px`` divided by their count."""
	    return sum(px) / len(px)

	def sum(px):
	    """Fold the items of ``px`` together with ``+`` from left to right."""
	    return reduce(lambda x, y: x + y, px)

	def seek_next_block(img, x, y):
	    """Scan down column ``x`` from row ``y`` to where the next block starts.

	    Rows whose pixel average is below 230 are skipped first, then the rows
	    whose average is 230 or more that follow them.

	    Args:
	        img: Image to inspect via ``getpixel``.
	        x: Column to scan.
	        y: Row to start scanning from.

	    Returns:
	        The row at which the second scan stopped, or the image height if
	        the bottom of the image was reached first.
	    """
	    # Use the real image height.  getbbox() only bounds the non-zero pixels
	    # and returns None for an all-zero image.
	    max_y = img.size[1]
	    # Find the white
	    while y < max_y and avg(img.getpixel((x, y))) < 230:
	        y += 1
	    # Now find the colour
	    while y < max_y and avg(img.getpixel((x, y))) >= 230:
	        y += 1
	    return y

	def print_row(iter):
	    """Print the items of ``iter`` on one line, separated by commas."""
	    l = list(iter)
	    # One '{}' placeholder per item, joined by commas.
	    fmt = ','.join(['{}'] * len(l))
	    print(fmt.format(*l))

	def print_as_table(results):
	    """Print ``results`` as comma-separated columns, one column per key.

	    The sorted keys form the header row.  Row ``i`` below it holds the
	    ``i``-th entry of every column, with an empty cell wherever a column has
	    fewer than ``i + 1`` entries.

	    Args:
	        results: Mapping from key to a sequence of values.
	    """
	    keys = sorted(results.keys())
	    print_row(keys)
	    # max() raises ValueError on an empty sequence, so guard the case where
	    # there are no columns at all.
	    max_len = max(len(results[k]) for k in keys) if keys else 0
	    for i in range(max_len):
	        row = [results[k][i] if i < len(results[k]) else '' for k in keys]
	        print_row(row)

	if __name__ == '__main__':
	    # Script entry point: load an image, collect run lengths for one pixel
	    # row per block found down column ``x``, and print them as a table.
	    img = load_from_user()
	    x = 500 # input_int('X offset? ')
	    y = 285 # input_int('Starting Y offset? ')
	    # The height never changes, so read it once.  img.size is used rather
	    # than getbbox(), which only bounds the non-zero pixels, returns None for
	    # an all-zero image, and rescans the image on every call.
	    height = img.size[1]
	    results = dict()
	    while y < height:
	        raw_data = load_pixel_stream(img, x, y)
	        acc = 0
	        lengths = []
	        for length in ocr(raw_data):
	            # Keep only the runs that begin before 3200 accumulated pixels.
	            if acc < 3200:
	                lengths.append(length)
	            acc += length
	        # Rows that produced no runs get no column in the table.
	        if lengths:
	            results[y] = lengths
	        y = seek_next_block(img, x, y) + 5
	    print_as_table(results)