anonymous / ThreadOCR.py
Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Quick-and-dirty script for "OCRing" the thread-running data from a screenshot of the YourKit Java Profiler. Note that it is only designed to extract running / not-running data, and that it contains a few constants that are probably only reasonable for my screen resolution (Retina MBP). For the original motivation, see http://stackoverflow.com/questions/21590260/yourkit-export-csv-data-formats/21675597.

View ThreadOCR.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
from itertools import takewhile
from PIL import Image
def load_from_user():
    """Prompt for an image path until an image loads successfully.

    The user is asked again after every failed attempt, so this function
    only returns once an image has been opened and its pixel data read.

    Returns:
        The loaded PIL image.
    """
    # ``raw_input`` exists only on Python 2; Python 3 renamed it ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    img = None
    while img is None:
        f = read_line('Image file? ')
        try:
            # Binary mode: image bytes must not pass through text decoding
            # or newline translation.
            with open(f, 'rb') as handle:
                img = Image.open(handle)
                # Read the pixel data now, while the file is still open.
                img.load()
        except Exception as e:
            # Deliberately broad: any failure to open or decode the file
            # just means "ask again".
            print('Unable to load image: %s. Please try again (%s)' % (f, e))
            img = None
    return img
def input_int(prompt, reader=None):
    """Prompt repeatedly until the reply parses as an integer.

    Args:
        prompt: Text shown to the user on every attempt.
        reader: Optional callable taking the prompt and returning the reply
            as a string. Defaults to the interpreter's line-input function.

    Returns:
        The first reply that ``int()`` accepts, converted to ``int``.
    """
    if reader is None:
        # ``raw_input`` exists only on Python 2; Python 3 renamed it ``input``.
        try:
            reader = raw_input
        except NameError:
            reader = input

    while True:
        val = reader(prompt)
        try:
            return int(val)
        except ValueError:
            # Not a number: ask again. Other exceptions are left to propagate.
            continue
def load_pixel_stream(img, x, y):
    """Return the green-band values of one pixel row, from ``x`` rightwards.

    Args:
        img: PIL image to read from (must have a band at index 1).
        x: Left column of the row segment.
        y: Row to extract.

    Returns:
        The band-1 (green) data of the one-pixel-high crop that spans from
        column ``x`` to the right edge of the image on row ``y``.
    """
    # Use the real image width. ``getbbox()`` reports the bounding box of the
    # non-zero pixels only, and is ``None`` for an all-zero image.
    width = img.size[0]
    # Crop down to a single line of pixels
    row = img.crop((x, y, width, y + 1))
    return row.getdata(band=1)  # Get green band only
def threshold(iterable, thresh):
    """Map each value to ``True`` if it is strictly above ``thresh``.

    Returns a list of booleans in the same order as ``iterable``.
    """
    flags = []
    for value in iterable:
        flags.append(value > thresh)
    return flags
def runlength(iterable):
    """Yield the length of each run of consecutive equal items.

    Every run is reported in full, including the first and the last one,
    so the yielded lengths always add up to the number of items consumed.

    Args:
        iterable: Any iterable of values comparable with ``==``.

    Yields:
        One positive integer per run, in order of appearance.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import groupby

    for _, run in groupby(iterable):
        # ``groupby`` hands out a lazy sub-iterator; count its items.
        yield len(list(run))
def ocr(raw_data, thresh=210):
    """Turn a row of pixel values into run lengths of above/below-threshold.

    Args:
        raw_data: Iterable of numeric pixel values for one row.
        thresh: Values strictly greater than this count as "above".
            Defaults to 210, the value previously hard-coded here.

    Returns:
        The result of ``runlength`` applied to the thresholded values.
    """
    # Stop at the first non-positive value; nothing after it is considered.
    cutoff = takewhile(lambda x: x > 0, raw_data)
    return runlength(threshold(cutoff, thresh))
def avg(px):
    """Return the sum of ``px`` divided by its length (via ``/``)."""
    total = sum(px)
    count = len(px)
    return total / count
def sum(px):
    """Add up the items of ``px`` from left to right using ``+``.

    Note that this intentionally keeps the module's existing name, which
    shadows the builtin ``sum`` for the rest of the file.

    Args:
        px: Iterable of values that support ``+`` with each other.

    Returns:
        The left-to-right total, or ``0`` when ``px`` is empty.
    """
    # ``reduce`` is a builtin only on Python 2; ``functools.reduce`` exists
    # on both Python 2.6+ and Python 3.
    from functools import reduce
    from operator import add

    values = iter(px)
    try:
        first = next(values)
    except StopIteration:
        # Empty input: return 0 rather than letting reduce() raise TypeError.
        return 0
    # Seed with the first item so non-numeric addable values still work.
    return reduce(add, values, first)
def seek_next_block(img, x, y):
    """Scan down column ``x`` from row ``y`` to the start of the next block.

    The scan first skips rows whose pixel average is below the white level,
    then skips rows at or above it, and stops at the first row after that
    (or at the bottom of the image).

    Args:
        img: PIL image to scan.
        x: Column to scan along.
        y: Row to start scanning from.

    Returns:
        The row index where the scan stopped; never more than the image
        height when ``y`` starts inside the image.
    """
    # Use the real image height. ``getbbox()`` reports the bounding box of the
    # non-zero pixels only, and is ``None`` for an all-zero image.
    max_y = img.size[1]
    white_level = 230  # pixel average at or above this counts as white

    # Find the white
    while y < max_y and avg(img.getpixel((x, y))) < white_level:
        y += 1
    # Now find the colour
    while y < max_y and avg(img.getpixel((x, y))) >= white_level:
        y += 1
    return y
def print_row(iter):
    """Print the items of ``iter`` on one line, separated by commas.

    Each item is rendered with ``'{}'.format``; an empty input prints an
    empty line.
    """
    cells = ['{}'.format(item) for item in iter]
    print(','.join(cells))
def print_as_table(results):
    """Print ``results`` as comma-separated columns, one column per key.

    The first line lists the keys in sorted order. Each following line holds
    the i-th entry of every key's list, with an empty cell where a list is
    shorter than the longest one.

    Args:
        results: Mapping from a sortable key to a list of values.

    Returns:
        None. Nothing is printed when ``results`` is empty.
    """
    if not results:
        # ``max()`` below would raise ValueError on an empty mapping.
        return

    keys = sorted(results)
    print_row(keys)
    max_len = max(len(results[k]) for k in keys)
    # ``range`` works on both Python 2 and 3 (``xrange`` is Python 2 only).
    for i in range(max_len):
        row = [results[k][i] if i < len(results[k]) else '' for k in keys]
        print_row(row)
if __name__ == '__main__':
    # Script entry point: load a screenshot, walk down it block by block,
    # run-length encode one pixel row per block, and print the result as a
    # comma-separated table keyed by the row's y coordinate.
    img = load_from_user()
    x = 500  # input_int('X offset? ')
    y = 285  # input_int('Starting Y offset? ')

    # The image height is loop-invariant; read it once. ``getbbox()`` would
    # rescan the image each time and reports only the non-zero region.
    max_y = img.size[1]
    max_total = 3200  # stop collecting runs once their lengths reach this sum

    results = dict()
    while y < max_y:
        raw_data = load_pixel_stream(img, x, y)
        acc = 0
        row = []
        for length in ocr(raw_data):
            if acc >= max_total:
                # Nothing further would be recorded; stop consuming runs.
                break
            row.append(length)
            acc += length
        # Only rows that produced at least one run become table columns.
        if row:
            results[y] = row
        # Step past the found boundary before sampling the next row.
        y = seek_next_block(img, x, y) + 5
    print_as_table(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.