Skip to content

Instantly share code, notes, and snippets.

@olsososo
Last active December 30, 2015 05:48
Show Gist options
  • Save olsososo/7784794 to your computer and use it in GitHub Desktop.
Save olsososo/7784794 to your computer and use it in GitHub Desktop.
# -*- coding:utf-8 -*-
from __future__ import division
import sys
import math
import string
from PIL import Image
from collections import Counter
def q():
    """Stop the program immediately by calling ``sys.exit()``.

    Raises:
        SystemExit: always, with no exit code argument.
    """
    sys.exit()
if __name__ == '__main__':
if len(sys.argv) < 1:
print "Usage: %s image.jpg [dir]" % sys.argv[0]
else:
w = 50
h = 100
im = sys.argv[1]
im = Image.open(im)
im.load()
width,height = im.size
im = im.resize((w, h), Image.BILINEAR).convert('L')
data = list(im.getdata())
f = open('./data.txt','w')
border = 11000
real_width = 80
real_height = 80
left_threshold = 240
right_threshold = 240
min_space_threshold = int(math.ceil(0.8*(real_width / width) * w))
max_space_threshold = int(math.ceil(1.2*(real_width / width) * w))
left_min_deviation = 2
right_min_deviation = 2
distance_min_deviation = 2
starts = []
stops = []
amount = []
rows = {}
coordinate = []
original_rows = []
text_rows = []
for i in xrange(h):
row = data[i*w:i*w+w]
total = reduce(lambda x, y: x + y,row)
amount.append(total)
if total > border:
f.write(''.ljust(50,'-')+"\r\n")
f.write(str(i)+" "+repr(row)+" "+str(total)+"\r\n")
else:
f.write(str(i)+" "+repr(row)+" "+str(total)+"\r\n")
rows[i] = {}
for r in xrange(len(row)):
if i not in starts and row[r] >= left_threshold and r+1 < w and row[r+1] < left_threshold:
starts.append(i)
rows[i]['start'] = r+1
elif i in starts and row[r] < right_threshold and r+1 < w and row[r+1] >= right_threshold:
rows[i]['stop'] = r
stops.append(i)
break
if i not in stops and r+1 == w:
del rows[i]
f.close()
start_list = []
stop_list = []
start_times = []
stop_times = []
for k,v in rows.items():
start_list.append(v['start'])
stop_list.append(v['stop'])
for i in start_list:
start_times.append(start_list.count(i))
m = max(start_times)
start_index = start_times.index(m)
for i in stop_list:
stop_times.append(stop_list.count(i))
m = max(stop_times)
stop_index = stop_times.index(m)
start = start_list[start_index]
stop = stop_list[stop_index]
distance = stop - start
#filter
for k,v in rows.items():
if math.fabs(v['start'] - start) > left_min_deviation and math.fabs(v['stop'] - stop) > right_min_deviation \
or (math.fabs(v['stop'] - v['start'] - distance) > distance_min_deviation):
del rows[k]
width_proportion = width / w
height_proportion = height / h
#coordinate,pixel
keys = rows.keys()
for index,item in enumerate(keys):
if (item - 1) not in keys:
rect = [int(math.ceil(width_proportion*(start-1))),int(math.ceil(height_proportion*(item-1)))]
original_rect = [start-1,item-1]
rect_text = [int(math.ceil(width_proportion*(stop+1))),int(math.ceil(height_proportion*(item-1)))]
elif index == len(keys) - 1 or keys[index + 1] - item != 1:
rect.extend([int(math.ceil(width_proportion*(stop+1))) ,int(math.ceil(height_proportion*(item+1)))])
original_rect.extend([stop+1,item+1])
rect_text.extend([width,int(math.ceil(height_proportion*(item+1)))])
coordinate.append(rect)
original_rows.append(original_rect)
text_rows.append(rect_text)
target_dir = './thumb/'
text_name = []
p_text_name = []
name = []
name_list = list(string.lowercase)
for index,item in enumerate(coordinate):
text_name.append(name_list[index]+'_text.jpg')
p_text_name.append('p'+name_list[index]+'_text.jpg')
name.append(name_list[index]+'.jpg')
im = Image.open(sys.argv[1])
for index,item in enumerate(text_rows):
box = tuple(item)
newIm = im.crop(box)
newIm.save(target_dir+text_name[index])
#tailor
im = Image.open(sys.argv[1])
for index,item in enumerate(coordinate):
box = tuple(item)
newIm = im.crop(box)
newIm.save(target_dir+name[index])
t_w = 180
t_h = 40
t_left_threshold = 200
t_stop_threshold = 200
t_left_min_deviation = 2
t_stop_min_deviation = 2
t_min_length = 15
t_error = 10
for p in text_name:
t_start_list = []
t_stop_list = []
t_start_times = []
t_stop_times = []
t_rows = []
t_im = Image.open(target_dir+p)
t_width,t_height = t_im.size
t_im = t_im.resize((t_w, t_h), Image.BILINEAR).convert('L')
t_data = list(t_im.getdata())
for t in xrange(t_h):
t_row = t_data[t*t_w:t*t_w+t_w]
t_rows.append(t_row)
total = reduce(lambda x, y: x + y,t_row)
for r in xrange(len(t_row)):
if t_row[r] >= t_left_threshold and r + 1 <t_w and t_row[r+1] < t_left_threshold:
t_start_list.append(r)
break
for i in t_start_list:
t_start_times.append(t_start_list.count(i))
t_m = max(t_start_times)
t_start_index = t_start_times.index(t_m) #left
t_left_top = None #top
t_left_bottom = None #bottom
t_width_proportion = t_width / t_w
t_height_proportion = t_height / t_h
t_stop = []
for i in xrange(len(t_start_list)):
if t_left_top is None and math.fabs(t_start_list[i] - t_start_index) <= t_left_min_deviation and i+2 < t_h and \
math.fabs(t_start_list[i+1] - t_start_index) <= t_left_min_deviation and \
math.fabs(t_start_list[i+2] - t_start_index) <= t_left_min_deviation:
t_left_top = i
if t_left_top is not None and math.fabs(t_start_list[i] - t_start_index) <= t_left_min_deviation and i+2 < t_h and \
math.fabs(t_start_list[i+1] - t_start_index) > t_left_min_deviation and \
math.fabs(t_start_list[i+2] - t_start_index) > t_left_min_deviation:
t_left_bottom = i
break
for t_r in xrange(len(t_rows)):
if t_r >= t_left_top and t_r <= t_left_bottom:
for t_rk in xrange(len(t_rows[t_r])):
if t_rk > t_start_index and t_rk <= t_w - t_min_length:
temp = t_rows[t_r][t_rk:t_rk+t_min_length]
t_b = True
for rp in temp:
if rp < t_left_threshold:
t_b = False
break
if t_b:
t_stop.append(t_rk)
break
t_stop_avg = reduce(lambda x, y: x + y,t_stop) / len(t_stop)
for abc in t_stop:
if math.fabs(abc - t_stop_avg) > t_error:
del t_stop[t_stop.index(abc)]
t_stop_index = max(t_stop)
t_box = [int(math.ceil(t_start_index*t_width_proportion)),int(math.ceil((t_left_top-1)*t_height_proportion))]
t_box.extend([int(math.ceil(t_stop_index*t_width_proportion)),int(math.ceil((t_left_bottom+1)*t_height_proportion))])
t_im = Image.open(target_dir+p)
t_box = tuple(t_box)
newIm = t_im.crop(t_box)
newIm.save(target_dir+'p'+p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment