Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from glob import glob
import os
from collections import defaultdict
from pprint import pprint
import PIL
def _read_image_size(path):
    """Return ``(width, height)`` in pixels for the image file at *path*."""
    # A bare ``import PIL`` does not load the ``Image`` submodule, so
    # ``PIL.Image.open`` can fail with AttributeError. Import it explicitly,
    # and lazily, so images that are skipped never require Pillow at all.
    from PIL import Image

    # The context manager releases the underlying file handle right away
    # instead of leaving one open handle per processed image.
    with Image.open(path) as image_pil:
        return image_pil.size


def detectnet_input(data_dir=None):
    """Collect normalised bounding boxes for every annotated ``.jpg`` image.

    For each ``*.jpg`` file, the sibling ``.txt`` file with the same stem is
    read as its annotation. Every non-empty annotation line is split on the
    first ``' 0.0 '``; the text before it is the object name, and of the
    whitespace-separated fields after it the 3rd to 6th are taken as
    ``xmin, ymin, xmax, ymax`` in pixels.
    NOTE(review): this looks like the KITTI-style label layout used by
    DetectNet -- confirm against the tool that produced the labels.

    Coordinates are divided by the image width/height and clamped so they
    are never negative (they are not clamped from above).

    Args:
        data_dir: Directory to search for images. ``None`` (the default)
            searches the current working directory and yields bare file
            names, exactly as ``glob('*.jpg')`` does.

    Returns:
        A list of dicts, one per image that has at least one kept object:
        ``{'objects': [{'name', 'xmin', 'ymin', 'xmax', 'ymax'}, ...],
        'screenshot': <image path>}``.

    Side effects:
        Prints a message for every skipped image and pretty-prints the
        per-name object counts before returning.
    """
    # Object names that are dropped from the output and from the statistics.
    exclude = {"page tabs", "original price", "product name0055-0922",
               "promotion text"}
    if data_dir is None:
        pattern = '*.jpg'
    else:
        pattern = os.path.join(data_dir, '*.jpg')

    examples = []
    stats = defaultdict(int)
    for image in glob(pattern):
        # Swap only the real extension; ``str.replace`` would also rewrite
        # any other '.jpg' occurrence in the path and would leave the path
        # untouched for a differently-cased extension.
        annotation = os.path.splitext(image)[0] + '.txt'
        try:
            with open(annotation, 'r') as f:
                content = f.read()
            # Image size is needed to normalise the pixel coordinates.
            width, height = _read_image_size(image)
        except FileNotFoundError as e:
            print('Skipping', annotation)
            print(' ', e)
            continue

        objects = []
        try:
            for line in content.split('\n'):
                if not line:
                    continue
                # Raises ValueError (handled below) when the separator is
                # missing from the line.
                name, rest = line.split(' 0.0 ', 1)
                if name in exclude:
                    continue
                stats[name] += 1
                # Raises ValueError when fewer than six fields remain or a
                # coordinate is not numeric.
                _, _, xmin, ymin, xmax, ymax, *_ = rest.split()
                objects.append(dict(
                    name=name,
                    xmin=max(0, float(xmin) / width),
                    ymin=max(0, float(ymin) / height),
                    xmax=max(0, float(xmax) / width),
                    ymax=max(0, float(ymax) / height),
                ))
        except ValueError as e:
            # One malformed line discards the whole image's annotation.
            print('Skipping', annotation)
            print(' ', e)
            continue

        if objects:
            examples.append(dict(
                objects=objects,
                screenshot=image))

    pprint(stats)
    return examples
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.