indra-uolles/gist:ec64b64d531acbe517fbb4df718ef894

## gistfile1.py
import re

def extract_value(fparam_name, fline):
  """Return the raw numeric text stored under a key in an exported line.

  The line is searched for the key wrapped in doubled double quotes and
  followed by a colon, one whitespace character and a number.

  Args:
    fparam_name: Key whose value is wanted, e.g. 'x' or 'original_width'.
    fline: One line of the exported CSV.

  Returns:
    The text after the colon, including the single whitespace character
    in front of the number (e.g. ' 12.5'); float() accepts it as is.

  Raises:
    ValueError: If the key is not followed by a number anywhere in the line.
  """
  # Example of the text being matched:  ""x"": 12.5
  key = f'""{fparam_name}"":'
  # re.escape keeps any regex metacharacter in the key literal. The number
  # part also accepts a leading minus sign and an exponent, so negative or
  # very small values are neither rejected nor truncated.
  pattern = re.escape(key) + r'(\s-?[0-9.]+(?:[eE][-+]?[0-9]+)?)'
  match = re.search(pattern, fline)
  if match is None:
    raise ValueError(
        f'couldnt find a numeric value for {fparam_name!r} in line {fline!r}')
  return match.group(1)

def extract_label(fline):
  """Return the label found in one exported line.

  The label is taken from a rectanglelabels entry holding a single
  word-character label. When no such entry is present, the third
  comma-separated field of the line is returned instead.

  Args:
    fline: One line of the exported CSV.

  Returns:
    The label text, or None (after printing a message) when the line has
    neither a rectanglelabels entry nor a third field.
  """
  # Example of the text being matched:  ""rectanglelabels"": [""Cat""]
  match = re.search(r'""rectanglelabels"":\s\[""(\w+)""\]', fline)
  if match is not None:
    return match.group(1)
  try:
    # Read the fallback from the parameter itself rather than from a
    # module-level variable, so the function works for any caller.
    return fline.split(',')[2]
  except IndexError:
    print(f'An exception occurred, couldnt extract label from line {fline}')
    return None

def extract_img_url(line):
  """Return the image name held in the first comma-separated field of a line.

  The field is expected to contain an upload prefix of the form
  /data/upload/<digits>/<word characters>- ; the text after that prefix is
  returned.

  Args:
    line: One line of the exported CSV.

  Returns:
    The part of the first field that follows the upload prefix, or None
    (after printing a message) when the field has no such prefix.
  """
  first_field = line.split(',')[0]
  match = re.search(r'/data/upload/\d+/\w+-', first_field)
  if match is None:
    print(f'An exception occurred, couldnt extract img_url from line {line}')
    return None
  # Cut at the position where the prefix really ends; cutting by the prefix
  # length alone is only right when the prefix starts the field.
  return first_field[match.end():]

def extract_new_line(line):
  """Build the output row 'img_url,x1,y1,x2,y2,label' for one exported line.

  A line labelled 'Nothing' produces the image name followed by five empty
  fields. For any other label, the x/y/width/height values are divided by
  100 and scaled by the original image width/height to obtain the top-left
  (x1, y1) and bottom-right (x2, y2) corners.
  """
  img_url = extract_img_url(line)
  label = extract_label(line)
  if label == 'Nothing':
    return f'{img_url},,,,,'

  # Values are read in this fixed order, one extract_value call per key.
  keys = ('x', 'y', 'width', 'height', 'original_width', 'original_height')
  x_pct, y_pct, w_pct, h_pct, img_w, img_h = (
      float(extract_value(key, line)) for key in keys
  )

  left = x_pct / 100.0 * img_w
  top = y_pct / 100.0 * img_h
  right = left + w_pct / 100.0 * img_w
  bottom = top + h_pct / 100.0 * img_h

  return f'{img_url},{left},{top},{right},{bottom},{label}'

# Read the whole export up front; the input file is closed as soon as the
# block ends, so no explicit close() is needed afterwards.
with open('project.csv') as f:
  lines = f.readlines()

# The context manager closes the output file even when a line fails to
# convert, so rows already written are flushed and the handle is not leaked.
with open("project_out.csv", "w") as f_out:
  # lines[1:] skips the header row. The loop variable stays a module-level
  # name `line` because helper code in this file may read it as a global.
  for line in lines[1:]:
    f_out.write(extract_new_line(line) + '\n')
	import re

	def extract_value(fparam_name, fline):
	  """Return the raw numeric text stored under a key in an exported line.

	  The line is searched for the key wrapped in doubled double quotes and
	  followed by a colon, one whitespace character and a number.

	  Args:
	    fparam_name: Key whose value is wanted, e.g. 'x' or 'original_width'.
	    fline: One line of the exported CSV.

	  Returns:
	    The text after the colon, including the single whitespace character
	    in front of the number (e.g. ' 12.5'); float() accepts it as is.

	  Raises:
	    ValueError: If the key is not followed by a number anywhere in the line.
	  """
	  # Example of the text being matched:  ""x"": 12.5
	  key = f'""{fparam_name}"":'
	  # re.escape keeps any regex metacharacter in the key literal. The number
	  # part also accepts a leading minus sign and an exponent, so negative or
	  # very small values are neither rejected nor truncated.
	  pattern = re.escape(key) + r'(\s-?[0-9.]+(?:[eE][-+]?[0-9]+)?)'
	  match = re.search(pattern, fline)
	  if match is None:
	    raise ValueError(
	        f'couldnt find a numeric value for {fparam_name!r} in line {fline!r}')
	  return match.group(1)

	def extract_label(fline):
	  """Return the label found in one exported line.

	  The label is taken from a rectanglelabels entry holding a single
	  word-character label. When no such entry is present, the third
	  comma-separated field of the line is returned instead.

	  Args:
	    fline: One line of the exported CSV.

	  Returns:
	    The label text, or None (after printing a message) when the line has
	    neither a rectanglelabels entry nor a third field.
	  """
	  # Example of the text being matched:  ""rectanglelabels"": [""Cat""]
	  match = re.search(r'""rectanglelabels"":\s\[""(\w+)""\]', fline)
	  if match is not None:
	    return match.group(1)
	  try:
	    # Read the fallback from the parameter itself rather than from a
	    # module-level variable, so the function works for any caller.
	    return fline.split(',')[2]
	  except IndexError:
	    print(f'An exception occurred, couldnt extract label from line {fline}')
	    return None

	def extract_img_url(line):
	  """Return the image name held in the first comma-separated field of a line.

	  The field is expected to contain an upload prefix of the form
	  /data/upload/<digits>/<word characters>- ; the text after that prefix is
	  returned.

	  Args:
	    line: One line of the exported CSV.

	  Returns:
	    The part of the first field that follows the upload prefix, or None
	    (after printing a message) when the field has no such prefix.
	  """
	  first_field = line.split(',')[0]
	  match = re.search(r'/data/upload/\d+/\w+-', first_field)
	  if match is None:
	    print(f'An exception occurred, couldnt extract img_url from line {line}')
	    return None
	  # Cut at the position where the prefix really ends; cutting by the prefix
	  # length alone is only right when the prefix starts the field.
	  return first_field[match.end():]

	def extract_new_line(line):
	  """Build the output row 'img_url,x1,y1,x2,y2,label' for one exported line.

	  A line labelled 'Nothing' produces the image name followed by five empty
	  fields. For any other label, the x/y/width/height values are divided by
	  100 and scaled by the original image width/height to obtain the top-left
	  (x1, y1) and bottom-right (x2, y2) corners.
	  """
	  img_url = extract_img_url(line)
	  label = extract_label(line)
	  if label == 'Nothing':
	    return f'{img_url},,,,,'

	  x = float(extract_value('x', line))
	  y = float(extract_value('y', line))
	  width = float(extract_value('width', line))
	  height = float(extract_value('height', line))
	  original_width = float(extract_value('original_width', line))
	  original_height = float(extract_value('original_height', line))

	  # Scale the values (given per hundred of the image size) to pixels.
	  pixel_x = x / 100.0 * original_width
	  pixel_y = y / 100.0 * original_height
	  pixel_width = width / 100.0 * original_width
	  pixel_height = height / 100.0 * original_height

	  x1 = pixel_x
	  y1 = pixel_y
	  x2 = pixel_x + pixel_width
	  y2 = pixel_y + pixel_height

	  return f'{img_url},{x1},{y1},{x2},{y2},{label}'

	# Read the whole export up front; the input file is closed as soon as the
	# block ends, so no explicit close() is needed afterwards.
	with open('project.csv') as f:
	  lines = f.readlines()

	# The context manager closes the output file even when a line fails to
	# convert, so rows already written are flushed and the handle is not leaked.
	with open("project_out.csv", "w") as f_out:
	  # lines[1:] skips the header row. The loop variable stays a module-level
	  # name `line` because helper code in this file may read it as a global.
	  for line in lines[1:]:
	    f_out.write(extract_new_line(line) + '\n')