# finnp/anvil_slicer.py

## anvil_slicer.py
import csv
import math

# Specify the path to the exported Anvil csv file (*.txt)
filename_input = ''
# Specify the path where the output csv file should be saved
filename_output = ''

# CSV delimiters. The csv module only accepts a one-character delimiter, so
# the input separator is written as an escape sequence rather than as literal
# whitespace that editors and copy/paste can silently alter.
# NOTE(review): assumes the Anvil export is tab-separated -- confirm against
# a real export file.
delimiter_for_input = '\t'
delimiter_for_output = ','


# If set to False, information about the source elements is added to the output
ignore_element_details = True
# Length of one slice in seconds (float)
slice_duration = 0.1
# (from second, to second); set to False to export everything
specific_export_range = (60, 240)


# Columns that describe the source element rather than the annotation itself;
# they are dropped from the output when ignore_element_details is True
element_details = ['start', 'end', 'ID', 'track']

# Put CSV file in dictionary in the form of anvil[attribute][row]:
# one list per column name, every list indexed by data row.
print("Reading %s ..." % filename_input)
with open(filename_input) as csvfile:
	csvreader = csv.reader(csvfile, delimiter=delimiter_for_input)
	# The first row holds the column names. The next() builtin works on
	# Python 2.6+ and Python 3, unlike the reader's Python-2-only .next().
	names = next(csvreader)
	anvil = {}
	for name in names:
		anvil[name] = []
	for row in csvreader:
		if not row:
			# The csv reader yields an empty list for a blank line (for
			# example a trailing newline); there is nothing to store.
			continue
		for field, name in enumerate(names):
			if name in ('start', 'end', 'duration'):
				# Convert the number fields
				anvil[name].append(float(row[field]))
			else:
				# Rows shorter than the header are padded with empty strings
				anvil[name].append(row[field] if field < len(row) else "")


# Test whether they all have the same length
#for field in range(0,len(row)):
#	print(names[field] + " " + str(len(anvil[names[field]])))

# Initialize output format
# sliced[keyname][slice]

def get_slice(time, duration=None):
	"""Return the index of the slice that contains the given time.

	time     -- point in time, in seconds
	duration -- length of one slice in seconds; defaults to the module-level
	            slice_duration when omitted

	Slice n covers the half-open interval [n * duration, (n + 1) * duration).
	"""
	if duration is None:
		duration = slice_duration
	# Binary floating point makes e.g. 0.6 / 0.1 evaluate to 5.999999999999999,
	# which would floor to slice 5 although 0.6 is the start of slice 6. A
	# tiny tolerance keeps times that sit on a slice boundary in the slice
	# that starts there.
	tolerance = 1e-9
	return int(math.floor(time / duration + tolerance))

# First slice index to export and the (exclusive) index at which to stop.
if specific_export_range:
	anno_start = get_slice(specific_export_range[0])
	anno_end = get_slice(specific_export_range[1])
else:
	# Export everything: from the earliest start to the latest end.
	anno_start = get_slice(min(anvil['start']))
	# anno_end is used as an exclusive bound, so add one to keep the slice
	# that contains the latest end time inside the exported range.
	anno_end = get_slice(max(anvil['end'])) + 1

print("Exporting slices from %i to %i" % (anno_start, anno_end))

# Output table in the form sliced[keyname][slice]: one list per column with
# one entry per slice index below anno_end.
sliced = {}
# Time stamp of each slice; None marks a slice that no element touched.
# Created once, outside the loop, so it also exists when `names` is empty.
sliced['slicetime'] = [None] * anno_end
for name in names:
	if name in element_details:
		# Element details are collected per slice, so every slice gets its
		# own list (a fresh object each, so slices do not share one list).
		sliced[name] = [[] for _ in range(anno_end)]
	else:
		sliced[name] = [None] * anno_end


# Columns that carry annotation data (everything that is not an element detail)
annotation_tracks = [track for track in names if track not in element_details]
# Element detail columns that are actually present in the input
detail_columns = [col for col in element_details if col in anvil]
# Lowest slice index that is stored: the start of the export range, and never
# negative because a negative index would wrap around to the end of the lists
first_slice = max(anno_start, 0)

# Loop through every row of the anvil document
for i in range(0, len(anvil['start'])):
	# For each row extract information and slice between start and end
	element_start = anvil['start'][i]
	element_end = anvil['end'][i]
	slice_i = get_slice(element_start)
	step = 0
	# The sample time is recomputed from the element start on every step and
	# the slice index is advanced as an integer, so rounding errors do not
	# accumulate and no slice is visited twice or skipped.
	while element_start + step * slice_duration < element_end and slice_i < anno_end:
		# Slices before the export range are stepped over without storing
		if slice_i >= first_slice:
			# Save the elements start/end information in lists
			for col in detail_columns:
				sliced[col][slice_i].append(anvil[col][i])
			# save the slice time
			sliced['slicetime'][slice_i] = slice_duration * float(slice_i)
			# Save the annotation data; an empty cell leaves whatever an
			# earlier element stored for this slice untouched
			for track in annotation_tracks:
				if anvil[track][i]:
					sliced[track][slice_i] = anvil[track][i]

		step += 1
		slice_i += 1


print("Writing to %s ..." % filename_output)


# If element details should be ignored, delete them from array (should be refactored)
if ignore_element_details:
	for key in element_details:
		sliced.pop(key, None)

# Fix the column order once so the header and every data line agree
columns = list(sliced.keys())

# The with-statement closes the file even when writing fails part-way
with open(filename_output, 'w') as f:
	# First line for the 'keys'. Every field, including the last one, is
	# followed by the delimiter; the data lines below use the same layout.
	for key in columns:
		f.write(key)
		f.write(delimiter_for_output)
	f.write('\n')

	for line in range(0, len(sliced['slicetime'])):
		# Test first whether the line 'exists'. The comparison is against
		# None because slice 0 has the slice time 0.0, which is falsy but
		# still a slice that must be written.
		if sliced['slicetime'][line] is not None:
			for key in columns:
				cell = sliced[key][line]
				if isinstance(cell, list):
					# Element details of all elements in the slice, joined by '-'
					f.write('-'.join(str(x) for x in cell))
				else:
					f.write(str(cell))
				f.write(delimiter_for_output)
			f.write('\n')

print("Completed")
	import csv
	import math

	# Specify the path to the exported Anvil csv file (*.txt)
	filename_input = ''
	# Specify the path where the output csv file should be saved
	filename_output = ''

	# CSV delimiters. The input separator is written as an escape sequence
	# rather than as literal whitespace that editors can silently alter.
	# NOTE(review): assumes the Anvil export is tab-separated -- confirm
	# against a real export file.
	delimiter_for_input = '\t'
	delimiter_for_output = ','


	# If set to False, information about the source elements is added to the output
	ignore_element_details = True
	# Length of one slice in seconds (float)
	slice_duration = 0.1
	# (from second, to second); set to False to export everything
	specific_export_range = (60, 240)


	# Columns that describe the source element; they are dropped from the
	# output when ignore_element_details is True
	element_details = ['start', 'end', 'ID', 'track']

	# Put CSV file in dictionary in the form of anvil[attribute][row]
	print("Reading %s ..." % filename_input)
	with open(filename_input) as csvfile:
		csvreader = csv.reader(csvfile, delimiter=delimiter_for_input)
		# The first row holds the column names; next() works on Python 2.6+
		# and Python 3, unlike the reader's Python-2-only .next() method.
		names = next(csvreader)
		anvil = {}
		for name in names:
			anvil[name] = []
		for row in csvreader:
			if not row:
				# Blank line (for example a trailing newline): nothing to store
				continue
			for field, name in enumerate(names):
				if name in ('start', 'end', 'duration'):
					# Convert the number fields
					anvil[name].append(float(row[field]))
				else:
					# Rows shorter than the header are padded with empty strings
					anvil[name].append(row[field] if field < len(row) else "")


	# Test whether they all have the same length
	#for field in range(0,len(row)):
	#	print(names[field] + " " + str(len(anvil[names[field]])))

	# Initialize output format
	# sliced[keyname][slice]

	def get_slice(time, duration=None):
		"""Return the index of the slice that contains the given time.

		time     -- point in time, in seconds
		duration -- length of one slice in seconds; defaults to the
		            module-level slice_duration when omitted
		"""
		if duration is None:
			duration = slice_duration
		# Binary floating point makes e.g. 0.6 / 0.1 evaluate to
		# 5.999999999999999; the tolerance keeps boundary times in the
		# slice that starts there.
		tolerance = 1e-9
		return int(math.floor(time / duration + tolerance))

	# First slice index to export and the (exclusive) index at which to stop
	if specific_export_range:
		anno_start = get_slice(specific_export_range[0])
		anno_end = get_slice(specific_export_range[1])
	else:
		# Export everything: from the earliest start to the latest end
		anno_start = get_slice(min(anvil['start']))
		# +1 because anno_end is an exclusive bound
		anno_end = get_slice(max(anvil['end'])) + 1

	print("Exporting slices from %i to %i" % (anno_start, anno_end))

	sliced = {}
	# Time stamp of each slice; None marks a slice that no element touched
	sliced['slicetime'] = [None] * anno_end
	for name in names:
		if name in element_details:
			# One fresh list per slice to collect the element details
			sliced[name] = [[] for _ in range(anno_end)]
		else:
			sliced[name] = [None] * anno_end


	# Columns that carry annotation data (everything that is not an element detail)
	annotation_tracks = [track for track in names if track not in element_details]
	# Element detail columns that are actually present in the input
	detail_columns = [col for col in element_details if col in anvil]
	# Lowest slice index that is stored; never negative, because a negative
	# index would wrap around to the end of the lists
	first_slice = max(anno_start, 0)

	# Loop through every row of the anvil document
	for i in range(0, len(anvil['start'])):
		# For each row extract information and slice between start and end
		element_start = anvil['start'][i]
		element_end = anvil['end'][i]
		slice_i = get_slice(element_start)
		step = 0
		# The sample time is recomputed from the element start on every step
		# and the slice index advances as an integer, so rounding errors do
		# not accumulate.
		while element_start + step * slice_duration < element_end and slice_i < anno_end:
			# Slices before the export range are stepped over without storing
			if slice_i >= first_slice:
				# Save the elements start/end information in lists
				for col in detail_columns:
					sliced[col][slice_i].append(anvil[col][i])
				# save the slice time
				sliced['slicetime'][slice_i] = slice_duration * float(slice_i)
				# Save the annotation data; an empty cell leaves whatever an
				# earlier element stored for this slice untouched
				for track in annotation_tracks:
					if anvil[track][i]:
						sliced[track][slice_i] = anvil[track][i]

			step += 1
			slice_i += 1


	print("Writing to %s ..." % filename_output)


	# If element details should be ignored, delete them from array (should be refactored)
	if ignore_element_details:
		for key in element_details:
			sliced.pop(key, None)

	# Fix the column order once so the header and every data line agree
	columns = list(sliced.keys())

	# The with-statement closes the file even when writing fails part-way
	with open(filename_output, 'w') as f:
		# First line for the 'keys'; every field is followed by the delimiter
		for key in columns:
			f.write(key)
			f.write(delimiter_for_output)
		f.write('\n')

		for line in range(0, len(sliced['slicetime'])):
			# Test first whether the line 'exists'. Compared against None
			# because slice 0 has the falsy slice time 0.0.
			if sliced['slicetime'][line] is not None:
				for key in columns:
					cell = sliced[key][line]
					if isinstance(cell, list):
						f.write('-'.join(str(x) for x in cell))
					else:
						f.write(str(cell))
					f.write(delimiter_for_output)
				f.write('\n')

	print("Completed")