Skip to content

Instantly share code, notes, and snippets.

@finnp
Created May 16, 2013 15:52
Show Gist options
  • Save finnp/5592762 to your computer and use it in GitHub Desktop.
Save finnp/5592762 to your computer and use it in GitHub Desktop.
Python script that cuts the elements of an exported Anvil file into fixed-duration time slices. Each line of the output then describes one slice of the annotation instead of one Anvil element. Anvil: http://www.anvil-software.org/
import csv
import math
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Input: path of the CSV file exported from Anvil (*.txt)
filename_input = ''
# Output: path where the sliced CSV file is written
filename_output = ''

# Field separators used when reading the export and when writing the result
delimiter_for_input = ' '
delimiter_for_output = ','

# Length of one slice, in seconds (float)
slice_duration = 0.1
# (from second, to second); set to False to export everything
specific_export_range = (60, 240)

# Columns that describe the source element rather than the annotation itself.
# They are left out of the output while ignore_element_details is True; set
# it to False to add information about the source elements.
element_details = ['start', 'end', 'ID', 'track']
ignore_element_details = True
# Load the Anvil export into a column-oriented dictionary of the form
# anvil[attribute][row].
print("Reading %s ..." % filename_input)
# Columns whose values are converted to float while reading
numeric_columns = ('start', 'end', 'duration')
with open(filename_input) as csvfile:
    csvreader = csv.reader(csvfile, delimiter=delimiter_for_input)
    # The first line holds the column names. The builtin next() works on
    # Python 2.6+ and Python 3, unlike the Python-2-only .next() method.
    names = next(csvreader)
    anvil = {}
    for name in names:
        anvil[name] = []
    for row in csvreader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # there is nothing to convert, so skip them instead of failing.
            continue
        for field, column in enumerate(names):
            if column in numeric_columns:
                # Convert the number fields
                anvil[column].append(float(row[field]))
            else:
                # Rows may be shorter than the header; pad with empty strings
                anvil[column].append(row[field] if field < len(row) else "")
# Test whether they have all the same length
#for field in range(0,len(row)):
# print(names[field] + " " + str(len(anvil[names[field]])))
# Initialize output format
# sliced[keyname][slice]
def get_slice(time):
    """Return the index of the slice that contains *time*.

    Slice ``k`` covers the interval ``[k * slice_duration,
    (k + 1) * slice_duration)``; ``slice_duration`` is the module-level
    setting, in seconds.

    Binary floating point cannot represent most decimal fractions exactly, so
    a time that lies exactly on a slice boundary can divide to a value just
    below the integer (``0.3 / 0.1 == 2.9999999999999996``). Flooring that
    directly would put the time into the previous slice, so quotients within
    a tiny tolerance of an integer are snapped to it first.
    """
    ratio = time / slice_duration
    nearest = round(ratio)
    if abs(ratio - nearest) < 1e-9:
        return int(nearest)
    return int(math.floor(ratio))
# Work out which slice indices the export covers. Without an explicit range
# it spans from the earliest element start to the latest element end.
anno_start = get_slice(specific_export_range[0] if specific_export_range else min(anvil['start']))
# max(), not min(): the last slice is set by the element that ends latest.
anno_end = get_slice(specific_export_range[1] if specific_export_range else max(anvil['end']))
print("Exporting slices from %i to %i" % (anno_start, anno_end))

# sliced[column][slice]
#  - element-detail columns hold one list per slice, because several elements
#    can contribute to the same slice;
#  - every other column holds a single value per slice, None until filled.
sliced = {}
# Created once, outside the loop, so it also exists when there are no columns.
sliced['slicetime'] = [None] * anno_end
for name in names:
    if name in element_details:
        # A fresh list per slice; [[]] * n would alias one shared list.
        sliced[name] = [[] for _ in range(anno_end)]
    else:
        sliced[name] = [None] * anno_end
# Loop through every row (element) of the anvil document and copy its data
# into each slice between the element's start and end.
for i in range(len(anvil['start'])):
    element_start = anvil['start'][i]
    first_slice = get_slice(element_start)
    # Advance with an integer step counter. Repeatedly adding slice_duration
    # to a float accumulates rounding error, which made some slices be
    # visited twice and others be skipped.
    step = 0
    time = element_start
    slice_i = first_slice
    while time < anvil['end'][i] and slice_i < anno_end:
        # Save the element's start/end information in lists
        for col in element_details:
            sliced[col][slice_i].append(anvil[col][i])
        # Save the slice time
        sliced['slicetime'][slice_i] = slice_duration * float(slice_i)
        # Save the annotation data; an empty value never overwrites data that
        # another element already stored for this slice.
        for track in names:
            if track not in element_details and anvil[track][i]:
                sliced[track][slice_i] = anvil[track][i]
        step += 1
        time = element_start + step * slice_duration
        slice_i = first_slice + step
print("Writing to %s ..." % filename_output)
# If element details should be ignored, drop those columns before writing
if ignore_element_details:
    for key in element_details:
        sliced.pop(key, None)
# Fix the column order once so the header and every data line agree
columns = list(sliced)
# The with-block closes the file even if writing fails part-way.
# NOTE: values are written verbatim (no CSV quoting) and every line ends with
# a trailing delimiter, exactly as before.
with open(filename_output, 'w') as f:
    # First line for the 'keys'
    for key in columns:
        f.write(key)
        f.write(delimiter_for_output)
    f.write('\n')
    # Start at anno_start so slices before the requested range are left out
    for line in range(max(anno_start, 0), len(sliced['slicetime'])):
        # Skip slices no element touched. Compare against None explicitly:
        # the slice at 0.0 seconds is falsy but still a real slice.
        if sliced['slicetime'][line] is None:
            continue
        for key in columns:
            value = sliced[key][line]
            if isinstance(value, list):
                # Element details of overlapping elements are joined with '-'
                f.write('-'.join(str(x) for x in value))
            else:
                f.write(str(value))
            f.write(delimiter_for_output)
        f.write('\n')
print("Completed")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment