Instantly share code, notes, and snippets.

@HanaanY /curate.py
Last active Jul 23, 2018

Embed
What would you like to do?
import csv
from collections import defaultdict
#raw data has the columns ['CaptureEventID', 'ClassificationID', 'UserID', 'Species', 'Count', 'Standing', 'Resting', 'Moving', 'Eating', 'Interacting', 'Babies']
#requires consensus_data.csv (the raw data) and all_images.csv
#let's remove the instances where there is more than 1 animal in a still, and those where there is a human
# Input/output locations. Raw strings keep the Windows backslashes literal
# instead of relying on unrecognised escapes such as \S and \c, which newer
# Python versions warn about. The resulting path values are unchanged.
CONSENSUS_PATH = r'D:\SnapshotSerengeti\consensus_data.csv'
CURATED_PATH = r'D:\SnapshotSerengeti\curated.csv'
ALL_IMAGES_PATH = r'D:\SnapshotSerengeti\all_images.csv'

# Column names written as the first row of the curated file.
COMPACT_HEADER = ['CaptureEventID', 'Species', 'Count', 'URL']


def load_image_urls(image_rows):
    """Group the second column of each row under the first column.

    Args:
        image_rows: iterable of sequences (e.g. a ``csv.reader``); column 0
            is used as the key and column 1 as the value to collect.

    Returns:
        A plain dict mapping each key to the list of its values, in input
        order. Rows with fewer than two columns are skipped.
    """
    grouped = defaultdict(list)
    for row in image_rows:
        # Blank lines come back from csv.reader as an empty row; indexing
        # them would raise IndexError, so skip anything too short.
        if len(row) < 2:
            continue
        grouped[row[0]].append(row[1])
    return dict(grouped)


def curate_rows(consensus_rows, img_urls):
    """Yield compact rows for captures of exactly one non-human animal.

    Args:
        consensus_rows: iterable of mappings with at least the keys
            'CaptureEventID', 'Species' and 'Count' (e.g. a
            ``csv.DictReader``).
        img_urls: mapping from capture event ID to a list of URLs.

    Yields:
        ``[CaptureEventID, Species, Count, urls]`` where ``Count`` is the
        original, unconverted field and ``urls`` is the list stored for the
        event (an empty list when the event has no entry).
    """
    for row in consensus_rows:
        # Only the conversion is guarded: a non-integer count raises
        # ValueError and a missing one (None) raises TypeError. Either way
        # the row is skipped.
        try:
            count = int(row['Count'])
        except (TypeError, ValueError):
            continue
        if count != 1 or row['Species'] == 'human':
            continue
        event_id = row['CaptureEventID']
        # .get() avoids inserting an empty entry for unknown event IDs.
        yield [event_id, row['Species'], row['Count'],
               img_urls.get(event_id, [])]


def main():
    """Read the consensus and image CSVs and write the curated CSV."""
    with open(CONSENSUS_PATH, newline='') as rawdata, \
            open(CURATED_PATH, 'w', newline='') as curated, \
            open(ALL_IMAGES_PATH, newline='') as images:
        img_urls = load_image_urls(csv.reader(images))
        cur_writer = csv.writer(curated)
        cur_writer.writerow(COMPACT_HEADER)
        # NOTE: the URL cell receives a list, which csv.writer stringifies,
        # so the file holds the list's repr. Kept as-is so existing
        # consumers of curated.csv see the same format.
        cur_writer.writerows(curate_rows(csv.DictReader(rawdata), img_urls))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment