Last active
July 23, 2018 19:42
-
-
Save HanaanY/44c89ca5e4f138d632b0363ce344e5b9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
from collections import defaultdict

# Build a curated subset of the consensus data.
#
# Inputs (both must exist next to the output file):
#   consensus_data.csv - "rawdata"; its header row is
#       ['CaptureEventID', 'ClassificationID', 'UserID', 'Species', 'Count',
#        'Standing', 'Resting', 'Moving', 'Eating', 'Interacting', 'Babies']
#   all_images.csv     - read positionally: column 0 is matched against
#       CaptureEventID and column 1 is collected as that event's URL.
#
# Output:
#   curated.csv with the columns CaptureEventID, Species, Count, URL.
#
# Let's remove the instances where there is more than 1 animal in a still
# and the instances where the species is a human.

# Raw strings keep the Windows backslashes literal; sequences such as "\S"
# and "\c" are not valid escapes and trigger warnings on modern Python.
with open(r'D:\SnapshotSerengeti\consensus_data.csv', newline='') as rawdata, \
        open(r'D:\SnapshotSerengeti\curated.csv', 'w', newline='') as curated, \
        open(r'D:\SnapshotSerengeti\all_images.csv', newline='') as images:
    rawReader = csv.DictReader(rawdata)
    curWriter = csv.writer(curated)
    imgReader = csv.reader(images)

    # Group every value of column 1 under its column-0 key, so one capture
    # event can carry several URLs.
    img_urls = defaultdict(list)
    for row in imgReader:
        if len(row) < 2:
            # csv.reader yields [] for blank lines; nothing to index here.
            continue
        img_urls[row[0]].append(row[1])

    compact_header = ['CaptureEventID', 'Species', 'Count', 'URL']
    curWriter.writerow(compact_header)

    for row in rawReader:
        # Only the integer parse is guarded, so errors raised while
        # filtering or writing are not silently swallowed.
        try:
            count = int(row['Count'])
        except (TypeError, ValueError):
            # Non-integer counts (ValueError) and rows with no Count value
            # at all (None -> TypeError) are skipped.
            continue

        if count != 1 or row['Species'] == 'human':
            continue

        # .get() avoids inserting an empty list into img_urls for events
        # that have no image rows; the written cell is the same ('[]').
        # NOTE: the URL cell receives the Python list repr, e.g. "['a', 'b']",
        # because csv.writer stringifies non-string values with str().
        curWriter.writerow([
            row['CaptureEventID'],
            row['Species'],
            row['Count'],
            img_urls.get(row['CaptureEventID'], []),
        ])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment