HanaanY/curate.py

## curate.py
import csv
from collections import defaultdict
#rawdata is in the format of ['CaptureEventID', 'ClassificationID', 'UserID', 'Species', 'Count', 'Standing', 'Resting', 'Moving', 'Eating', 'Interacting', 'Babies']
#requires the raw data CSV and the all_images CSV


# Keep only the rows that report exactly one animal and whose species is not
# 'human', and attach the image URLs recorded for each kept capture event.
#
# Raw string literals keep the Windows backslashes literal: '\S' and '\c' are
# not valid escape sequences and trigger a warning on recent Python versions.
# The resulting path strings are identical to the original ones.
with open(r'D:\SnapshotSerengeti\consensus_data.csv', newline='') as rawdata, \
        open(r'D:\SnapshotSerengeti\curated.csv', 'w', newline='') as curated, \
        open(r'D:\SnapshotSerengeti\all_images.csv', newline='') as images:

    rawReader = csv.DictReader(rawdata)
    curWriter = csv.writer(curated)
    imgReader = csv.reader(images)

    # Map the first column of all_images.csv (used below as the capture event
    # ID) to the list of every second-column value (the URL) seen for it.
    img_urls = defaultdict(list)

    for row in imgReader:
        if len(row) < 2:
            # Blank or truncated line: there is no ID/URL pair to record.
            continue
        img_urls[row[0]].append(row[1])

    compact_header = ['CaptureEventID',
                      'Species',
                      'Count',
                      'URL']

    curWriter.writerow(compact_header)

    for row in rawReader:
        # Only the integer conversion is expected to fail, so it is the only
        # statement guarded; rows whose Count is not an integer are skipped.
        try:
            count = int(row['Count'])
        except ValueError:
            continue

        if count == 1 and row['Species'] != 'human':
            # NOTE(review): the URL cell receives the whole list, which
            # csv.writer stringifies (e.g. "['url1', 'url2']") -- confirm that
            # downstream consumers expect this format.
            compact = [row['CaptureEventID'],
                       row['Species'],
                       row['Count'],
                       img_urls[row['CaptureEventID']]]

            curWriter.writerow(compact)
	import csv
	from collections import defaultdict
	#rawdata is in the format of ['CaptureEventID', 'ClassificationID', 'UserID', 'Species', 'Count', 'Standing', 'Resting', 'Moving', 'Eating', 'Interacting', 'Babies']
	#requires the raw data CSV and the all_images CSV


	# Keep only the rows that report exactly one animal and whose species is not
	# 'human', and attach the image URLs recorded for each kept capture event.
	#
	# Raw string literals keep the Windows backslashes literal: '\S' and '\c' are
	# not valid escape sequences and trigger a warning on recent Python versions.
	# The resulting path strings are identical to the original ones.
	with open(r'D:\SnapshotSerengeti\consensus_data.csv', newline='') as rawdata, \
	        open(r'D:\SnapshotSerengeti\curated.csv', 'w', newline='') as curated, \
	        open(r'D:\SnapshotSerengeti\all_images.csv', newline='') as images:

	    rawReader = csv.DictReader(rawdata)
	    curWriter = csv.writer(curated)
	    imgReader = csv.reader(images)

	    # Map the first column of all_images.csv (used below as the capture
	    # event ID) to the list of every second-column value (the URL) seen.
	    img_urls = defaultdict(list)

	    for row in imgReader:
	        if len(row) < 2:
	            # Blank or truncated line: there is no ID/URL pair to record.
	            continue
	        img_urls[row[0]].append(row[1])

	    compact_header = ['CaptureEventID',
	                      'Species',
	                      'Count',
	                      'URL']

	    curWriter.writerow(compact_header)

	    for row in rawReader:
	        # Only the integer conversion is expected to fail, so it is the
	        # only statement guarded; rows with a non-integer Count are skipped.
	        try:
	            count = int(row['Count'])
	        except ValueError:
	            continue

	        if count == 1 and row['Species'] != 'human':
	            # NOTE(review): the URL cell receives the whole list, which
	            # csv.writer stringifies (e.g. "['url1', 'url2']") -- confirm
	            # that downstream consumers expect this format.
	            compact = [row['CaptureEventID'],
	                       row['Species'],
	                       row['Count'],
	                       img_urls[row['CaptureEventID']]]

	            curWriter.writerow(compact)