Created
December 1, 2016 02:26
-
-
Save TaylorMutch/7a6e95fa1b3a32c1cf94fa4be5f22d87 to your computer and use it in GitHub Desktop.
Generates a classification data structure from a CSV declaring the classification.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Collects each of Jerilyn's nights into a dictionary structure, packs it into a db, and reloads it from that db when one already exists, for speed.
'''
import contextlib
import csv
import datetime
import os
import sqlite3
import sys

from sodar_utils import SodarCollection, register_adapters, datetime_to_name
# Register the numpy array adapters provided by sodar_utils as soon as the module loads.
# NOTE(review): presumably this is what lets sqlite3 store/load the "array" columns of the
# night table -- confirm against sodar_utils.
register_adapters()
def read_classification_data(path):
    """Load the night classification CSV into a list of row dictionaries.

    Every column other than ``year``, ``month`` and ``day`` is treated as a
    flag and converted to a Python boolean: ``True`` when the cell is ``1``
    or ``'1'``, ``False`` for anything else.  The three date columns are left
    exactly as read from the file (strings).

    :param path: Path to the classification CSV; its first line is the header.
    :return: A list with one dictionary per CSV data row.
    """
    date_columns = ('year', 'month', 'day')

    # newline='' hands newline handling to the csv module, as the csv
    # documentation requires for file objects given to a reader.
    with open(path, 'r', newline='') as f:
        data = list(csv.DictReader(f))

    # Convert 1's and 0's to python booleans
    for row in data:
        for attribute in row:
            if attribute not in date_columns:
                row[attribute] = row[attribute] in (1, '1')
    return data
def _build_from_db(path):
    """Load the cached nights from an existing SQLite database file.

    Reads every row of the ``night`` table and returns them in the same
    dictionary layout that ``_build_from_sodars`` produces.

    :param path: Path to the SQLite file containing the ``night`` table.
    :return: A list of dictionaries with ``speeds``, ``directions`` and
        ``date`` keys, one per stored row.
    """
    con = sqlite3.connect(path, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
    # Using a sqlite3 connection as a context manager only commits or rolls
    # back; closing() is what actually releases the database handle.
    with contextlib.closing(con):
        # Name the columns instead of relying on the positions of SELECT *.
        rows = con.execute("SELECT timestamp, speeds, directions FROM night").fetchall()
    return [
        {'speeds': speeds, 'directions': directions, 'date': timestamp}
        for timestamp, speeds, directions in rows
    ]
def _build_from_sodars(path, night_dates):
    """Collect the requested nights from the raw sodar files under ``path``.

    For each entry in ``night_dates`` the speed and direction arrays whose
    metadata name equals ``datetime_to_name(entry['date'])`` are looked up.
    Nights that cannot be resolved are skipped.  When at least one night was
    found and no ``collection.db`` exists in ``path`` yet, the result is also
    written there so later runs can load it with ``_build_from_db``.

    :param path: Directory handed to ``SodarCollection``; also where the
        ``collection.db`` cache is created.
    :param night_dates: Iterable of dictionaries, each carrying a ``'date'`` key.
    :return: A list of dictionaries with ``speeds``, ``directions`` and
        ``date`` keys, where ``date`` is the value of the entry's ``'date'`` key.
    """
    sodars = SodarCollection(path)
    speeds, s_meta = sodars.night_array('speed')
    dirs, d_meta = sodars.night_array('direction')

    result = []
    for night_date in night_dates:
        try:
            date = night_date['date']
            name = datetime_to_name(date)
            # [0] raises IndexError when no metadata entry has this name.
            s_index = [i for i, j in enumerate(s_meta) if j['name'] == name][0]
            d_index = [i for i, j in enumerate(d_meta) if j['name'] == name][0]
            night = {
                'speeds': speeds[s_index],
                'directions': dirs[d_index],
                # Store the date itself (not the whole entry) so the layout
                # matches what _build_from_db returns and can be bound in the
                # INSERT below.
                'date': date,
            }
        except (KeyError, IndexError, TypeError):
            # Missing key, unknown night or unusable entry: bail and move on.
            continue
        result.append(night)

    # build a db so we don't have to index every time
    db_path = os.path.join(path, 'collection.db')
    # An empty cache would be loaded as "no nights" on every later run, so
    # only write one when there is something to store.
    if result and not os.path.exists(db_path):
        con = sqlite3.connect(db_path, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES)
        # closing() releases the handle; the inner `con` commits or rolls back.
        with contextlib.closing(con), con:
            con.execute("CREATE TABLE night (id integer primary key, timestamp date, speeds array, directions array)")
            con.executemany(
                "INSERT INTO night VALUES (NULL, ?,?,?)",
                [(row['date'], row['speeds'], row['directions']) for row in result],
            )
    return result
def build_night_classification(directory, class_path):
    """ Generate a list of dictionaries that contain the classification and data for each pair of nights

    A night is included when it is present for both McRae and Primet and the
    classification file has a row for its date; other nights are skipped.

    :param directory: The directory containing the ``McRae`` and ``Primet`` sodar data directories.
    :param class_path: The path to the classification data (CSV).
    :return: A list of dictionaries containing the classification (``meta``) and raw data for each night.
    :raises FileNotFoundError: If ``directory`` does not contain both a ``McRae`` and a ``Primet`` directory.
    """
    # sanity checking
    directory_contents = os.listdir(directory)
    mcrae_path = os.path.abspath(os.path.join(directory, 'McRae'))
    primet_path = os.path.abspath(os.path.join(directory, 'Primet'))
    found_mcrae = 'McRae' in directory_contents and os.path.isdir(mcrae_path)
    found_primet = 'Primet' in directory_contents and os.path.isdir(primet_path)
    if not (found_mcrae and found_primet):
        raise FileNotFoundError('Could not find the McRae and Primet directories. Are you pointing this '
                                'to the right directory?')

    meta = read_classification_data(class_path)  # the original classification
    # dict.update() returns None, so attach the date in a plain loop and pass
    # the rows themselves on as the list of nights to load.
    for row in meta:
        row['date'] = datetime.date(int(row['year']), int(row['month']), int(row['day']))

    night_lists = []
    for path in (mcrae_path, primet_path):
        db_path = os.path.join(path, 'collection.db')
        if os.path.exists(db_path):
            night_lists.append(_build_from_db(db_path))
        else:
            night_lists.append(_build_from_sodars(path, meta))
    mcrae, primet = night_lists

    # Index by date once instead of rescanning the lists for every McRae night;
    # setdefault keeps the first entry for a date, like the original scan did.
    primet_by_date = {}
    for p in primet:
        primet_by_date.setdefault(p['date'], p)
    meta_by_date = {}
    for row in meta:
        meta_by_date.setdefault(row['date'], row)

    classification = []
    for m in mcrae:
        p = primet_by_date.get(m['date'])
        night_meta = meta_by_date.get(m['date'])
        if p is None or night_meta is None:
            # No Primet counterpart, or a (cached) night without a classification row.
            continue
        classification.append({
            'date': m['date'],
            'primet_speeds': p['speeds'],
            'primet_directions': p['directions'],
            'mcrae_speeds': m['speeds'],
            'mcrae_directions': m['directions'],
            'meta': night_meta,
        })
    return classification
if __name__ == '__main__':
    # Expects two arguments: the sodar data directory and the classification CSV.
    if len(sys.argv) < 3:
        # Exit with a usage message instead of an IndexError on sys.argv.
        sys.exit('usage: {} <sodar_directory> <classification_csv>'.format(sys.argv[0]))
    jerilyn_classification = build_night_classification(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment