Skip to content

Instantly share code, notes, and snippets.

@TaylorMutch
Created December 1, 2016 02:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TaylorMutch/7a6e95fa1b3a32c1cf94fa4be5f22d87 to your computer and use it in GitHub Desktop.
Save TaylorMutch/7a6e95fa1b3a32c1cf94fa4be5f22d87 to your computer and use it in GitHub Desktop.
Generates a classification data structure from a CSV declaring the classification.
'''
Collect each of Jerilyn's nights into a dictionary structure, and packs and reloads it from a db if necessary for speed.
'''
import os
import sys
import csv
import sqlite3
import datetime
from sodar_utils import SodarCollection, register_adapters, datetime_to_name
register_adapters() # registers numpy array adapters
def read_classification_data(path):
with open(path, 'r') as f:
reader = csv.DictReader(f)
data = [row for row in reader]
# Convert 1's and 0's to python booleans
for row in data:
for attribute in row.keys():
if attribute not in ['year','month','day']:
value = row[attribute]
row[attribute] = True if value == 1 or value == '1' else False
return data
def _build_from_db(path):
con = sqlite3.connect(path, detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
with con:
cur = con.cursor()
cur.execute("SELECT * FROM night")
data = cur.fetchall()
result = [{
'speeds': row[2],
'directions': row[3],
'date': row[1]
} for row in data]
return result
def _build_from_sodars(path, night_dates):
sodars = SodarCollection(path)
speeds, s_meta = sodars.night_array('speed')
dirs, d_meta = sodars.night_array('direction')
result = list()
for night_date in night_dates:
try:
s_index = [i for i, j in enumerate(s_meta) if j['name'] == datetime_to_name(night_date['date'])][0]
d_index = [i for i, j in enumerate(d_meta) if j['name'] == datetime_to_name(night_date['date'])][0]
s_data = speeds[s_index]
d_data = dirs[d_index]
result.append({
'speeds': s_data,
'directions': d_data,
'date': night_date
})
except:
continue # bail and move on
# build a db so we don't have to index every time
db_path = ''.join([path, os.sep, 'collection.db'])
if not os.path.exists(db_path):
con = sqlite3.connect(db_path, detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES)
with con:
cur = con.cursor()
cur.execute("CREATE TABLE night (id integer primary key, timestamp date, speeds array, directions array)")
for row in result:
speed = row['speeds']
dir = row['directions']
date = row['date']
cur.execute("INSERT INTO night VALUES (NULL, ?,?,?)", (date, speed, dir))
return result
def build_night_classification(directory, class_path):
""" Generate a list of dictionaries that contain the classification and data for each pair of nights
:param directory: The directory to the sodar date you want to load.
:param class_path: The path to the classification data.
:return: A list of dictionaries containing the classification and raw data for each night.
"""
# sanity checking
directory_contents = os.listdir(directory)
mcrae_path = os.path.abspath(os.path.join(directory, 'McRae'))
primet_path = os.path.abspath(os.path.join(directory, 'Primet'))
found_mcrae = 'McRae' in directory_contents and os.path.isdir(mcrae_path)
found_primet = 'Primet' in directory_contents and os.path.isdir(primet_path)
if found_mcrae and found_primet:
meta = read_classification_data(class_path) # the original classification
night_dates = [row.update({'date': datetime.date(int(row['year']), int(row['month']), int(row['day']))}) for row in meta]
paths = [mcrae_path, primet_path]
night_lists = list()
for path in paths:
db_path = ''.join([path, os.sep, 'collection.db'])
if os.path.exists(db_path):
night_lists.append(_build_from_db(db_path))
else:
night_lists.append(_build_from_sodars(path, night_dates))
classification = list()
mcrae, primet = night_lists
for m in mcrae:
for p in primet:
if m['date'] == p['date']:
classification.append({
'date': m['date'],
'primet_speeds': p['speeds'],
'primet_directions': p['directions'],
'mcrae_speeds': m['speeds'],
'mcrae_directions': p['directions'],
'meta': [row for row in meta if row['date'] == m['date']][0]
})
break
return classification
else:
raise FileNotFoundError('Could not find the McRae and Primet directories. Are you pointing this '
'to the right directory?')
if __name__ == '__main__':
jerilyn_classification = build_night_classification(sys.argv[1], sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment