Python script to do a quick analysis of my 2014 stats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv, re | |
from collections import namedtuple, Counter | |
# Field names for one day of stats; StatDay is built from this list, so the
# order here defines both the attribute names and the positional indexes.
cols = [
    'date', 'dow', 'hours_slept',
    'mood_morning', 'mood_day', 'mood_evening',
    'breakfast', 'lunch', 'dinner',
    'drinks', 'snacks', 'coffee', 'weight',
]

# One row of stats, addressable by field name or by index.
StatDay = namedtuple('StatDay', cols)

# Raw strings keep the backslashes for the regex engine instead of relying on
# Python passing unknown string escapes through (which newer versions warn on).
RE_NON_WORD = re.compile(r'\W+')  # one or more non-word characters
RE_NUM = re.compile(r'\d+')  # one or more digits
def read_stats(path):
    """Yield one StatDay for each data row of the CSV file at ``path``.

    The first row is treated as a header and skipped. Blank lines are
    skipped as well: csv.reader reports them as empty rows, and unpacking
    an empty row into StatDay would raise TypeError.

    This is a generator, so the file is opened on first iteration and
    stays open until the generator is exhausted or closed.

    Raises:
        TypeError: if a non-blank row does not have exactly one value per
            StatDay field.
    """
    with open(path, 'r') as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        next(reader, None)  # Skip header
        for row in reader:
            if not row:
                continue
            yield StatDay(*row)
def is_valid_row(stats_day):
    """Return True when the sleep and all three mood fields are non-empty.

    ``stats_day`` only needs the attributes ``hours_slept``,
    ``mood_morning``, ``mood_day`` and ``mood_evening``. The result is a
    real bool rather than whichever field value an ``and`` chain would
    happen to return.
    """
    required = (
        stats_day.hours_slept,
        stats_day.mood_morning,
        stats_day.mood_day,
        stats_day.mood_evening,
    )
    return all(required)
def filter_data(data):
    """Return a list of the rows in ``data`` that is_valid_row accepts."""
    return list(filter(is_valid_row, data))
def column_stats(data, col_num, f):
    """Compute the average, maximum and minimum of one column.

    Args:
        data: iterable of indexable rows.
        col_num: index of the column to summarize.
        f: callable that converts a raw cell value to a number.

    Returns:
        A dict with keys 'avg', 'max' and 'min'. When ``data`` has no
        rows all three values are None, because there is nothing to
        average or compare.
    """
    vals = [f(d[col_num]) for d in data]
    if not vals:
        return {'avg': None, 'max': None, 'min': None}
    # Divide by a float so integer values are not truncated on Python 2.
    return {
        'avg': sum(vals) / float(len(vals)),
        'max': max(vals),
        'min': min(vals),
    }
def frequency(data, col_num, f=None):
    """Count how often values occur in one column.

    Without ``f`` the raw cell values are counted as they are. With ``f``
    each cell is passed through ``f``, which must return an iterable of
    strings; every returned string is lower-cased and counted separately.

    Returns a collections.Counter.
    """
    counts = Counter()
    if f is None:
        counts.update(row[col_num] for row in data)
        return counts
    for row in data:
        counts.update(token.lower() for token in f(row[col_num]))
    return counts
def split_words(v):
    """Split ``v`` into words on runs of non-word characters.

    Empty tokens are dropped. The regex split produces them for an empty
    string and for leading or trailing separators, and they would
    otherwise be counted as a word by callers.
    """
    return [word for word in RE_NON_WORD.split(v) if word]
def split_by_char(c):
    """Build a function that splits a value on the separator ``c``.

    The returned function gives back the whitespace-stripped pieces of
    its argument, or an empty list when the argument is falsy.
    """
    def splitter(v):
        if not v:
            return []
        return [piece.strip() for piece in v.split(c)]

    return splitter
def to_float(v):
    """Convert ``v`` to a float, falling back to 0.0 when it cannot be parsed.

    Only the errors float() raises for bad input are handled (ValueError
    for unparseable text, TypeError for unsupported types such as None),
    so unrelated exceptions like KeyboardInterrupt still propagate.
    """
    try:
        return float(v)
    except (TypeError, ValueError):
        return 0.0
# Sum of all numbers inside the value
def number_sum(v):
    """Return the sum of every run of digits in ``v`` as a float.

    Returns 0.0 when ``v`` contains no digits or is not a string (for
    example None).

    NOTE: RE_NUM matches digit runs only, so a decimal such as "1.5"
    contributes 1 + 5 rather than 1.5.
    """
    try:
        matches = RE_NUM.findall(v)
    except TypeError:
        # findall rejects non-string input; treat it as "no numbers".
        return 0.0
    # Start from 0.0 so the result is a float even with no matches.
    return sum((float(m) for m in matches), 0.0)
# Convert "# word1, # word2" to [word1] * N + [word2] * N to make counting easy
def noun_count_to_list(v):
    """Expand a comma-separated "count noun" string into repeated nouns.

    Each entry is split on whitespace: the first token is the count and
    the remaining tokens, joined by single spaces, are the noun. So
    "2 beer, 1 red wine" becomes ['beer', 'beer', 'red wine'].

    Tolerated input:
      * blank entries (e.g. from a trailing comma) are skipped;
      * repeated spaces never leak into the noun;
      * an entry whose first token is not an integer is counted once,
        with the whole entry as the noun.

    Returns an empty list when ``v`` is falsy.
    """
    if not v:
        return []
    nouns = []
    for entry in v.split(','):
        tokens = entry.split()
        if not tokens:
            continue
        try:
            count = int(tokens[0])
            noun_tokens = tokens[1:]
        except ValueError:
            # No leading count: treat the entry as a single occurrence.
            count = 1
            noun_tokens = tokens
        nouns.extend([' '.join(noun_tokens)] * count)
    return nouns
import sys

# The CSV location defaults to the original hard-coded file; an optional first
# command-line argument overrides it.
path = sys.argv[1] if len(sys.argv) > 1 else '/Users/danielgoldin/Downloads/stats-2014.csv'

# Load everything, keeping only rows that pass is_valid_row.
rows = read_stats(path)
rows = filter_data(rows)

# print is called with a single parenthesized argument so the script runs
# unchanged on both Python 2 and Python 3.
print('Num filtered rows: {}'.format(len(rows)))
print('Avg sleep: {}'.format(column_stats(rows, cols.index('hours_slept'), to_float)))

# Moods
print('Moods - morning: {}'.format(frequency(rows, cols.index('mood_morning'))))
print('Moods - day: {}'.format(frequency(rows, cols.index('mood_day'))))
print('Moods - evening: {}'.format(frequency(rows, cols.index('mood_evening'))))

# Eating
print('Breakfast: {}'.format(frequency(rows, cols.index('breakfast'), split_words)))
print('Lunch: {}'.format(frequency(rows, cols.index('lunch'), split_words)))
print('Dinner: {}'.format(frequency(rows, cols.index('dinner'), split_words)))

# Drinking
print('Drinking Total: {}'.format(column_stats(rows, cols.index('drinks'), number_sum)))
print('Drinking: {}'.format(frequency(rows, cols.index('drinks'), noun_count_to_list)))

# Coffee
print('Coffee: {}'.format(column_stats(rows, cols.index('coffee'), to_float)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment