Skip to content

Instantly share code, notes, and snippets.

@dangoldin
Created January 11, 2015 23:23
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save dangoldin/14906d4f863cd83f3008 to your computer and use it in GitHub Desktop.
Python script to do a quick analysis of my 2014 stats
import csv, re
from collections import namedtuple, Counter
# Column order of the stats CSV. The list doubles as the field list of StatDay,
# and its index positions are used to pick a column out of a row.
cols = [
    'date', 'dow', 'hours_slept',
    'mood_morning', 'mood_day', 'mood_evening',
    'breakfast', 'lunch', 'dinner',
    'drinks', 'snacks', 'coffee', 'weight',
]
# One CSV row, with one field per entry in `cols`.
StatDay = namedtuple('StatDay', cols)
# Raw strings keep the regex escapes from being read as (invalid) string
# escapes, which newer Python versions warn about.
RE_NON_WORD = re.compile(r'\W+')  # one or more non-word characters
RE_NUM = re.compile(r'\d+')  # one run of digits
def read_stats(path):
    """Yield one StatDay for each data row of the CSV file at *path*.

    The first row is treated as a header and skipped. Blank lines, which
    csv.reader reports as empty lists, are skipped too so that a stray empty
    line does not abort the run with a TypeError from StatDay().

    This is a generator: the file is opened on first iteration and stays open
    until the generator is exhausted or closed.

    Raises:
        TypeError: if a non-blank row does not supply exactly one value per
            StatDay field.
    """
    with open(path, 'r') as f:
        reader = csv.reader(f, delimiter=',', quotechar='"')
        next(reader, None)  # Skip header
        for row in reader:
            if not row:
                # Blank line in the file; nothing to build a StatDay from.
                continue
            yield StatDay(*row)
def is_valid_row(stats_day):
    """Return True when the row has a sleep value and all three mood values.

    A field counts as present when it is truthy, so empty strings and None
    make the row invalid. The result is always a real bool rather than one of
    the raw field values.
    """
    required = (
        stats_day.hours_slept,
        stats_day.mood_morning,
        stats_day.mood_day,
        stats_day.mood_evening,
    )
    return all(required)
def filter_data(data):
    """Return a list of the rows in *data* that is_valid_row accepts.

    Input order is preserved. *data* may be any iterable; it is consumed once.
    """
    kept = []
    for row in data:
        if is_valid_row(row):
            kept.append(row)
    return kept
def column_stats(data, col_num, f):
    """Return the average, maximum and minimum of one column.

    Args:
        data: iterable of indexable rows.
        col_num: index of the column to summarise.
        f: callable applied to each raw cell to turn it into a number.

    Returns:
        A dict with the keys 'avg', 'max' and 'min'. When *data* has no rows
        every value is None instead of raising ZeroDivisionError/ValueError.
    """
    values = [f(row[col_num]) for row in data]
    if not values:
        return {'avg': None, 'max': None, 'min': None}
    return {
        # float() keeps the average exact on Python 2 when f returns ints.
        'avg': sum(values) / float(len(values)),
        'max': max(values),
        'min': min(values),
    }
def frequency(data, col_num, f = None):
    """Count how often each value occurs in one column.

    Args:
        data: iterable of indexable rows.
        col_num: index of the column to count.
        f: optional callable that splits a cell into several items. When
            given, every item it returns is lower-cased and counted on its
            own; when omitted, the raw cell value is counted as is.

    Returns:
        A Counter mapping each value (or lower-cased item) to its count.
    """
    counts = Counter()
    for row in data:
        cell = row[col_num]
        if f is None:
            counts[cell] += 1
            continue
        for item in f(cell):
            counts[item.lower()] += 1
    return counts
def split_words(v):
    """Split *v* into words on runs of non-word characters.

    RE_NON_WORD.split() returns empty strings for an empty value and for
    leading or trailing punctuation; those are dropped here so that callers
    counting words do not end up counting '' as a word.

    Returns:
        A list of the non-empty pieces, in their original order and case.
    """
    return [word for word in RE_NON_WORD.split(v) if word]
def split_by_char(c):
    """Build a function that splits a value on the separator *c*.

    The returned function takes one value. A falsy value (empty string or
    None) gives an empty list; otherwise the value is split on *c* and each
    piece has its surrounding whitespace stripped.
    """
    def splitter(text):
        return [piece.strip() for piece in text.split(c)] if text else []
    return splitter
def to_float(v):
    """Convert *v* to a float, falling back to 0.0 when it cannot be parsed.

    Only the conversion failures float() raises for bad input are handled
    (ValueError for unparsable text, TypeError for None and other
    unsupported types), so KeyboardInterrupt and SystemExit are no longer
    swallowed.
    """
    try:
        return float(v)
    except (TypeError, ValueError):
        return 0.0
# Sum of all numbers inside the value
def number_sum(v):
    """Return the sum of every digit run RE_NUM finds in *v*, as a float.

    Each match of RE_NUM is converted and added up separately, so a value
    such as '2 beer, 1 wine' gives 3.0. A value that cannot be searched or
    converted (for example None) gives 0.0.

    Only TypeError and ValueError are handled, so KeyboardInterrupt and
    SystemExit are no longer swallowed.
    """
    try:
        return sum(float(match) for match in RE_NUM.findall(v))
    except (TypeError, ValueError):
        return 0.0
# Convert "# word1, # word2" to [word1] * N + [word2] * N to make counting easy
def noun_count_to_list(v):
    """Expand a comma-separated "<count> <noun>" value into repeated nouns.

    '2 beer, 1 red wine' becomes ['beer', 'beer', 'red wine'].

    Entries are parsed leniently instead of raising ValueError:
      * blank entries (for example after a trailing comma) are skipped;
      * any amount of whitespace may separate the count from the noun;
      * an entry that does not start with an integer is counted once, using
        the whole entry as the noun.

    Returns:
        A list of nouns, each repeated according to its count. A falsy value
        (empty string or None) gives an empty list.
    """
    if not v:
        return []
    nouns = []
    for entry in v.split(','):
        entry = entry.strip()
        if not entry:
            continue
        # Split off the leading count only; the noun keeps its inner spaces.
        parts = entry.split(None, 1)
        noun = parts[1] if len(parts) > 1 else ''
        try:
            count = int(parts[0])
        except ValueError:
            # No leading count: treat the whole entry as a single occurrence.
            count, noun = 1, entry
        nouns.extend([noun] * count)
    return nouns
import sys

# Default to the original hard-coded export; pass a path as the first
# command-line argument to analyse a different file.
path = sys.argv[1] if len(sys.argv) > 1 else '/Users/danielgoldin/Downloads/stats-2014.csv'
rows = read_stats(path)
rows = filter_data(rows)  # Materialises the generator into a list

# print(...) with a single argument behaves the same on Python 2 and 3,
# unlike the Python 2-only print statement.
print('Num filtered rows: {}'.format(len(rows)))
print('Avg sleep: {}'.format(column_stats(rows, cols.index('hours_slept'), to_float)))

# Moods
print('Moods - morning: {}'.format(frequency(rows, cols.index('mood_morning'))))
print('Moods - day: {}'.format(frequency(rows, cols.index('mood_day'))))
print('Moods - evening: {}'.format(frequency(rows, cols.index('mood_evening'))))

# Eating
print('Breakfast: {}'.format(frequency(rows, cols.index('breakfast'), split_words)))
print('Lunch: {}'.format(frequency(rows, cols.index('lunch'), split_words)))
print('Dinner: {}'.format(frequency(rows, cols.index('dinner'), split_words)))

# Drinking
print('Drinking Total: {}'.format(column_stats(rows, cols.index('drinks'), number_sum)))
print('Drinking: {}'.format(frequency(rows, cols.index('drinks'), noun_count_to_list)))

# Coffee
print('Coffee: {}'.format(column_stats(rows, cols.index('coffee'), to_float)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment