Skip to content

Instantly share code, notes, and snippets.

@rosiecakes
Last active December 29, 2016 02:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rosiecakes/0747b75cc9729ada766092b23bafdd50 to your computer and use it in GitHub Desktop.
Save rosiecakes/0747b75cc9729ada766092b23bafdd50 to your computer and use it in GitHub Desktop.
dataquest birth years
# Example of the conversion performed by read_csv (defined below): each raw
# CSV line is one string of comma-separated integer fields, e.g.
['2000,1,1,6,9083',
'2000,1,2,7,8006',
'2000,1,3,1,11363',
'2000,1,4,2,13032',
'2000,1,5,3,12558',
'2000,1,6,4,12466',
'2000,1,7,5,12516',
'2000,1,8,6,8934',
'2000,1,9,7,7949',
'2000,1,10,1,11668']
# and is turned into a list of ints, one inner list per line. The functions
# below read column 0 as the year, column 1 as the month, column 3 as the
# day of week and column 4 as the births count (column 2 is presumably the
# day of the month -- confirm against the CSV header).
# NOTE(review): the converted sample below has one more row (day 11) than the
# raw sample above.
[[2000, 1, 1, 6, 9083],
[2000, 1, 2, 7, 8006],
[2000, 1, 3, 1, 11363],
[2000, 1, 4, 2, 13032],
[2000, 1, 5, 3, 12558],
[2000, 1, 6, 4, 12466],
[2000, 1, 7, 5, 12516],
[2000, 1, 8, 6, 8934],
[2000, 1, 9, 7, 7949],
[2000, 1, 10, 1, 11668],
[2000, 1, 11, 2, 12611]]
def read_csv(filename):
    """Read a CSV file of integer fields into a list of lists of ints.

    The first line is treated as a header and skipped. Blank lines
    (including the empty string left by a trailing newline) are ignored
    instead of crashing on ``int('')``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one inner list per data line; each inner list holds the
        line's comma-separated fields converted to ``int``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank data line contains a non-integer field.
    """
    # Use a context manager so the file handle is closed even on error.
    with open(filename) as csv_file:
        lines = csv_file.read().split('\n')

    rows = []
    # lines[0] is the header row.
    for line in lines[1:]:
        if not line.strip():
            # A trailing newline produces an empty final entry; skip it.
            continue
        rows.append([int(field) for field in line.split(',')])
    return rows
def month_births(list_of_lists):
    """Sum the births count (column 4) for each month (column 1).

    Args:
        list_of_lists: Rows of ints as produced by ``read_csv``; index 1 is
            read as the month and index 4 as the number of births.

    Returns:
        A dict mapping each month value to the total births over all rows
        with that month. Empty input yields an empty dict.
    """
    births_per_month = {}
    for row in list_of_lists:
        month = row[1]
        births = row[4]
        # Accumulate rather than overwrite: the first occurrence starts at 0.
        births_per_month[month] = births_per_month.get(month, 0) + births
    return births_per_month
# Build the month -> births mapping for the data set.
# NOTE(review): cdc_list is not defined in the visible part of this file;
# presumably it is the list of int rows returned by read_csv -- confirm.
cdc_month_births = month_births(cdc_list)
# Output recorded by the author.
# NOTE(review): every value is in the same range as the single-day counts in
# the sample rows at the top of this file, so these do not look like totals
# over many days -- TODO re-run and refresh this sample.
# {1: 11843,
# 2: 11671,
# 3: 11511,
# 4: 11591,
# 5: 8462,
# 6: 12243,
# 7: 12673,
# 8: 7884,
# 9: 12959,
# 10: 10837,
# 11: 7228,
# 12: 11990}
def dow_births(list_of_lists):
    """Sum the births count (column 4) for each day of week (column 3).

    Args:
        list_of_lists: Rows of ints as produced by ``read_csv``; index 3 is
            read as the day of week and index 4 as the number of births.

    Returns:
        A dict mapping each day-of-week value to the total births over all
        rows with that day. Empty input yields an empty dict.
    """
    births_per_dow = {}
    for row in list_of_lists:
        dow = row[3]
        births = row[4]
        # Accumulate rather than overwrite: the first occurrence starts at 0.
        births_per_dow[dow] = births_per_dow.get(dow, 0) + births
    return births_per_dow
# Build the day-of-week -> births mapping for the data set.
# NOTE(review): cdc_list is not defined in the visible part of this file;
# presumably it is the list of int rows returned by read_csv -- confirm.
cdc_day_births = dow_births(cdc_list)
# Output recorded by the author.
# NOTE(review): every value is in the same range as the single-day counts in
# the sample rows at the top of this file, so these do not look like totals
# over many days -- TODO re-run and refresh this sample.
# {1: 12811, 2: 13634, 3: 11990, 4: 6749, 5: 10386, 6: 8656, 7: 7724}
def calc_counts(data, column, value_column=4):
    """Sum one column of the rows, grouped by the value in another column.

    Args:
        data: Iterable of indexable rows (e.g. the lists of ints produced by
            ``read_csv``).
        column: Index of the field to group by.
        value_column: Index of the field to sum. Defaults to 4, the births
            column used throughout this file.

    Returns:
        A dict mapping each distinct ``row[column]`` value to the sum of
        ``row[value_column]`` over all rows sharing that value. Empty input
        yields an empty dict.
    """
    results = {}
    for row in data:
        unit = row[column]
        # Accumulate rather than overwrite: the first occurrence starts at 0.
        results[unit] = results.get(unit, 0) + row[value_column]
    return results
# Build the year -> births mapping by grouping on column 0.
# NOTE(review): cdc_list is not defined in the visible part of this file;
# presumably it is the list of int rows returned by read_csv -- confirm.
cdc_year_births = calc_counts(cdc_list, 0)
# Output recorded by the author.
# NOTE(review): every value is in the same range as the single-day counts in
# the sample rows at the top of this file, so these do not look like totals
# over many days -- TODO re-run and refresh this sample.
# {2000: 7892,
# 2001: 10272,
# 2002: 12582,
# 2003: 12540,
# 2004: 10130,
# 2005: 8635,
# 2006: 7569,
# 2007: 11102,
# 2008: 12906,
# 2009: 11667,
# 2010: 9751,
# 2011: 8035,
# 2012: 10634,
# 2013: 12525,
# 2014: 11990}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment