Skip to content

Instantly share code, notes, and snippets.

@corolla96
Created May 17, 2019 06:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save corolla96/001cf7ea513b7861cddcecf0e3026811 to your computer and use it in GitHub Desktop.
Save corolla96/001cf7ea513b7861cddcecf0e3026811 to your computer and use it in GitHub Desktop.
House data analysis
import csv
import numpy
def _read_csv(file_name):
    """Load house sale records from a CSV file that has a header row.

    The file must provide ``price``, ``bd``, ``sqft`` and ``date`` columns.
    Values are kept as the strings read from the file; nothing is converted.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        A list of ``(price, bd, sqft, year)`` tuples, one per data row, where
        ``year`` is the text after the last ``'/'`` in the ``date`` column.
    """
    with open(file_name, 'r') as csv_file:
        return [
            (row['price'], row['bd'], row['sqft'], row['date'].split('/')[-1])
            for row in csv.DictReader(csv_file)
        ]
def _filter_by_bd(houses, min, max):
    """Keep only the houses whose bedroom label falls inside a range.

    A house matches when its ``bd`` field is exactly ``'<n>bd'`` for some
    integer ``n`` with ``min <= n <= max``. Any other label (for example
    ``'Studio'``) never matches.

    Args:
        houses: Iterable of ``(price, bd, sqft, year)`` tuples.
        min: Smallest bedroom count to keep (inclusive).
        max: Largest bedroom count to keep (inclusive).

    Returns:
        A list of the matching ``(price, bd, sqft, year)`` tuples in their
        input order; empty when ``min > max``.
    """
    # NOTE: the parameter names shadow the min()/max() builtins; they are
    # kept because renaming them would break callers passing keywords.
    # Build the accepted labels once instead of re-formatting every label
    # for every house.
    wanted_labels = set(str(num) + 'bd' for num in range(min, max + 1))
    return [
        (price, bd, sqft, year)
        for price, bd, sqft, year in houses
        if bd in wanted_labels
    ]
def _filter_by_year(houses):
    """Group house records by their sale year.

    Args:
        houses: Iterable of 4-item records whose last item is the year; the
            script passes ``(price, bd, sqft, year)`` tuples.

    Returns:
        A dict mapping ``str(year)`` to the list of records sold that year,
        each reduced to its first three items and kept in input order.
    """
    by_year = {}
    for price, bd, sqft, year in houses:
        year_key = str(year)
        if year_key not in by_year:
            by_year[year_key] = []
        by_year[year_key].append((price, bd, sqft))
    return by_year
## SCRIPT begins
# Report on the Seattle house-sale CSV: total sales, sales per year, and
# price statistics for each bedroom count.
# Each message is built as one string so that print(...) emits the same text
# whether it is parsed as a Python 2 statement or a Python 3 function call.
house_list = _read_csv('seattle_w_header.csv')
print('There are a total of %s houses sold.' % len(house_list))

house_by_year_dict = _filter_by_year(house_list)
for sale_year in ('2019', '2018'):
    # .get() keeps the report running when the data has no sales for a year.
    sold_that_year = house_by_year_dict.get(sale_year, [])
    print('houses sold in %s: %s' % (sale_year, len(sold_that_year)))

# Inclusive bedroom range to report on; named so that the min()/max()
# builtins are not shadowed at module level.
min_bd = 0
max_bd = 10
filter_house = _filter_by_bd(house_list, min_bd, max_bd)
print('%s to %s bedrooms: %s' % (min_bd, max_bd, len(filter_house)))

for i in range(min_bd, max_bd + 1):
    bedroom = _filter_by_bd(filter_house, i, i)
    price_list = []
    for price, bd, sqft, year in bedroom:
        try:
            price_list.append(int(price))
        except (TypeError, ValueError):
            # Price is blank or not a plain integer; leave this listing out
            # of the statistics rather than aborting the report.
            pass
    print('%s bedrooms: %s' % (i, len(price_list)))
    if price_list:
        arr = numpy.array(price_list, dtype='int')
        print('mean %s \tmedian %s \tmin %s \tmax %s' % (
            arr.mean(), numpy.median(arr), numpy.min(arr), numpy.max(arr)))
    else:
        # numpy.min/numpy.max raise ValueError on an empty array, so skip
        # the statistics when no price could be parsed for this count.
        print('no price data')
    print('')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment