Skip to content

Instantly share code, notes, and snippets.

@dgk
Created November 13, 2013 07:53
Show Gist options
  • Save dgk/7445315 to your computer and use it in GitHub Desktop.
Save dgk/7445315 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from collections import OrderedDict
# Sample price data (50 values, ascending), laid out one price band per group.
prices = [
    # below 700000
    390000, 550000, 550000, 580000, 665000, 670000,
    # 700000 - 799999
    700000, 750000, 755000, 760000,
    # 800000 - 899999
    800000, 830000, 850000, 850000, 870000, 870000, 890000, 899000,
    # 900000 - 949999
    900000, 900000, 900000, 900000, 900000, 900000,
    915000, 923574,
    930000, 930000, 930000, 930000, 930000,
    945000, 949000,
    # 950000 and above
    950000, 950000, 950000, 950000, 950000, 950000,
    950000, 950000, 950000, 950000, 950000, 950000,
    955740, 960000, 960000, 965000, 969000,
]
def average(items):
    """Return the arithmetic mean of ``items``, or 0 when there are none.

    ``items`` may be any iterable of numbers, including one-shot iterators
    such as generators or ``filter`` objects: it is materialised once so
    that both the emptiness check and ``len`` work on it.

    The division uses the plain ``/`` operator, so for integer inputs the
    result follows the interpreter's ``/`` semantics.
    """
    values = list(items)
    if not values:
        return 0
    return sum(values) / len(values)
def calc(prices, count_parts):
    """Average the prices falling into each of ``count_parts`` equal buckets.

    The interval from ``min(prices)`` to ``max(prices)`` is divided into
    ``count_parts`` buckets of equal width. Bucket ``i`` covers
    ``[min + i * step, min + (i + 1) * step)``; the last bucket is closed on
    the right so that the maximum price is counted too.

    Returns a list of ``count_parts`` numbers: the mean of the prices in
    each bucket, or 0 for a bucket that holds no price. An empty ``prices``
    yields all zeros. ``count_parts == 0`` with non-empty ``prices`` raises
    ``ZeroDivisionError``.
    """
    if not prices:
        return [0] * count_parts
    lowest, highest = min(prices), max(prices)
    # float() keeps the bucket width exact even when integer prices are
    # divided under floor-division semantics.
    step = float(highest - lowest) / count_parts
    last = count_parts - 1
    averages = []
    for i in range(count_parts):
        # Bounds are offset by the lowest price so that the buckets span the
        # actual data range instead of starting at zero.
        lower = lowest + i * step
        upper = lowest + (i + 1) * step
        bucket = [price for price in prices
                  if lower <= price and (price < upper or i == last)]
        averages.append(sum(bucket) / len(bucket) if bucket else 0)
    return averages
from scipy import stats
def average(items):
    """Mean of ``items`` (sum divided by length); 0 if ``items`` is falsy."""
    return sum(items) / len(items) if items else 0
def calc(prices, step_percent):
    """Smooth prices by averaging neighbours until they are spread apart.

    The prices are sorted, then every adjacent pair is replaced by its mean
    (which shortens the list by one) for as long as the smallest gap between
    neighbours is below ``step_percent`` percent of the original
    ``max - min`` range.

    Returns the list reached when the smallest gap is large enough, or when
    fewer than two values remain. Inputs with fewer than two prices are
    returned sorted and otherwise unchanged.
    """
    prices = sorted(prices)
    if len(prices) < 2:
        return prices
    step_limit = (prices[-1] - prices[0]) * step_percent / 100
    # Stop once a single value is left: there is no gap left to measure, and
    # continuing would index into an empty list.
    while len(prices) > 1:
        pairs = list(zip(prices, prices[1:]))
        # The list is sorted, so each difference is already non-negative.
        if min(upper - lower for lower, upper in pairs) >= step_limit:
            break
        prices = [(lower + upper) / 2 for lower, upper in pairs]
    return prices
def rec_calc(prices, count_parts):
    """Shrink sorted prices to at most ``count_parts`` values.

    On every pass the gaps between neighbours are computed and the median
    gap (the element at index ``len(gaps) // 2`` of the sorted gaps) is
    taken as a threshold. Each price except the last is then replaced by
    the mean of itself and its right neighbour when their gap is below the
    threshold, and kept as-is otherwise. Every pass therefore produces
    exactly one value fewer than it received.

    Returns the resulting list; inputs that already have ``count_parts``
    values or fewer are returned sorted.
    """
    prices = sorted(prices)
    # The extra ``> 1`` guard stops before the gap list becomes empty, which
    # would otherwise raise IndexError when ``count_parts`` is below 1.
    while len(prices) > count_parts and len(prices) > 1:
        pairs = list(zip(prices, prices[1:]))
        gaps = sorted(upper - lower for lower, upper in pairs)
        # ``//`` keeps the index an integer under true division as well.
        threshold = gaps[len(gaps) // 2]
        # NOTE(review): the final price only survives through the mean of
        # the last pair; when that pair is not merged it is dropped.
        # Confirm this is the intended behaviour.
        prices = [(lower + upper) / 2 if upper - lower < threshold else lower
                  for lower, upper in pairs]
    return prices
def calc(prices, count_parts):
    """Merge the closest neighbouring prices until ``count_parts`` remain.

    The prices are sorted; while there are more than ``count_parts`` of
    them, the adjacent pair with the smallest gap (the first such pair on a
    tie) is replaced by the mean of its two values.

    Returns the reduced, still sorted list. A list with fewer than two
    prices is returned as-is, whatever ``count_parts`` is.
    """
    prices = sorted(prices)
    while len(prices) > count_parts and len(prices) > 1:
        # min() over the indices always yields a valid position, unlike a
        # search seeded with a price as the "largest possible gap".
        closest = min(range(len(prices) - 1),
                      key=lambda i: prices[i + 1] - prices[i])
        merged = (prices[closest] + prices[closest + 1]) / 2
        prices[closest:closest + 2] = [merged]
    return prices
#print calc(prices, 10)
# Small hand-made sample: a tight cluster around 50 between spread-out values.
_prices = [20, 50, 51, 52, 52, 70, 100]
def average(items):
    """Return ``sum(items) / len(items)``; a falsy ``items`` gives 0."""
    if items:
        return sum(items) / len(items)
    return 0
def calc(prices, count_parts):
    """Cluster prices into ``count_parts`` values with their share in percent.

    The prices are sorted; while there are more than ``count_parts`` of
    them, the adjacent pair with the smallest gap (the first such pair on a
    tie) is replaced by the mean of its two values, and the number of
    original prices behind the two entries is added up.

    Returns a list of ``(value, percent)`` tuples, where ``percent`` is
    ``100 * count / len(prices)`` for the original prices merged into
    ``value``. A list with fewer than two prices is never merged.
    """
    prices = sorted(prices)
    total = len(prices)
    counts = [1] * total
    while len(prices) > count_parts and len(prices) > 1:
        # min() over the indices always yields a valid position, unlike a
        # search seeded with a price as the "largest possible gap".
        i = min(range(len(prices) - 1),
                key=lambda j: prices[j + 1] - prices[j])
        # NOTE(review): this is the plain mean of the two current values,
        # not weighted by how many prices each one stands for. Confirm.
        prices[i:i + 2] = [(prices[i] + prices[i + 1]) / 2]
        counts[i:i + 2] = [counts[i] + counts[i + 1]]
    return list(zip(prices, [100 * count / total for count in counts]))
#for k, v in calc(prices, 10):
# print "%s - %s%%" % (k, v)
# percentiles = OrderedDict([ ('p_%d' % x, stats.scoreatpercentile(prices, x*10)) for x in range(11) ])
#
# for x in range(11):
# percentile = percentiles['p_%d' % x]
# print stats.percentileofscore(prices, percentile), percentile
# Data sets fed to calc() by the driver loop: the 50 sample prices (runs of
# repeated values are written as repetitions) followed by a small sample.
prices_list = [
    (
        [390000, 550000, 550000, 580000, 665000, 670000]
        + [700000, 750000, 755000, 760000]
        + [800000, 830000, 850000, 850000, 870000, 870000, 890000, 899000]
        + [900000] * 6
        + [915000, 923574]
        + [930000] * 5
        + [945000, 949000]
        + [950000] * 12
        + [955740, 960000, 960000, 965000, 969000]
    ),
    [20, 50, 51, 52, 52, 70, 100],
]
def average(items):
    """Compute the mean of ``items``; the result is 0 when ``items`` is falsy."""
    mean = 0
    if items:
        mean = sum(items) / len(items)
    return mean
def get_min_range(prices):
    """Return the smallest absolute difference between neighbouring entries.

    Raises ``ValueError`` (from ``min``) when ``prices`` has fewer than two
    entries, because there is no neighbouring pair to measure.
    """
    gaps = [abs(left - right) for left, right in zip(prices, prices[1:])]
    return min(gaps)
def calc(prices, max_parts):
    """Cluster prices and report how many original prices each value covers.

    The prices are sorted and a threshold gap is taken from the sorted gaps
    between neighbours, at index ``len(prices) // 2``. While more than two
    values remain and either there are more than ``max_parts`` values or
    the smallest gap is below the threshold, the adjacent pair with the
    smallest gap (the first such pair on a tie) is replaced by the mean of
    its two values and their counts are added up.

    Returns a list of ``(value, count)`` tuples; the counts add up to
    ``len(prices)``. Inputs with fewer than three prices are never merged
    and come back as ``(price, 1)`` pairs.
    """
    prices = sorted(prices)
    counts = [1] * len(prices)
    if len(prices) < 3:
        # The merge loop only runs for more than two values, and the
        # threshold index below would be out of range for such short input.
        return list(zip(prices, counts))
    gaps = [upper - lower for lower, upper in zip(prices, prices[1:])]
    # ``//`` keeps the index an integer under true division as well.
    min_range = sorted(gaps)[len(prices) // 2]
    while len(prices) > 2 and (len(prices) > max_parts
                               or min(gaps) < min_range):
        # list.index returns the first occurrence, i.e. the leftmost of the
        # closest pairs.
        i = gaps.index(min(gaps))
        # NOTE(review): this is the plain mean of the two current values,
        # not weighted by how many prices each one stands for. Confirm.
        prices[i:i + 2] = [(prices[i] + prices[i + 1]) / 2]
        counts[i:i + 2] = [counts[i] + counts[i + 1]]
        gaps = [upper - lower for lower, upper in zip(prices, prices[1:])]
    return list(zip(prices, counts))
# Driver: cluster every data set with calc() and print one "value - count"
# line per cluster. Single-argument print(...) calls produce the same output
# whether print is a statement or a function.
# NOTE(review): the nesting below was reconstructed; everything is assumed to
# run once per data set.
for prices in prices_list:
    print('=' * 78)
    print('-' * 78)
    print(prices)
    print('-' * 78)
    # Named total_count rather than ``all`` so the builtin is not shadowed.
    total_count = 0
    for k, v in calc(prices, 10):
        print("%s - %s" % (k, v))
        total_count += v
    # Sanity check: the cluster counts must account for every input price.
    assert total_count == len(prices)
    #print prices
    # Deciles p_0 .. p_10 of the data set; only the disabled experiment
    # below reads these values.
    percentiles = OrderedDict([('p_%d' % x, stats.scoreatpercentile(prices, x * 10)) for x in range(11)])
    for x in range(11):
        percentile = percentiles['p_%d' % x]
        #print stats.percentileofscore(prices, percentile), percentile
    prices_count = float(len(prices))
    total_count = 0
    # #print [(x, x*5) for x in range(21) if x%2]
    # for i in [ x*5 for x in range(21) if x%2]:
    #     start = i-5
    #     stop = i+5
    #     start_p = stats.scoreatpercentile(prices, start)
    #     stop_p = stats.scoreatpercentile(prices, stop)
    #     count = len([x for x in prices if x >= start_p and x <=stop_p])
    #     perc = count/prices_count*100 if count else 0
    #     print i, start_p, stop_p, count, perc
    #     total_count+=count
    #
    # print prices_count, total_count
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment