Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
import json
from pprint import pprint as pp
import numpy as np
from numba import autojit, typeof, int32
# Sentinel for "no classing computed yet" in the variance DP table.
INF = float('inf')


@autojit
def jenks_matrics_init(data, n_classes):
    """Allocate and seed the two DP matrices used by the Jenks algorithm.

    Parameters
    ----------
    data : np.ndarray
        Sorted 1-D array of values to classify.
    n_classes : int
        Number of classes (breaks) to compute.

    Returns
    -------
    (lower_class_limits, variance_combinations) : tuple of np.ndarray
        Both are (len(data)+1, n_classes+1) arrays in *data*'s dtype;
        row/column 0 are unused so indices match the classic 1-based
        formulation of the algorithm.
    """
    n_data = len(data)
    lower_class_limits = np.zeros((n_data + 1, n_classes + 1), dtype=data.dtype)
    variance_combinations = np.zeros((n_data + 1, n_classes + 1), dtype=data.dtype)
    # NOTE: was `xrange` (Python 2 only); the rest of the file already
    # uses `range`, so use it consistently.
    for i in range(1, n_classes + 1):
        # A single data point trivially forms the first class boundary
        # with zero within-class variance.
        lower_class_limits[1, i] = 1.
        variance_combinations[1, i] = 0.
        # Every longer prefix starts "unsolved": infinite variance so any
        # real classing computed later beats it.
        for j in range(2, len(data) + 1):
            variance_combinations[j, i] = INF
    return lower_class_limits, variance_combinations
@autojit
def jenks_matrices(data, n_classes, lower_class_limits, variance_combinations):
    """Fill the Jenks DP matrices in place.

    For every prefix length ``l`` and every class count ``j``,
    ``variance_combinations[l, j]`` becomes the minimal total within-class
    variance achievable, and ``lower_class_limits[l, j]`` records the
    1-based index where the last class of that optimal split begins.

    Parameters
    ----------
    data : np.ndarray
        Sorted 1-D array of values.
    n_classes : int
        Number of classes.
    lower_class_limits, variance_combinations : np.ndarray
        Matrices produced by ``jenks_matrics_init``; mutated in place.

    Returns
    -------
    The same (lower_class_limits, variance_combinations) pair.
    """
    variance = 0.0
    for l in range(2, len(data) + 1):
        # Running statistics over the candidate last class, which grows
        # backwards from data[l-1].
        # NOTE: was named `sum`, shadowing the builtin.
        running_sum = 0.0
        sum_squares = 0.0
        w = 0.0
        for m in range(1, l + 1):
            # `III` originally
            lower_class_limit = l - m + 1
            val = data[lower_class_limit - 1]
            # `w` is the number of data points considered so far.
            w += 1
            # Increase the current sum and sum-of-squares.
            running_sum += val
            sum_squares += val * val
            # Within-class variance (times w) via the shortcut formula:
            # sum of squares minus the squared sum over the sample count.
            variance = sum_squares - (running_sum * running_sum) / w
            i4 = lower_class_limit - 1
            if i4 != 0:
                # NOTE: was `xrange` (Python 2 only).
                for j in range(2, n_classes + 1):
                    # Cost of ending the j-th class here: this class's
                    # variance plus the best (j-1)-class split of the
                    # remaining prefix. Hoisted so it is computed once.
                    candidate = variance + variance_combinations[i4, j - 1]
                    if variance_combinations[l, j] >= candidate:
                        lower_class_limits[l, j] = lower_class_limit
                        variance_combinations[l, j] = candidate
        # With a single class the whole prefix is one group.
        lower_class_limits[l, 1] = 1.
        variance_combinations[l, 1] = variance
    return lower_class_limits, variance_combinations
def get_jenks_breaks(data, lower_class_limits, n_classes):
    """Walk the DP matrix backwards and extract the class break values.

    Parameters
    ----------
    data : np.ndarray
        Sorted 1-D array of values that was classified.
    lower_class_limits : np.ndarray
        Matrix filled by ``jenks_matrices``.
    n_classes : int
        Number of classes.

    Returns
    -------
    np.ndarray
        Array of n_classes + 1 break values; kclass[0] is the minimum
        and kclass[n_classes] is the maximum of *data*.
    """
    k = int(len(data) - 1)
    kclass = np.zeros(n_classes + 1, dtype=data.dtype)
    # The outermost breaks are always the data extremes.
    kclass[n_classes] = data[len(data) - 1]
    kclass[0] = data[0]
    # Follow the recorded lower limits from the last class back to the
    # first.  NOTE: was `xrange` (Python 2 only).
    for count_num in range(n_classes, 1, -1):
        # lower_class_limits holds 1-based indices; -2 converts to the
        # 0-based index of the last element of the previous class.
        elt = int(lower_class_limits[k, count_num] - 2)
        kclass[count_num - 1] = data[elt]
        # Continue the walk from the end of the previous class.
        k = int(lower_class_limits[k, count_num] - 1)
    return kclass
def jenks(data, n_classes):
    """Return the Jenks natural-breaks boundaries for *data*.

    NOTE: sorts *data* in place.  Returns ``None`` when more classes are
    requested than there are data points.
    """
    if n_classes > len(data):
        return None
    # The dynamic program below requires ascending order.
    data.sort()
    limits, variances = jenks_matrics_init(data, n_classes)
    jenks_matrices(data, n_classes, limits, variances)
    return get_jenks_breaks(data, limits, n_classes)
def main():
    """Load sample values from test.json and print their 5-class breaks."""
    # NOTE: the original opened the file without closing it; `with`
    # guarantees the handle is released.
    with open('test.json') as fh:
        rawdata = json.load(fh)
    data = np.array(rawdata)
    pp(jenks(data, 5).tolist())


if __name__ == "__main__":
    main()
jenks2$ time python numbajenks2.py
[0.0028109620325267315,
2.0935479691252112,
4.205495140049607,
6.178148351609707,
8.09175917180255,
9.997982932254672]
real 0m0.865s
user 0m0.735s
sys 0m0.127s
jenks2$ time python jenks2.py
[0.0028109620325267315,
2.0935479691252112,
4.205495140049607,
6.178148351609707,
8.09175917180255,
9.997982932254672]
real 0m4.390s
user 0m4.373s
sys 0m0.016s
Or, measured with IPython's %timeit:
%timeit jenks2.main()
1 loops, best of 3: 4.58 s per loop
%timeit numbajenks2.main()
1 loops, best of 3: 18.3 ms per loop
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment