Instantly share code, notes, and snippets.

# sklam/numbajenks2.py Last active Feb 23, 2018

 import json from pprint import pprint as pp import numpy as np from numba import autojit, typeof, int32 INF = float('inf') @autojit def jenks_matrics_init(data, n_classes, ): #fill the matrices with data+1 arrays of n_classes 0s n_data = len(data) lower_class_limits = np.zeros((n_data + 1, n_classes + 1), dtype=data.dtype) variance_combinations = np.zeros((n_data + 1, n_classes + 1), dtype=data.dtype) for i in xrange(1, n_classes + 1): lower_class_limits[1, i] = 1. variance_combinations[1, i] = 0. for j in xrange(2, len(data) + 1): variance_combinations[j, i] = INF return lower_class_limits, variance_combinations @autojit def jenks_matrices(data, n_classes, lower_class_limits, variance_combinations): variance = 0.0 for l in range(2, len(data) + 1): sum = 0.0 sum_squares = 0.0 w = 0.0 for m in range(1, l + 1): # `III` originally lower_class_limit = l - m + 1 val = data[lower_class_limit - 1] # here we're estimating variance for each potential classing # of the data, for each potential number of classes. `w` # is the number of data points considered so far. w += 1 # increase the current sum and sum-of-squares sum += val sum_squares += val * val # the variance at this point in the sequence is the difference # between the sum of squares and the total x 2, over the number # of samples. variance = sum_squares - (sum * sum) / w i4 = lower_class_limit - 1 if i4 != 0: for j in xrange(2, n_classes + 1): if variance_combinations[l, j] >= (variance + variance_combinations[i4, j - 1]): lower_class_limits[l, j] = lower_class_limit variance_combinations[l, j] = variance + variance_combinations[i4, j - 1] lower_class_limits[l, 1] = 1. 
variance_combinations[l, 1] = variance return lower_class_limits, variance_combinations def get_jenks_breaks(data, lower_class_limits, n_classes): k = int(len(data) - 1) kclass = np.zeros(n_classes + 1, dtype=data.dtype) kclass[n_classes] = data[len(data) - 1] kclass[0] = data[0] for countNum in xrange(n_classes, 1, -1): elt = int(lower_class_limits[k, countNum] - 2) kclass[countNum - 1] = data[elt] k = int(lower_class_limits[k, countNum] - 1) return kclass def jenks(data, n_classes): if n_classes > len(data): return data.sort() lower_class_limits, variance_combinations = jenks_matrics_init(data, n_classes) jenks_matrices(data, n_classes, lower_class_limits, variance_combinations) return get_jenks_breaks(data, lower_class_limits, n_classes) def main(): rawdata = json.load(open('test.json')) data = np.array(rawdata) pp(jenks(data, 5).tolist()) if __name__ == "__main__": main()
 jenks2$ time python numbajenks2.py [0.0028109620325267315, 2.0935479691252112, 4.205495140049607, 6.178148351609707, 8.09175917180255, 9.997982932254672] real 0m0.865s user 0m0.735s sys 0m0.127s jenks2$ time python jenks2.py [0.0028109620325267315, 2.0935479691252112, 4.205495140049607, 6.178148351609707, 8.09175917180255, 9.997982932254672] real 0m4.390s user 0m4.373s sys 0m0.016s Or, %timeit jenks2.main() 1 loops, best of 3: 4.58 s per loop %timeit numbajenks2.main() 1 loops, best of 3: 18.3 ms per loop