Skip to content

Instantly share code, notes, and snippets.

@mwyborski
Last active January 23, 2023 16:44
Show Gist options
  • Save mwyborski/a65215c902bc474451dabc2adb34143f to your computer and use it in GitHub Desktop.
Save mwyborski/a65215c902bc474451dabc2adb34143f to your computer and use it in GitHub Desktop.
Example of a Benford distribution
import numpy as np
# Demonstrates Benford's law: the leading digit of a product of several
# independent random factors is heavily skewed towards small digits, while
# the leading digit of a single uniformly drawn integer is close to flat.

# Exclusive upper bound of the uniform draw; in Benford mode it also
# determines how many factors are multiplied together.
max_rnd_value = 100000
num_samples = 100000

# do benford or uniform distribution
#   True  -> each sample is a product of `num_factors` factors in [1, 10]
#   False -> each sample is a single draw from [1, max_rnd_value)
do_benford = True

if do_benford:
    # One factor per decimal order of magnitude of max_rnd_value. Every
    # factor is at most 10, so a product never exceeds 10 ** num_factors,
    # which in turn is <= max_rnd_value.
    num_factors = int(np.floor(np.log10(max_rnd_value)))
    max_rnd = 11  # randint's upper bound is exclusive -> factors 1..10
else:
    num_factors = 1
    max_rnd = max_rnd_value

# Draw all factors with one vectorised call (one row per sample) instead of
# calling randint once per sample, then multiply along each row.
factors = np.random.randint(
    1, max_rnd, size=(num_samples, num_factors), dtype=np.uint64
)
# .tolist() converts the NumPy scalars to plain Python ints, so the list
# prints as [504, 448, ...] regardless of how NumPy formats scalar reprs.
samples = factors.prod(axis=1).tolist()

print('first 10 samples:', samples[:10])
print()

# One counter per decimal digit character, all starting at zero.
count_dict = dict.fromkeys('0123456789', 0)

# check the first digit of each sample
# (0 = leading digit; a larger index inspects a later digit)
digit_index = 0
for sample in samples:
    number_str = str(sample)
    # Samples with too few digits have no digit at this position; skip them.
    if len(number_str) > digit_index:
        count_dict[number_str[digit_index]] += 1

print('distribution:')
for digit in sorted(count_dict):
    # Share of all samples (in percent) whose inspected digit is `digit`.
    print('%s : %.2f %%' % (digit, count_dict[digit] / float(num_samples) * 100))
### Output for the Benford distribution (do_benford = True):
# first 10 samples: [504, 448, 2880, 3600, 4000, 480, 11760, 63000, 420, 2560]
#
# distribution:
# 0 : 0.00 %
# 1 : 30.75 %
# 2 : 17.44 %
# 3 : 12.29 %
# 4 : 9.61 %
# 5 : 7.80 %
# 6 : 6.33 %
# 7 : 5.78 %
# 8 : 5.54 %
# 9 : 4.47 %
### Output for the uniform distribution (do_benford = False):
# first 10 samples: [42129, 12945, 71546, 4571, 1245, 30819, 32629, 32250, 92081, 63509]
#
# distribution:
# 0 : 0.00 %
# 1 : 11.06 %
# 2 : 10.94 %
# 3 : 11.21 %
# 4 : 11.10 %
# 5 : 11.23 %
# 6 : 11.17 %
# 7 : 11.04 %
# 8 : 11.18 %
# 9 : 11.06 %
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment