Created
October 23, 2019 13:30
-
-
Save MNic/c4c57ebb5089bcc2fd8c73353278fa00 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bisect import bisect_left | |
import pandas as pd | |
# Example fixture: one row per record, each holding an ID and a Delta value.
data = [
    ['A', 1],
    ['B', 3],
    ['C', 5],
]
df = pd.DataFrame(data, columns=('ID', 'Delta'))
## Example 1 | |
# Build out a potentially very large dict with one discrete key for every
# possible value in each bin. Works only with integer bounds.
# High-level bin map: each label maps to the 'low' and 'high' bounds of its bin.
bin_generator = {
    '(0, 3)': dict(low=0, high=3),
    '(3, 4)': dict(low=3, high=4),
    '(4, 7)': dict(low=4, high=7),
}
# Function to generate a 'long' map from the high level | |
def generate_bin_map(bin_gen):
    """Expand a high-level bin spec into a flat ``{value: label}`` lookup.

    Every integer in ``range(low, high)`` of each bin becomes its own key
    mapped to that bin's label, so ``low`` is included and ``high`` is not.

    Args:
        bin_gen: Mapping of bin label to a dict holding 'low' and 'high'.

    Returns:
        dict mapping each discrete value to the label of the bin covering it.
    """
    lookup = {}
    for label, bounds in bin_gen.items():
        covered = range(bounds.get('low'), bounds.get('high'))
        # A later bin overwrites an earlier one on any value they share.
        lookup.update(dict.fromkeys(covered, label))
    return lookup
# Expand the bin spec into a per-value lookup, then label each row's Delta
# by looking it up in that dict.
bin_map = generate_bin_map(bin_gen=bin_generator)
df['bin'] = df['Delta'].map(bin_map)
df
## Example 2 | |
# Build out a small dict using the category and the lower bound of each bin | |
# Works with floats or continuous keys. | |
# Generates a { lowerbound_value: bin } dict from bin_generator | |
def generate_bin_map_2(bin_gen):
    """Build a compact ``{lower_bound: label}`` lookup from a bin spec.

    Args:
        bin_gen: Mapping of bin label to a dict holding at least 'low'.

    Returns:
        dict keyed by each bin's 'low' value, mapped to that bin's label.
        When two bins share a 'low', the later one wins.
    """
    return {bounds.get('low'): label for label, bounds in bin_gen.items()}
# Lower-bound -> label lookup consumed by apply_bin_map in Example 2.
bin_map_2 = generate_bin_map_2(bin_gen=bin_generator)
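# Finds the position of the value within the list of lower-bound keys (which
# must be in ascending order), then returns the bin for the key at that
# position on an exact match, or for the key immediately to its left otherwise.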
def apply_bin_map(x, bin_map):
    """Return the bin whose lower bound is the greatest key not exceeding ``x``.

    Args:
        x: Value to classify.
        bin_map: ``{lower_bound: bin_label}`` mapping. Keys may be in any
            order; they are sorted before searching.

    Returns:
        The label stored under the largest key ``<= x``. ``None`` when
        ``bin_map`` is empty or ``x`` is below every lower bound. Only lower
        bounds are known here, so any ``x`` at or above the highest key is
        assigned the last bin regardless of that bin's upper bound.
    """
    # bisect needs ascending keys; dict insertion order does not guarantee it.
    key_list = sorted(bin_map)
    pos = bisect_left(key_list, x)
    # bisect_left lands on an equal key when one exists; otherwise the
    # governing lower bound is the key immediately to the left.
    on_boundary = pos < len(key_list) and key_list[pos] == x
    if not on_boundary:
        pos -= 1
    if pos < 0:
        # x precedes every lower bound (or the map is empty). A negative
        # index would wrap around to the highest bin, so report no bin.
        return None
    return bin_map.get(key_list[pos])
# Label each Delta through the lower-bound lookup; bin_map_2 is forwarded to
# apply_bin_map as its second positional argument.
df['bin_2'] = df['Delta'].apply(apply_bin_map, args=(bin_map_2,))
df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment