Skip to content

Instantly share code, notes, and snippets.

@MNic
Created October 23, 2019 13:30
Show Gist options
  • Save MNic/c4c57ebb5089bcc2fd8c73353278fa00 to your computer and use it in GitHub Desktop.
Save MNic/c4c57ebb5089bcc2fd8c73353278fa00 to your computer and use it in GitHub Desktop.
from bisect import bisect_left
import pandas as pd
# Example fixture: three IDs, each with an integer 'Delta' value to be binned.
data = [
    ['A', 1],
    ['B', 3],
    ['C', 5],
]
df = pd.DataFrame(data, columns=('ID', 'Delta'))
## Example 1
# Build a "long" dict that has one key for every integer value covered by a bin.
# Works only for integer values, because the keys are generated with range().
# High-level bin map: bin label -> the 'low' and 'high' bounds of that bin.
bin_generator = {
    '(0, 3)': dict(low=0, high=3),
    '(3, 4)': dict(low=3, high=4),
    '(4, 7)': dict(low=4, high=7),
}
# Function to generate a 'long' map from the high level
def generate_bin_map(bin_gen):
    """Expand a high-level bin spec into a per-integer lookup dict.

    Args:
        bin_gen: Mapping of bin label -> ``{'low': int, 'high': int}``.
            Each bin covers the half-open integer range
            ``low <= value < high`` (the bounds are fed to ``range``).

    Returns:
        A dict mapping every integer covered by a bin to that bin's label.
        When bins overlap, the bin that appears later in ``bin_gen`` wins.

    Raises:
        KeyError: If a bin spec has no ``'low'`` or ``'high'`` entry.
        TypeError: If a bound is not an integer (``range`` rejects it).
    """
    bin_map = {}
    for label, bounds in bin_gen.items():
        # One entry per integer in [low, high); later bins overwrite earlier
        # ones on overlap, matching plain sequential assignment.
        bin_map.update(dict.fromkeys(range(bounds['low'], bounds['high']), label))
    return bin_map
# Expand the bin spec into a per-value lookup, then label each row's Delta.
bin_map = generate_bin_map(bin_generator)
df['bin'] = df['Delta'].map(bin_map)
# Bare expression: shows the frame when run in a notebook/REPL cell.
df
## Example 2
# Build out a small dict using the category and the lower bound of each bin
# Works with floats or continuous keys.
# Generates a { lowerbound_value: bin } dict from bin_generator
def generate_bin_map_2(bin_gen):
    """Build a compact ``{lower_bound: label}`` dict from a bin spec.

    Args:
        bin_gen: Mapping of bin label -> dict containing at least a
            ``'low'`` entry (the bin's lower bound; ints or floats).

    Returns:
        A dict mapping each bin's lower bound to its label, inserted in
        ascending order of lower bound so that ``list(result)`` is already
        sorted for bisect-style lookups. If two bins share a lower bound,
        the one that appears later in ``bin_gen`` wins.

    Raises:
        KeyError: If a bin spec has no ``'low'`` entry.
        TypeError: If the lower bounds cannot be compared with each other.
    """
    # sorted() is stable, so bins with equal lower bounds keep their input
    # order and the later one still overwrites the earlier one.
    ordered = sorted(bin_gen.items(), key=lambda item: item[1]['low'])
    return {bounds['low']: label for label, bounds in ordered}
# Lower-bound lookup ({low: label}) built from the same bin spec as Example 1.
bin_map_2 = generate_bin_map_2(bin_generator)
# Finds the relative position of the value within the list of keys then
# returns the bin for the key to the left.
def apply_bin_map(x, bin_map):
    """Return the label of the bin whose lower bound is the closest one <= x.

    Args:
        x: The value to classify (int or float).
        bin_map: Dict of ``{lower_bound: label}``. Keys may be in any order;
            they are sorted here before the binary search.

    Returns:
        The label stored under the greatest lower bound that is ``<= x``,
        or ``None`` when ``x`` is below every lower bound, when ``bin_map``
        is empty, or when ``x`` is NaN.

    Note:
        ``bin_map`` carries no upper bounds, so any value at or above the
        largest lower bound is assigned to the last bin.
    """
    # bisect requires sorted input; dict order is insertion order, which is
    # not guaranteed to be ascending.
    lower_bounds = sorted(bin_map)
    pos = bisect_left(lower_bounds, x)
    # Exact hit on a lower bound: x belongs to the bin that starts there.
    if pos < len(lower_bounds) and lower_bounds[pos] == x:
        return bin_map[lower_bounds[pos]]
    # Nothing to the left: x is below every bin (or the map is empty).
    # Without this guard, index -1 would wrap around to the highest bin.
    if pos == 0:
        return None
    return bin_map[lower_bounds[pos - 1]]
# Label each row's Delta via the lower-bound lookup.
df['bin_2'] = df['Delta'].apply(apply_bin_map, args=(bin_map_2,))
# Bare expression: shows the frame when run in a notebook/REPL cell.
df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment