Skip to content

Instantly share code, notes, and snippets.

@naure
Created May 30, 2017 17:15
Show Gist options
  • Save naure/83bfc20d7b01f980ba37979e4c9db9f2 to your computer and use it in GitHub Desktop.
Save naure/83bfc20d7b01f980ba37979e4c9db9f2 to your computer and use it in GitHub Desktop.
pandas: Manage shared categories for several columns and dataframes
#%% Manage shared categories for several columns and dataframes
import pandas as pd
from collections import defaultdict
globalCats = defaultdict(lambda: pd.Categorical([]))
def convertCategories(df, field, fieldForCats=None):
" Convert field in df to categories "
cats = globalCats[fieldForCats or field]
_extendCategories(cats, df[field])
df[field] = df[field].astype('category', categories=cats.categories)
return df
def ensureSameCats(a, b):
" Make sure the a and b series are using the same categories "
ac = a.cat.categories
bc = b.cat.categories
if ac is bc:
return
if len(ac) > len(bc): # Ensure b has more than a
a, b = b, a
ac, bc = bc, ac
assert (ac == bc[:len(ac)]).all(), "Categories are not compatible."
a.cat.set_categories(bc, inplace=True)
def _extendCategories(cats, values):
" Add categories found in values to the existing cats object "
newCats = set(values).difference(cats.categories)
newCats = list(filter(pd.notnull, newCats))
if newCats:
cats.add_categories(newCats, inplace=True)
def testCategories():
df = pd.DataFrame(dict(
a=["x","y","x"],
a2=["z","y","w"], # Share cats with "a"
b=["z","y","w"], # Not shared
))
convertCategories(df, "a")
convertCategories(df, "a2", fieldForCats="a") # Share cats with "a"
assert set(df.a.cat.categories) == set(["x","y"])
assert (df.a2.cat.categories[:2] == df.a.cat.categories).all() # Contains "x" from the other "a" field
ensureSameCats(df.a, df.a2)
assert (df.a2.cat.categories == df.a.cat.categories).all()
assert set(df.a2.cat.categories) == set(["x","y","z","w"])
convertCategories(df, "b")
assert set(df.b.cat.categories) == set(["z","y","w"]) # No "x" in "b"
testCategories()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment