Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Add columns to Pandas DataFrame by (left) merging with another.
def columns_via_merge(df: pd.DataFrame, df2: pd.DataFrame, oncols: list, assigning: list):
"""
Add (or replace) columns to df that map via a merge with df2.
Examples:
# Add the ord value to a subset of a DataFrame
ABC = [chr(x) for x in range(ord('A'), ord('Z') + 1)]
AABBCC = [chr(x)+chr(x) for x in range(ord('A'), ord('Z') + 1)]
abc = [chr(x) for x in range(ord('a'), ord('z') + 1)]
df = pd.DataFrame(np.concatenate((ABC, abc, AABBCC)).reshape(3, len(ABC)).T,
columns=['one', 'two', 'three'])
df.sort_values(by='one', ascending=False, inplace=True)
df.reset_index(drop=True, inplace=True)
df2 = pd.DataFrame(np.concatenate((ABC, abc, AABBCC)).reshape(3, len(ABC)).T,
columns=['one', 'two', 'three'])
df2 = df2.iloc[0:len(ABC):2]
df2['oneord'] = df2['one'].apply(ord)
df2['twoord'] = df2['two'].apply(ord)
df2.reset_index(drop=True, inplace=True)
oncols=['one', 'three']
assigning = ['oneord', 'twoord']
columns_via_merge(df, df2, oncols, assigning)
# df is still sorted descending and contains NULLs where df2 was invalid,
# but now contains ord numbers where df2 had values.
Args:
df: DataFrame being updated. Contains at least `oncols`.
df2: DataFrame that contains `oncols` and `assigning` columns.
oncols: Columns to map on in the merge.
assigning: Column name(s) to assign to the original DataFrame.
Returns:
"""
if not isinstance(oncols, list):
raise TypeError('"oncols" argumentmust be a list')
if not isinstance(assigning, list):
raise TypeError('"assigning" argument must be a list')
d2cols = oncols[:]
d2cols.extend(assigning)
df[assigning] = df[oncols].merge(df2[d2cols], on=oncols, how='left')[assigning]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.