Created
July 20, 2017 18:23
-
-
Save tommct/5f67a7f310c72e453953980ce346572d to your computer and use it in GitHub Desktop.
Add columns to Pandas DataFrame by (left) merging with another.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def columns_via_merge(df: pd.DataFrame, df2: pd.DataFrame, oncols: list, assigning: list): | |
""" | |
Add (or replace) columns to df that map via a merge with df2. | |
Examples: | |
# Add the ord value to a subset of a DataFrame | |
ABC = [chr(x) for x in range(ord('A'), ord('Z') + 1)] | |
AABBCC = [chr(x)+chr(x) for x in range(ord('A'), ord('Z') + 1)] | |
abc = [chr(x) for x in range(ord('a'), ord('z') + 1)] | |
df = pd.DataFrame(np.concatenate((ABC, abc, AABBCC)).reshape(3, len(ABC)).T, | |
columns=['one', 'two', 'three']) | |
df.sort_values(by='one', ascending=False, inplace=True) | |
df.reset_index(drop=True, inplace=True) | |
df2 = pd.DataFrame(np.concatenate((ABC, abc, AABBCC)).reshape(3, len(ABC)).T, | |
columns=['one', 'two', 'three']) | |
df2 = df2.iloc[0:len(ABC):2] | |
df2['oneord'] = df2['one'].apply(ord) | |
df2['twoord'] = df2['two'].apply(ord) | |
df2.reset_index(drop=True, inplace=True) | |
oncols=['one', 'three'] | |
assigning = ['oneord', 'twoord'] | |
columns_via_merge(df, df2, oncols, assigning) | |
# df is still sorted descending and contains NULLs where df2 was invalid, | |
# but now contains ord numbers where df2 had values. | |
Args: | |
df: DataFrame being updated. Contains at least `oncols`. | |
df2: DataFrame that contains `oncols` and `assigning` columns. | |
oncols: Columns to map on in the merge. | |
assigning: Column name(s) to assign to the original DataFrame. | |
Returns: | |
""" | |
if not isinstance(oncols, list): | |
raise TypeError('"oncols" argumentmust be a list') | |
if not isinstance(assigning, list): | |
raise TypeError('"assigning" argument must be a list') | |
d2cols = oncols[:] | |
d2cols.extend(assigning) | |
df[assigning] = df[oncols].merge(df2[d2cols], on=oncols, how='left')[assigning] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment