Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Asymmetric difference of two pandas DataFrames
def diff_df(df1, df2, how="left"):
    """
    Return the rows of one DataFrame that do not occur in the other.

    The operation is a row-wise set difference and is not symmetric:
    ``diff_df(x, y) != diff_df(y, x)`` in general, however
    ``diff_df(x, y, how='left') == diff_df(y, x, how='right')``.

    Args:
        df1: First DataFrame.
        df2: Second DataFrame; its column labels must match those of ``df1``.
        how: ``"left"`` returns the rows of ``df1`` that are absent from
            ``df2``; ``"right"`` returns the rows of ``df2`` that are absent
            from ``df1``.

    Returns:
        ``None`` when ``df1.equals(df2)``; otherwise a DataFrame holding each
        distinct row of the selected frame that is absent from the other
        frame. Surviving rows keep the index label of their first occurrence.

    Raises:
        ValueError: If the column labels differ, or if ``how`` is neither
            ``"left"`` nor ``"right"``.

    Ref: https://stackoverflow.com/questions/18180763/set-difference-for-pandas/40209800#40209800
    """
    # Compare lengths first: an element-wise comparison of two Index objects
    # of different length raises pandas' own "Lengths must match" error
    # instead of the message intended here.
    if len(df1.columns) != len(df2.columns) or (df1.columns != df2.columns).any():
        raise ValueError("Two dataframe columns must match")

    # Validate `how` before the equality shortcut so that a bad argument is
    # reported even when the two frames happen to be equal.
    if how not in ("left", "right"):
        raise ValueError('how parameter supports only "left" or "right" keywords')

    if df1.equals(df2):
        return None

    kept, removed = (df1, df2) if how == "left" else (df2, df1)

    # `removed` is concatenated twice so that every one of its rows occurs at
    # least twice and is therefore discarded by drop_duplicates(keep=False).
    # `kept` is de-duplicated beforehand; otherwise a row repeated inside
    # `kept` (but absent from `removed`) would be discarded as well.
    stacked = pd.concat([kept.drop_duplicates(), removed, removed])
    return stacked.drop_duplicates(keep=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment