tommct/columnviamerge.py

## columnviamerge.py
def columns_via_merge(df: pd.DataFrame, df2: pd.DataFrame, oncols: list, assigning: list):
    """
    Add (or replace) columns to df that map via a merge with df2.

    Examples:

        # Add the ord value to a subset of a DataFrame
        ABC = [chr(x) for x in range(ord('A'), ord('Z') + 1)]
        AABBCC = [chr(x)+chr(x) for x in range(ord('A'), ord('Z') + 1)]
        abc = [chr(x) for x in range(ord('a'), ord('z') + 1)]

        df = pd.DataFrame(np.concatenate((ABC, abc, AABBCC)).reshape(3, len(ABC)).T,
        columns=['one', 'two', 'three'])
        df.sort_values(by='one', ascending=False, inplace=True)
        df.reset_index(drop=True, inplace=True)

        df2 = pd.DataFrame(np.concatenate((ABC, abc, AABBCC)).reshape(3, len(ABC)).T,
        columns=['one', 'two', 'three'])
        df2 = df2.iloc[0:len(ABC):2]
        df2['oneord'] = df2['one'].apply(ord)
        df2['twoord'] = df2['two'].apply(ord)
        df2.reset_index(drop=True, inplace=True)

        oncols=['one', 'three']
        assigning = ['oneord', 'twoord']
        columns_via_merge(df, df2, oncols, assigning)

        # df is still sorted descending and contains NULLs where df2 was invalid,
        # but now contains ord numbers where df2 had values.

    Args:
        df: DataFrame being updated. Contains at least `oncols`.
        df2: DataFrame that contains `oncols` and `assigning` columns.
        oncols: Columns to map on in the merge.
        assigning: Column name(s) to assign to the original DataFrame.

    Returns:

    """
    if not isinstance(oncols, list):
        raise TypeError('"oncols" argumentmust be a list')
    if not isinstance(assigning, list):
        raise TypeError('"assigning" argument must be a list')
    d2cols = oncols[:]
    d2cols.extend(assigning)
    df[assigning] = df[oncols].merge(df2[d2cols], on=oncols, how='left')[assigning]
	def columns_via_merge(df: pd.DataFrame, df2: pd.DataFrame, oncols: list, assigning: list):
	"""
	Add (or replace) columns to df that map via a merge with df2.

	Examples:

	# Add the ord value to a subset of a DataFrame
	ABC = [chr(x) for x in range(ord('A'), ord('Z') + 1)]
	AABBCC = [chr(x)+chr(x) for x in range(ord('A'), ord('Z') + 1)]
	abc = [chr(x) for x in range(ord('a'), ord('z') + 1)]

	df = pd.DataFrame(np.concatenate((ABC, abc, AABBCC)).reshape(3, len(ABC)).T,
	columns=['one', 'two', 'three'])
	df.sort_values(by='one', ascending=False, inplace=True)
	df.reset_index(drop=True, inplace=True)

	df2 = pd.DataFrame(np.concatenate((ABC, abc, AABBCC)).reshape(3, len(ABC)).T,
	columns=['one', 'two', 'three'])
	df2 = df2.iloc[0:len(ABC):2]
	df2['oneord'] = df2['one'].apply(ord)
	df2['twoord'] = df2['two'].apply(ord)
	df2.reset_index(drop=True, inplace=True)

	oncols=['one', 'three']
	assigning = ['oneord', 'twoord']
	columns_via_merge(df, df2, oncols, assigning)

	# df is still sorted descending and contains NULLs where df2 was invalid,
	# but now contains ord numbers where df2 had values.

	Args:
	df: DataFrame being updated. Contains at least `oncols`.
	df2: DataFrame that contains `oncols` and `assigning` columns.
	oncols: Columns to map on in the merge.
	assigning: Column name(s) to assign to the original DataFrame.

	Returns:

	"""
	if not isinstance(oncols, list):
	raise TypeError('"oncols" argumentmust be a list')
	if not isinstance(assigning, list):
	raise TypeError('"assigning" argument must be a list')
	d2cols = oncols[:]
	d2cols.extend(assigning)
	df[assigning] = df[oncols].merge(df2[d2cols], on=oncols, how='left')[assigning]