Hammond95/separator.py

## separator.py
def explode_from_fields(df, target_columns: list, separator: str):
    """Split delimited cells into one output row per element.

    For every row of ``df`` the string found in each target column is split
    on ``separator``. The row is then repeated once per element of the
    longest split; target columns whose split is shorter are padded with
    ``None``. Values of all other columns are copied unchanged into each
    generated row.

    Args:
        df: DataFrame to split.
        target_columns: Names of the columns whose values are split. When
            empty, every row is emitted once, unchanged.
        separator: Symbol passed to ``str.split`` for each string cell.

    Returns:
        A new DataFrame with a fresh default index (the index of ``df`` is
        not carried over). When no rows are produced, an empty DataFrame
        with the columns of ``df`` is returned.

    Raises:
        KeyError: If a name in ``target_columns`` is not a column of ``df``.
        ValueError: If ``separator`` is empty and a non-empty string cell
            has to be split (raised by ``str.split``).
    """
    # Imported locally so the function does not rely on a module-level
    # ``pd`` alias, which the visible part of this file never defines.
    import pandas as pd

    def _split_cell(value):
        """Return the pieces of one cell; the list always has >= 1 item."""
        if isinstance(value, str):
            # An empty string counts as "no value" (one None placeholder)
            # instead of the [''] that str.split would produce.
            return value.split(separator) if value else [None]
        # Missing markers (None, NaN, NaT, pd.NA) must be detected
        # explicitly: NaN is truthy, so a plain truthiness test would try
        # to call .split on a float. is_scalar guards against list-like
        # cells, for which pd.isna returns an array.
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            return [None]
        # Non-string payloads (numbers, booleans, containers) cannot be
        # split; keep them intact as a single piece.
        return [value]

    new_rows = []
    # Explicit iteration instead of DataFrame.apply: the callback was used
    # only for its side effect, and apply gives no guarantee of calling it
    # exactly once per row (older pandas could call it twice on the first
    # row, which would duplicate that row's output).
    for _, row in df.iterrows():
        base = row.to_dict()
        pieces = [_split_cell(row[tc]) for tc in target_columns]
        # default=1 keeps the row when there is nothing to split.
        height = max((len(parts) for parts in pieces), default=1)
        for position in range(height):
            new_row = dict(base)
            for tc, parts in zip(target_columns, pieces):
                # Shorter splits are padded with None up to the longest one.
                new_row[tc] = parts[position] if position < len(parts) else None
            new_rows.append(new_row)

    if not new_rows:
        # Building from an empty list would drop every column name.
        return pd.DataFrame(columns=df.columns)
    return pd.DataFrame(new_rows)
	def explode_from_fields(df, target_columns: list, separator: str):
	    """Split delimited cells into one output row per element.

	    For every row of ``df`` the string found in each target column is
	    split on ``separator``. The row is then repeated once per element of
	    the longest split; target columns whose split is shorter are padded
	    with ``None``. Values of all other columns are copied unchanged into
	    each generated row.

	    Args:
	        df: DataFrame to split.
	        target_columns: Names of the columns whose values are split.
	            When empty, every row is emitted once, unchanged.
	        separator: Symbol passed to ``str.split`` for each string cell.

	    Returns:
	        A new DataFrame with a fresh default index (the index of ``df``
	        is not carried over). When no rows are produced, an empty
	        DataFrame with the columns of ``df`` is returned.

	    Raises:
	        KeyError: If a name in ``target_columns`` is not a column of
	            ``df``.
	        ValueError: If ``separator`` is empty and a non-empty string
	            cell has to be split (raised by ``str.split``).
	    """
	    # Imported locally so the function does not rely on a module-level
	    # ``pd`` alias, which the visible part of this file never defines.
	    import pandas as pd

	    def _pieces_of(value):
	        """Return the pieces of one cell; the list always has >= 1 item."""
	        if isinstance(value, str):
	            # An empty string counts as "no value" (one None placeholder)
	            # instead of the [''] that str.split would produce.
	            return value.split(separator) if value else [None]
	        # NaN is truthy, so missing markers are detected explicitly
	        # rather than by truthiness; is_scalar guards against list-like
	        # cells, for which pd.isna returns an array.
	        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
	            return [None]
	        # Non-string payloads cannot be split; keep them as one piece.
	        return [value]

	    new_rows = []
	    # Plain iteration: DataFrame.apply was used only for its side effect
	    # and does not promise exactly one callback invocation per row.
	    for _, row in df.iterrows():
	        base = row.to_dict()
	        splits = [_pieces_of(row[tc]) for tc in target_columns]
	        # default=1 keeps the row when there is nothing to split.
	        longest = max((len(parts) for parts in splits), default=1)
	        for position in range(longest):
	            new_row = dict(base)
	            for tc, parts in zip(target_columns, splits):
	                # Shorter splits are padded with None up to the longest.
	                new_row[tc] = parts[position] if position < len(parts) else None
	            new_rows.append(new_row)

	    if not new_rows:
	        # Building from an empty list would drop every column name.
	        return pd.DataFrame(columns=df.columns)
	    return pd.DataFrame(new_rows)