Skip to content

Instantly share code, notes, and snippets.

@jwhendy
Last active October 3, 2019 15:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jwhendy/2b83320eb33792adabb4c119ae09fced to your computer and use it in GitHub Desktop.
Save jwhendy/2b83320eb33792adabb4c119ae09fced to your computer and use it in GitHub Desktop.
Given a set of data with some sort value column, and columns for an old and new value (where each row's old value is the new value from the previous row), put rows in the correct order when they have the same sort key value.
import pandas as pd
import itertools
df = pd.DataFrame({'group': ['a', 'a', 'a', 'a', 'a', 'a', 'b', 'b', 'b'],
'date': [0, 1, 1, 1, 1, 2, 3, 4, 4],
'old': [1, 8, 2, 2, 5, 5, 4, 10, 7],
'new': [2, 5, 5, 8, 2, 4, 7, 1, 10]})
print(df)
### jumbled: the `new` value of a row is not the same as the next row's `old` value
# group date old new
# 0 a 0 1 2
# 1 a 1 8 5
# 2 a 1 2 5
# 3 a 1 2 8
# 4 a 1 5 2
# 5 a 2 5 4
# 6 b 3 4 7
# 7 b 4 10 1
# 8 b 4 7 10
### this is hacky; brute force permutation search
def find_order(rows, first=None, last=None):
combos = itertools.permutations(range(len(rows)), len(rows))
for combo in combos:
# we apply this test order, then extract the 1:end old values and :-1 new values
# in the correct order, each pair will be the same, so subtracting will equal 0
old = rows.iloc[list(combo[1:])].old.tolist()
new = rows.iloc[list(combo[:-1])].new.tolist()
# if we have a preceeding unambiguous row, we need to include it's new value and
# ensure that in this permutation, the first old value is the same
if first is not None:
old.insert(0, rows['old'].iloc[list(combo)[0]])
new.insert(0, first)
# if we have a trailing unambiguous row, we need to include it's old value and
# ensure that in this permutation, the last new value is the same
if last is not None:
old.append(last)
new.append(rows['new'].iloc[list(combo)[-1]])
# subtract the zipped list and check for all zeros, if so, this is the order we want
if not any([a-b for (a, b) in zip(old, new)]):
return rows.iloc[list(combo)]
return rows
# high level processing of the group
def order_rows(rows):
# if there's only one row in this group, return it
if len(rows) == 1:
return rows
# start a sorted container to add to
df_temp = pd.DataFrame(columns=rows.columns)
# go through each unique date
for date in list(rows.date.unique()):
sub = rows[rows.date == date]
# if our subset is only one row, the date is unique; append it
if len(sub) == 1:
df_temp = df_temp.append(sub)
continue
# if we have a multi-row chunk, the dates are all the same
# if we've already got something in df_temp, there is a precediing constraint
# include this when we pass to the ordering function
first = df_temp['new'].iloc[-1] if len(df_temp)>0 else None
# if the rows in our current chunk and those in df_temp do not equal the total rows passed
# there's another row ahead; use this as a trailing constraint
last = rows['old'].loc[sub.index.max()+1] if len(sub)+len(df_temp)<len(rows) else None
# search for the right order, then append the sorted chunk
sub = find_order(sub, first, last)
df_temp = df_temp.append(sub)
return df_temp
df1 = df.copy()
df1 = df1.groupby(['group'], as_index=False, sort=False).apply(order_rows).reset_index(drop=True)
print(df1)
### correct: the `old` value in each row equals the `new` value of the previous row
# group date old new
# 0 a 0 1 2
# 1 a 1 2 5
# 2 a 1 5 2
# 3 a 1 2 8
# 4 a 1 8 5
# 5 a 2 5 4
# 6 b 3 4 7
# 7 b 4 7 10
# 8 b 4 10 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment