Skip to content

Instantly share code, notes, and snippets.

@Miladiouss
Last active July 1, 2019 10:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Miladiouss/8429337b78490a1f1c7591a8ab5e1e45 to your computer and use it in GitHub Desktop.
Save Miladiouss/8429337b78490a1f1c7591a8ab5e1e45 to your computer and use it in GitHub Desktop.
Calculates and appends ordinality to a dataframe
def appendOrdinality(df, columns, ascending, strictlyIncreasing=False):
    """
    Append absolute and relative ordinality columns to a dataframe, in place.

    Ordinality is the normalized position of a row in the sorted dataframe.
    The dataframe is sorted by ``columns`` and re-indexed from 0 (both in
    place), then two columns are added (or overwritten):

    * ``abs_ordinality``: integer rank, 0 for the first sorted row.
    * ``ordinality``: ``abs_ordinality / len(df)``.

    df:
        Pandas DataFrame. It is modified in place; nothing is returned.
    columns:
        List of column names to be used for sorting (e.g. ['prob_1']).
        A single column name given as a string is also accepted.
    ascending:
        List of ascending True/False flags, one for each entry of ``columns``.
    strictlyIncreasing:
        If True, ordinality increments on every row, even if two consecutive
        rows are the same. If False, it increments only when at least one
        sort column differs from the previous row (absolute difference > 0;
        a NaN difference does not count as a change).

    An empty dataframe simply gains the two (empty) columns.

    === Example and Module Test ===
    rs = np.random.RandomState(9)
    dfTest = pd.DataFrame(columns=['prob_0', 'prob_1'])
    dfTest['prob_0'] = rs.randint(0, 10, 10) / 10
    dfTest['prob_1'] = rs.randint(0, 10, 10) / 10
    appendOrdinality(dfTest, ['prob_1', 'prob_0'], [False, True])
    dfTest
    """
    # Sort by the requested columns; later columns break ties in earlier ones.
    df.sort_values(columns, ascending=ascending, inplace=True)
    df.reset_index(inplace=True, drop=True)

    n = len(df)
    if strictlyIncreasing or n == 0:
        # Every row gets its own rank, so no row comparison is needed.
        abs_ordinality = np.arange(n, dtype=np.int64)
    else:
        key_columns = [columns] if isinstance(columns, str) else columns
        # changed[i] is True when row i differs from row i - 1 in any sort
        # column; the first row has no predecessor and stays False.
        changed = np.zeros(n, dtype=bool)
        for name in key_columns:
            values = df[name].to_numpy()
            changed[1:] |= np.abs(values[1:] - values[:-1]) > 0
        # The running count of changes is the rank of each row.
        abs_ordinality = np.cumsum(changed, dtype=np.int64)

    # max(n, 1) only matters for an empty frame, where it avoids dividing by 0.
    df['ordinality'] = abs_ordinality / max(n, 1)
    df['abs_ordinality'] = abs_ordinality
# Demo / smoke test: build a 10-row frame of seeded pseudo-random
# probabilities in {0.0, 0.1, ..., 0.9}, then rank it by prob_1 (descending)
# with prob_0 (ascending) as the tie-breaker.
rs = np.random.RandomState(9)
dfTest = pd.DataFrame(
    # Dict order fixes both the column order and the order of the draws.
    {label: rs.randint(low=0, high=10, size=10) / 10
     for label in ('prob_0', 'prob_1')}
)
appendOrdinality(dfTest, ['prob_1', 'prob_0'], [False, True])
# Bare expression: displays the result when run in a notebook/REPL cell.
dfTest
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment