Skip to content

Instantly share code, notes, and snippets.

@Gclabbe
Created August 10, 2021 02:54
Show Gist options
  • Save Gclabbe/666f73e98651f0f46acc1150cadc8f9e to your computer and use it in GitHub Desktop.
Save Gclabbe/666f73e98651f0f46acc1150cadc8f9e to your computer and use it in GitHub Desktop.
Fourth Brain MLE week 1 -- counting feature rows with pivot_table instead of groupby
def get_features_and_targets(df, scale_y=None):
    """Count identical feature rows and return them as features and targets.

    The ``MSRP`` column is dropped, then the remaining columns are pivoted
    with ``aggfunc='size'`` so that each distinct combination of values
    becomes one index entry whose value is the number of rows sharing it.

    Args:
        df: DataFrame containing an ``MSRP`` column plus the feature columns.
        scale_y: Optional scaling applied to the counts:
            ``None`` leaves the raw counts untouched;
            ``'by_volume'`` divides by the number of rows in ``df``;
            ``'by_normal'`` applies min-max scaling (all-equal counts
            divide by zero and yield NaN);
            ``'by_stdev'`` subtracts the mean and divides by ``np.std``.

    Returns:
        Tuple ``(X, Y)`` where ``X`` is the index of the pivoted counts and
        ``Y`` is the counts as a column vector of shape ``(n, 1)``.

    Raises:
        ValueError: If ``scale_y`` is not one of the supported options.
    """
    # Validate first so a typo fails fast, before any pivoting work is done.
    if scale_y not in (None, 'by_volume', 'by_normal', 'by_stdev'):
        raise ValueError(
            "scale_y needs to be set to None, 'by_volume', 'by_normal' "
            f"or 'by_stdev'; got {scale_y!r}"
        )

    df = df.drop(columns=['MSRP'])
    # Every remaining column is a pivot key, so the result is a Series of
    # group sizes indexed by the unique feature combinations.
    counts = df.pivot_table(columns=list(df.columns), aggfunc='size')
    X = counts.index
    # Add a trailing axis so Y is a column vector.
    Y = counts.values[:, None]

    if scale_y == 'by_volume':
        Y = Y / len(df)
    elif scale_y == 'by_normal':
        lowest = Y.min()
        Y = (Y - lowest) / (Y.max() - lowest)
    elif scale_y == 'by_stdev':
        Y = (Y - np.average(Y)) / np.std(Y)
    return X, Y
# Each entry of the (multi)index is hashable, so the two halves of the year
# can be compared with ordinary set algebra.
train_models = set(train_X)
test_models = set(test_X)
all_models = train_models.union(test_models)
q12_only = train_models.difference(test_models)
q34_only = test_models.difference(train_models)
print(len(q12_only))
# Models present in both halves: everything in train that is not train-only.
all_quarters = train_models.intersection(test_models)
print(len(all_quarters))
# Number of cars sold in Q12 that were discontinued by Q34.
len(q12_only)
# Number of cars launched in Q34.
len(q34_only)
# Convert both indexes back to DataFrames for the rest of the assignment.
train_X_df = train_X.to_frame(index=False)
test_X_df = test_X.to_frame(index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment