Skip to content

Instantly share code, notes, and snippets.

@shaan-shah
Last active August 18, 2020 09:26
Show Gist options
  • Save shaan-shah/9a32b40adbad28d4f8be037c591150f9 to your computer and use it in GitHub Desktop.
Save shaan-shah/9a32b40adbad28d4f8be037c591150f9 to your computer and use it in GitHub Desktop.
This was made to demonstrate code on Medium.
def data_trainer(Target_Variable,data_raw,n_valid,date_column=None):
    """Preprocess ``data_raw`` and grid-search ``auto_train`` settings.

    The frame is run through ``add_datepart`` (only when ``date_column`` is
    given), ``train_cats`` and ``proc_df``; the results are then split with
    ``split_vals`` at ``len(df) - n_valid``. Every combination of the leaf and
    feature candidates is trained with ``auto_train`` and scored with ``fx``
    on the validation part; the best-scoring combination is remembered in
    ``score``, ``min_leaf_a``, ``max_feature_a`` and ``z``.

    Args:
        Target_Variable: Passed to ``proc_df`` as the target field.
        data_raw: Raw input frame handed to the preprocessing helpers.
        n_valid: Number of rows subtracted from ``len(df)`` to obtain the
            split index given to ``split_vals``.
        date_column: Optional column forwarded to ``add_datepart``; skipped
            when falsy.

    Returns:
        Nothing in the portion shown here.
    """
    # NOTE(review): this is an alias, not a copy. If add_datepart/train_cats
    # modify their argument in place (presumably they do - confirm), the
    # caller's frame is modified too.
    df_raw = data_raw
    reset_rf_samples()

    # Expand the date column only when the caller supplied one.
    if date_column:
        add_datepart(df_raw, date_column)
    train_cats(df_raw)
    df, y, nas = proc_df(df_raw, Target_Variable)

    n_trn = len(df) - n_valid
    raw_train, raw_valid = split_vals(df_raw, n_trn)
    X_train, X_valid = split_vals(df, n_trn)
    y_train, y_valid = split_vals(y, n_trn)

    # "decider" is the sample size handed to set_rf_samples to speed up
    # training; it is only set when the training part exceeds 20000 rows.
    decider = None
    if len(X_train) > 20000:
        decider = 20000

    # From here on we are tuning the parameters.
    if decider:
        set_rf_samples(decider)

    # A candidate is only accepted when it scores strictly above 0, so
    # ``z`` stays None if no model beats that baseline.
    score = 0
    min_leaf_a = 0
    max_feature_a = None
    z = None
    list1 = [1, 3, 5, 10, 25]
    list2 = [0.1, 0.25, 0.5, 0.75, 0.9, "sqrt", "log2", 1]
    for leafs in list1:
        for features in list2:
            t = auto_train(a=leafs, b=features, X_train=X_train, y_train=y_train)
            # Score each candidate exactly once: the value that is printed,
            # compared and stored is guaranteed to be the same, and the
            # validation pass is not repeated three times per model.
            current_score = fx(m=t, X_valid=X_valid, y_valid=y_valid)
            print(current_score, leafs)
            if current_score > score:
                score = current_score
                min_leaf_a = leafs
                max_feature_a = features
                z = t
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment