This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Split data | |
| X_train, X_val, X_test, y_train, y_val, y_test = train_test_val_split(X, y) | |
| # Scale | |
| X_train_scaled_under, X_val_scaled, X_test_scaled = scale_data(X_train_under, X_val, X_test) | |
| # Score | |
| rf_under = model_score('RF', RandomForestClassifier(**rand.best_params_), | |
| X_train_scaled_under, X_val_scaled, X_test_scaled, | |
| y_train_under, y_val, y_test, test=False) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Scale | |
| X_train_scaled_under, X_val_scaled, X_test_scaled = scale_data(X_train_under, X_val, X_test) | |
| # Score | |
| rf_under = model_score('RF', RandomForestClassifier(**rand.best_params_), | |
| X_train_scaled_under, X_val_scaled, X_test_scaled, | |
| y_train_under, y_val, y_test, test=False) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def over_under_sample(X_train, y_train, Under=True, Over=True): | |
| """ | |
| Input: training features and target | |
| Output: under/oversampled datasets | |
| """ | |
| rus = RandomUnderSampler(random_state=42) | |
| ros = RandomOverSampler(random_state=42) | |
| if Under and Over: | |
| X_train_under, y_train_under = rus.fit_sample(X_train, y_train) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Update dataframe: np.argsort returns column positions ordered by ascending importance, | |
| # so the [7:] slice drops the 7 lowest-importance columns and keeps the rest in that order. | |
| # NOTE(review): assumes rf was fit on X with its columns in the current order - confirm. | |
| X = X.iloc[:, np.argsort(rf.feature_importances_,)[7:]] | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Define parameter values that should be searched | |
| bootstrap = [True, False] | |
| max_features = ['auto','sqrt'] | |
| min_samples_leaf = [1,2,4] | |
| min_samples_split = [2,5,10] | |
| n_estimators = [50,100,150,200] | |
| max_depth = [4,6,10,12, None] | |
| # Specify "parameter distributions" rather than a "parameter grid" | |
| param_dist = dict(n_estimators=n_estimators, max_depth = max_depth, bootstrap=bootstrap, | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def model_score(model_name, model, X_train_scaled, X_val_scaled, | |
| X_test_scaled, y_train, y_val, y_test, test=False): | |
| """ | |
| Input: Transformed feature and target sets | |
| Output: Validation scores. If test=True, includes test scores | |
| """ | |
| print('Calculating validation score...') | |
| my_model = model | |
| my_model.fit(X_train_scaled,y_train) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | def scale_data(X_train, X_val, X_test): | |
| """ | |
| Input: Features (numpy arrays) | |
| Output: Scaled data | |
| """ | |
| scaler = StandardScaler() | |
| X_train_scaled = scaler.fit_transform(X_train) | |
| X_val_scaled = scaler.transform(X_val) | |
| X_test_scaled = scaler.transform(X_test) | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Establish features and target variables | |
| X = df.loc[:,'limit_bal':'Apr_Pmt'] | |
| y = df['Default'] | |
| # Encode categorical variables | |
| categoricals = list(X.select_dtypes('object').columns) | |
| numericals = list(X.select_dtypes('int64').columns) | |
| def encode_cats(categoricals, numericals): | |
| """ | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | residuals = abs(dataframe['predictions'] - dataframe['close']) | |
| plt.hist(residuals.values, bins=100) | |
| plt.show(); | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Define number of steps to take | |
| steps = 2 | |
| # Define forecast array for 2 days into the future | |
| forecast = ar1.forecast(steps=steps)[0] | |
| forecast1 = dataframe['close'][-1] * (1 + forecast[0]) | |
| forecast2 = forecast1 * (1 + forecast[1]) | |
| forecast_array = np.array([forecast1, forecast2]) | |
| # Plot close price |