This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | DROP TABLE IF EXISTS orders; | |
| CREATE TABLE orders ( | |
| order_id serial primary key, | |
| order_date timestamp, | |
| user_id integer, | |
| amount numeric, | |
| num_product_items integer | |
| ); | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | SELECT | |
| user_id, | |
| order_id, | |
| product_name, | |
| amount, | |
| -- rank items within the order by amount | |
| ROW_NUMBER() OVER (PARTITION BY order_id ORDER BY amount DESC) AS row_num, | |
| -- rank items within the order by amount and highlight same values with same rank | |
| DENSE_RANK() OVER (PARTITION BY order_id ORDER BY amount DESC) AS dense_rnk, | |
| -- understand the percentile ranking of items in terms of amount within the order | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | SELECT | |
| order_id, | |
| amount, | |
| num_product_items, | |
| -- ranking by num_product_items | |
| ROW_NUMBER() OVER (ORDER BY num_product_items) AS row_num_by_items, | |
| RANK() OVER (ORDER BY num_product_items) AS rank_by_items, | |
| DENSE_RANK() OVER (ORDER BY num_product_items) AS dense_rank_by_items, | |
| -- ranking by amount | |
| ROW_NUMBER() OVER (ORDER BY amount) AS row_num_by_amount, | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | SELECT | |
| user_id, | |
| order_id, | |
| product_name, | |
| amount, | |
| NTILE(3) OVER (PARTITION BY order_id) AS ntile_by_order_id, | |
| FIRST_VALUE(amount) OVER (PARTITION BY order_id) AS first_amount_value_by_order_id, | |
| COUNT(*) OVER (PARTITION BY order_id) AS count_by_order_id, | |
| SUM(amount) OVER (PARTITION BY order_id) AS sum_by_order_id, | |
| ROUND(AVG(amount) OVER (PARTITION BY order_id), 4) AS avg_amount_by_order_id | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | SELECT | |
| t1.order_id, | |
| t1.order_date, | |
| -- aggregate functions + OVER clause = window functions | |
| SUM(t1.amount) OVER () AS total_amount, | |
| ROUND(AVG(t1.amount) OVER (), 2) AS avg_order_size, | |
| SUM(t1.num_product_items) OVER () AS total_product_items, | |
| COUNT(*) OVER () AS total_count, | |
| -- create ratios and % of total using values from window functions | |
| ROUND(1.0 * t1.amount / SUM(t1.amount) OVER (), 4) AS percent_amount, | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | SELECT | |
| t1.*, | |
| SUM(t1.amount) OVER () AS total_amount, | |
| COUNT(*) OVER () AS total_count, | |
| AVG(t1.amount) OVER () AS total_mean | |
| FROM orders t1; | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # define our optimal random forest model | |
| best_rfgs = rfgs.best_estimator_ | |
| # create model dict variable for output_model_stats function | |
| model = {"rf_grid_search": best_rfgs} | |
| df_rfgs = output_model_stats(model, X_train, y_train) | |
| df_rfgs = df_rfgs.append(df_rfrs) | |
| df_rfgs | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # instantiate base random forest model class | |
| rf = RandomForestClassifier(random_state=0) | |
| # define gridsearch parameters | |
| param_grid = {'criterion': ['gini', 'entropy'], | |
| 'n_estimators': [300, 500, 700], | |
| 'max_features': ['auto', 'log2'], | |
| 'max_depth': [3, 5, 7], | |
| 'min_samples_split': [8, 10, 12], | |
| 'min_samples_leaf': [8, 10, 12]} | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # define our optimal random forest model | |
| best_rfrs = rf_rsearch.best_estimator_ | |
| # create model dict variable for output_model_stats function | |
| model = {"rf_randomized_search": best_rfrs} | |
| df_rfrs = output_model_stats(model, X_train, y_train) | |
| df_rfrs = df_rfrs.append(df_iteration_3) | |
| df_rfrs | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # param_distributions | |
| rs_grid = {'n_estimators': [100, 300, 500, 800, 1000, 1200, 1400], | |
| 'criterion': ['gini', 'entropy'], | |
| 'max_features': ['auto', 'log2'], | |
| 'max_depth': [3, 5, 7, 9, 15, 25, 30], | |
| 'min_samples_split': [2, 3, 4, 5, 7, 10, 15], | |
| 'min_samples_leaf': [1, 2, 5, 10, 12, 14]} | |
| # instantiate RF model class for tuning | |
| rf = RandomForestClassifier(random_state=0) | 
NewerOlder