Skip to content

Instantly share code, notes, and snippets.

View mdominguez2010's full-sized avatar
🎯
Focusing

Marcos Dominguez mdominguez2010

🎯
Focusing
View GitHub Profile
# --- Evaluate a tuned RandomForest on the undersampled training set ---
# NOTE(review): this fragment references names defined elsewhere in the gist
# (train_test_val_split, scale_data, model_score, X, y, X_train_under,
# y_train_under, rand) — verify against the full notebook.

# Split data
X_train, X_val, X_test, y_train, y_val, y_test = train_test_val_split(X, y)
# Scale
X_train_scaled_under, X_val_scaled, X_test_scaled = scale_data(X_train_under, X_val, X_test)
# Score
rf_under = model_score('RF', RandomForestClassifier(**rand.best_params_),
                       X_train_scaled_under, X_val_scaled, X_test_scaled,
                       y_train_under, y_val, y_test, test=False)
# Scale
# NOTE(review): the scale/score pair below is a byte-for-byte repeat of the one
# above — looks like a copy-paste duplicate; confirm whether the second run is intentional.
X_train_scaled_under, X_val_scaled, X_test_scaled = scale_data(X_train_under, X_val, X_test)
# Score
rf_under = model_score('RF', RandomForestClassifier(**rand.best_params_),
                       X_train_scaled_under, X_val_scaled, X_test_scaled,
                       y_train_under, y_val, y_test, test=False)
def over_under_sample(X_train, y_train, Under=True, Over=True):
    """
    Input: training features and target
    Output: under/oversampled datasets

    NOTE(review): only the `Under and Over` branch is visible in this capture,
    the resampled arrays are never returned, and `ros` is unused here — the
    gist preview appears truncated; confirm the remaining branches and the
    return statement against the full source.
    """
    # Fixed seeds so resampling is reproducible across runs.
    rus = RandomUnderSampler(random_state=42)
    ros = RandomOverSampler(random_state=42)  # presumably used in a branch beyond this capture
    if Under and Over:
        # fit_sample is the legacy imbalanced-learn API (renamed fit_resample
        # in later releases) — TODO confirm the pinned library version.
        X_train_under, y_train_under = rus.fit_sample(X_train, y_train)
# Update dataframe
# Drop the 7 features with the lowest importances from a previously fitted
# forest (`rf`): argsort ranks columns by ascending importance, [7:] keeps the rest.
X = X.iloc[:, np.argsort(rf.feature_importances_,)[7:]]
# Define parameter values that should be searched
bootstrap = [True, False]
max_features = ['auto','sqrt']
min_samples_leaf = [1,2,4]
min_samples_split = [2,5,10]
n_estimators = [50,100,150,200]
max_depth = [4,6,10,12, None]
# Specify "parameter distributions" rather than a "parameter grid"
# NOTE(review): the statement below is cut off in this capture (unclosed `dict(`);
# presumably it also passed max_features / min_samples_leaf / min_samples_split —
# restore the remainder from the full gist.
param_dist = dict(n_estimators=n_estimators, max_depth = max_depth, bootstrap=bootstrap,
def model_score(model_name, model, X_train_scaled, X_val_scaled,
                X_test_scaled, y_train, y_val, y_test, test=False):
    """
    Input: Transformed feature and target sets
    Output: Validation scores. If test=True, includes test scores

    NOTE(review): the body visible here only fits the estimator — the actual
    scoring/printing logic (and any return value, e.g. the fitted model bound
    to `rf_under` by callers) is cut off by the gist preview; confirm against
    the full source.
    """
    print('Calculating validation score...')
    my_model = model
    # Fit on the (already scaled) training split.
    my_model.fit(X_train_scaled,y_train)
def scale_data(X_train, X_val, X_test):
    """
    Standardize the three feature splits with a single StandardScaler.

    The scaler is fit on the training split only and then applied to the
    validation and test splits, so no val/test statistics leak into the
    transform.

    Input: Features (numpy arrays)
    Output: tuple (X_train_scaled, X_val_scaled, X_test_scaled)
    """
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)
    # Bug fix: the original fell off the end and implicitly returned None,
    # but callers unpack three values:
    #   X_train_scaled_under, X_val_scaled, X_test_scaled = scale_data(...)
    return X_train_scaled, X_val_scaled, X_test_scaled
# Establish features and target variables
# Feature matrix: every column from 'limit_bal' through 'Apr_Pmt' (label
# slices with .loc are inclusive on both ends).
X = df.loc[:,'limit_bal':'Apr_Pmt']
# Target column.
y = df['Default']
# Encode categorical variables
# Partition the feature columns by dtype so the object-typed (categorical)
# columns can be encoded separately from the int64 (numeric) ones.
categoricals = X.select_dtypes('object').columns.tolist()
numericals = X.select_dtypes('int64').columns.tolist()
def encode_cats(categoricals, numericals):
"""
@mdominguez2010
mdominguez2010 / residuals.py
Created February 16, 2021 18:34
google forecast
# Absolute forecast error per row: |prediction - actual close|.
residuals = (dataframe['predictions'] - dataframe['close']).abs()
# Visualize the spread of the errors with a fine-grained histogram.
plt.hist(residuals.values, bins=100)
plt.show()
@mdominguez2010
mdominguez2010 / forecast.py
Last active February 16, 2021 18:30
google forecast
# Define number of steps to take
steps = 2  # forecast horizon: two days ahead
# Per-step predicted returns from the fitted AR model.
forecast = ar1.forecast(steps=steps)[0]
# Define forecast array for 2 days into the future
# Compound each predicted return onto the last observed close price.
last_close = dataframe['close'][-1]
forecast1 = last_close * (1 + forecast[0])  # day-1 price level
forecast2 = forecast1 * (1 + forecast[1])   # day-2 price level
forecast_array = np.array([forecast1, forecast2])
# Plot close price