ogyalcin / Add_Existing_Project_To_Git.md
Created August 31, 2021 09:01 — forked from alexpchin/Add_Existing_Project_To_Git.md
Add Existing Project To Git Repo

# Adding an existing project to GitHub using the command line

Simple steps to add an existing project to GitHub.

## 1. Create a new repository on GitHub.

In Terminal, change the current working directory to your local project.

## 2. Initialize the local directory as a Git repository.

git init
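
# The snippets below walk through the RuleFit algorithm on the Boston housing data:
# load the dataset, train a baseline RandomForestRegressor, compute its RMSE and
# feature importances, then fit a RuleFit model and inspect the rules it extracts.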
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston

# Load the Boston housing data into a DataFrame
# (load_boston was removed in scikit-learn 1.2, so this step needs an older scikit-learn version)
raw_data = load_boston()
df = pd.DataFrame(np.c_[raw_data['data'], raw_data['target']],
                  columns=np.append(raw_data['feature_names'], ['target']))
df.head()

# Plot the distribution of the target variable
import plotly.express as px
px.histogram(df, x='target', template='ggplot2', width=800, nbins=50)

# Train a Random Forest Regressor model
from sklearn.ensemble import RandomForestRegressor
y = df.target
X = df.drop('target', axis=1)
rf = RandomForestRegressor(random_state=42, n_estimators=50, n_jobs=-1)
rf.fit(X, y)

# Calculate the RMSE of the Random Forest model
# Note that we did not split train and test datasets to simplify the process
rf_preds = rf.predict(X)
rf_rmse = np.sqrt(((rf_preds - y) ** 2).mean())
print(rf_rmse)

# Plot the feature importances
feat_importances = pd.Series(rf.feature_importances_, index=X.columns)
feat_importances = feat_importances.sort_values(ascending=False)
px.bar(y=feat_importances, x=feat_importances.index, template='ggplot2', width=800)

# If you are using Colab, you need to install the rulefit library
!pip install rulefit
from rulefit import RuleFit

# Create and train the RuleFit model, using a random forest to generate the candidate rules
rulefit = RuleFit(tree_generator=RandomForestRegressor(n_estimators=100))
rulefit.fit(X.values, y, feature_names=X.columns)

# The RMSE of the RuleFit model
rulefit_preds = rulefit.predict(X.values)
rulefit_rmse = np.sqrt(((rulefit_preds - y) ** 2).mean())
print(rulefit_rmse)

# Extract the rules and show the 15 with the highest support
rules = rulefit.get_rules()
rules = rules.sort_values('support', ascending=False)
rules.iloc[:15]

# Filter the rules for a more readable view
pd.set_option('display.max_colwidth', 400)             # Widen the column so the entire rule is readable
pd.options.display.float_format = '{:.2f}'.format      # Round decimals to 2 decimal places
rules = rulefit.get_rules()                             # Get the rules
rules = rules[rules['type'] != 'linear']                # Drop the linear terms (the original explanatory variables)
rules = rules[rules['coef'] != 0]                       # Eliminate the insignificant rules
rules = rules.sort_values('support', ascending=False)   # Sort the rules by their "support" value
rules = rules[rules['rule'].str.len() > 30]             # Optional: keep only longer, more complex rules
rules.iloc[0:5]                                         # Show the first 5 rules
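
The christophM/rulefit implementation also reports an importance score for each rule, derived from its coefficient and support. If the installed version exposes that column, the rules can be ranked by it as well; a minimal sketch, assuming get_rules() returns an 'importance' column:

# Rank rules by RuleFit's importance score instead of raw support
# (assumes the 'importance' column exists in the get_rules() output)
rules = rulefit.get_rules()
rules = rules[rules['coef'] != 0]                        # keep only rules with non-zero coefficients
rules = rules.sort_values('importance', ascending=False) # highest-importance rules first
rules[['rule', 'type', 'coef', 'support', 'importance']].head(10)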