Ben Epstein (Ben-Epstein)

My personal setup preference (macOS)

echo "syntax on\nset nu\n" >> ~/.vimrc
sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)"
git clone https://github.com/zsh-users/zsh-autosuggestions.git $ZSH_CUSTOM/plugins/zsh-autosuggestions
git clone https://github.com/zsh-users/zsh-syntax-highlighting.git $ZSH_CUSTOM/plugins/zsh-syntax-highlighting
sed -i '' "s/plugins=(git)/plugins=(git zsh-autosuggestions zsh-syntax-highlighting)/g" ~/.zshrc
git clone --depth=1 https://github.com/romkatv/powerlevel10k.git ${ZSH_CUSTOM:-$HOME/.oh-my-zsh/custom}/themes/powerlevel10k
sed -i '' "s/robbyrussell/powerlevel10k\/powerlevel10k/g" ~/.zshrc
Ben-Epstein / api.json
Created March 21, 2021 17:48
Feature Store Swagger api.json
{"openapi":"3.0.2","info":{"title":"Feature Store API","description":"API for asynchronous and synchronous calls to the feature store","version":"0.1.0"},"paths":{"/health":{"get":{"tags":["Mgmt"],"summary":"Health Check","description":"Health check","operationId":"healthcheck","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"title":"Response Health Check Health Get","type":"string"}}}}}}},"/feature-sets":{"get":{"tags":["Feature Sets"],"summary":"Get Feature Sets","description":"Returns a list of available feature sets","operationId":"get_feature_sets","parameters":[{"required":false,"schema":{"title":"Name","type":"array","items":{"type":"string"},"default":[]},"name":"name","in":"query"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"title":"Response Get Feature Sets Feature Sets Get","type":"array","items":{"$ref":"#/components/schemas/FeatureSet"}}}}},"422":{"description":"Validation Error","content":{"ap
Ben-Epstein / get_training_set.py
Created February 11, 2021 14:38
getting a training set from a feature store
df = fs.get_training_set(
    features=[
        'max_spending_category_30d',
        'max_purchase_amount_15d',
        'spending_avg_7_day',
        'spending_avg_15_day',
        'spending_avg_60_day',
        'customer_churn',
    ]
)
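A quick sanity check on the returned training set is just standard Spark DataFrame inspection (not part of the original gist):

df.printSchema()            # columns for each requested feature
df.show(5, truncate=False)  # peek at a few rows
print(f"rows: {df.count()}")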
spark.stop()      # Shut down the Spark session
mlflow.end_run()  # Close the active MLflow run
%%sql
-- Rows inserted into the deployed model table are scored automatically;
-- select them back to see the filled-in prediction columns
insert into iris_model (sepal_length, sepal_width, petal_length, petal_width, moment_id) values (5.1, 3.5, 1.4, 0.2, 0);
insert into iris_model (sepal_length, sepal_width, petal_length, petal_width, moment_id) values (6.4, 2.7, 5.3, 2.0, 1);
select * from iris_model;
schema = splice.getConnection().getSchema()     # current database schema
run_id = mlflow.current_run_id()                # the run whose logged model will be deployed
splice.dropTableIfExists(schema, 'iris_model')  # clear out any previous deployment
jid = mlflow.deploy_db(schema, 'iris_model', run_id, create_model_table=True,
                       df=df.select(cols[:-1]), primary_key={'MOMENT_ID': 'INT'},
                       classes=list(data.target_names))
mlflow.watch_job(jid)                           # stream the deployment job's status until it finishes
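Once the deployment job finishes, the model table can be read back into Spark. A sketch assuming the PySpliceContext used above exposes a df() helper for SQL queries; if your pysplice version differs, a plain JDBC read works as a fallback:

predictions = splice.df(f"SELECT * FROM {schema}.iris_model")  # scored rows, including prediction columns
predictions.show()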
mlflow.log_pipeline_stages(model)          # record each stage of the fitted pipeline
mlflow.log_feature_transformations(model)  # record how each input feature is transformed
mlflow.log_param('maxDepth', model.stages[-1].getOrDefault('maxDepth'))
mlflow.log_param('maxBins', model.stages[-1].getOrDefault('maxBins'))
mlflow.log_model(model, 'spark_dt')        # Important! The logged model artifact is what gets deployed
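The same run can also carry evaluation metrics next to these params via the standard log_metric call; `accuracy` here is a hypothetical value computed from the held-out split (see the evaluation sketch after the pipeline code below):

mlflow.log_metric('test_accuracy', accuracy)  # accuracy computed elsewhere, e.g. with a MulticlassClassificationEvaluator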
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.classification import DecisionTreeClassifier
from pyspark.ml import Pipeline
va = VectorAssembler(inputCols=cols[:-1], outputCol='features') # Define feature vector
dt = DecisionTreeClassifier() # Define model
pipeline = Pipeline(stages=[va, dt]) # Chain steps together into a full pipeline
train, test = df.randomSplit([0.8,0.2])
model = pipeline.fit(train)
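A quick accuracy check on the held-out split, using standard PySpark evaluation (a sketch, not part of the original gist):

from pyspark.ml.evaluation import MulticlassClassificationEvaluator

predictions = model.transform(test)  # score the held-out rows
evaluator = MulticlassClassificationEvaluator(labelCol='label', predictionCol='prediction',
                                              metricName='accuracy')
print('test accuracy:', evaluator.evaluate(predictions))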
Ben-Epstein / Load Iris Data into Spark.py
Created November 5, 2020 15:26
Load Iris Data into Spark
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
data = load_iris()
cols = [i.replace('(cm)','').strip().replace(' ','_') for i in data.feature_names] + ['label'] # Column name cleanup
pdf = pd.DataFrame(np.c_[data.data, data.target], columns=cols)
df = spark.createDataFrame(pdf)
df.show()
Ben-Epstein / MLFlow start experiment and run.py
Last active November 5, 2020 15:28
MLFlow start experiment and run
mlflow.set_experiment('Iris_classification')      # create the experiment if it doesn't exist, otherwise reuse it
mlflow.start_run(run_name='Spark Decision Tree')  # begin a new run under that experiment
get_mlflow_ui(mlflow.current_exp_id())            # display the MLflow tracking UI for this experiment