Steps for deploying a Docker image to Heroku:
docker build ...
heroku create david-docker-staging
docker login --username=_ --password=$(heroku auth:token) registry.heroku.com
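docker push registry.heroku.com/app_name/web   (the image must first be tagged as registry.heroku.com/<app>/<process-type>)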
heroku container:release web -a app_name
heroku open
import unittest | |
from sklearn.metrics import precision_score, recall_score | |
from src.train import prepare_data_and_train_model | |
class TestModelMetrics(unittest.TestCase): | |
def test_model_precision_score_should_be_above_threshold(self): | |
model, X_test, Y_test = prepare_data_and_train_model() | |
Y_pred = model.predict(X_test) |
# bad example | |
df = get_data() | |
print(df) | |
# do_other_stuff() | |
# do_some_more_stuff() | |
df.head() | |
print(df.columns) | |
# do_so_much_stuff() | |
model = train_model(df) |
# bad example (a typical data science notebook) | |
See notebook: https://github.com/davified/clean-code-ml/blob/master/notebooks/titanic-notebook-1.ipynb | |
# good example | |
df = impute_nans(df, categorical_columns=['Embarked'], | |
continuous_columns =['Fare', 'Age']) | |
df = add_derived_title(df) | |
df = encode_title(df) | |
df = add_is_alone_column(df) | |
df = add_categorical_columns(df) |
# bad example | |
pd.qcut(df['Fare'], q=4, retbins=True)[1] # returns array([0., 7.8958, 14.4542, 31.275, 512.3292]) | |
df.loc[ df['Fare'] <= 7.90, 'Fare'] = 0 | |
df.loc[(df['Fare'] > 7.90) & (df['Fare'] <= 14.454), 'Fare'] = 1 | |
df.loc[(df['Fare'] > 14.454) & (df['Fare'] <= 31), 'Fare'] = 2 | |
df.loc[ df['Fare'] > 31, 'Fare'] = 3 | |
df['Fare'] = df['Fare'].astype(int) | |
df['FareBand'] = df['Fare'] | |
# good example (after refactoring into functions) |
def unexpectedly_mutate_original_data(cats):
    """Demonstrate that a shallow list copy does not protect the caller's data.

    ``list.copy()`` builds a new outer list, but its elements are the very
    same dict objects held by ``cats``. Adding a key through the copy is
    therefore visible through the original list as well.

    Args:
        cats: A list of dicts; each dict gains a ``'new'`` key.

    Returns:
        A new list holding the same (now mutated) dict objects as ``cats``.
    """
    # list.copy() method is a shallow copy. see https://docs.python.org/3/tutorial/datastructures.html
    _cats = cats.copy()
    # Iterate over the copy on purpose: the dicts are shared with `cats`,
    # so this still changes the caller's data.
    for cat in _cats:
        cat['new'] = 'something new'
    return _cats


original = [{'name': 'brownie'}, {'name': 'fluffy'}]
result = unexpectedly_mutate_original_data(original)
print(original)  # see that original value is mutated
# .circleci/config.yml | |
# CircleCI config for deploying docker containers to kubernetes + manual caching on CircleCI | |
version: 2 | |
jobs: | |
build_and_test: | |
working_directory: ~/repo | |
docker: | |
- image: google/cloud-sdk | |
steps: | |
- checkout |
Steps for deploying a Docker image to Heroku:
docker build ...
heroku create david-docker-staging
docker login --username=_ --password=$(heroku auth:token) registry.heroku.com
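docker push registry.heroku.com/app_name/web   (the image must first be tagged as registry.heroku.com/<app>/<process-type>)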
heroku container:release web -a app_name
heroku open
# .circleci/config.yml | |
# A CircleCI config for deploying https://github.com/davified/ci-workshop-app to GCP App Engine
version: 2 | |
jobs: | |
train_and_test: | |
docker: | |
- image: circleci/python:3.6.1 | |
working_directory: ~/repo | |
steps: | |
- checkout |
# .circleci/config.yml | |
# A CircleCI config for deploying https://github.com/davified/ci-workshop-app to Heroku
version: 2 | |
jobs: | |
train_and_test: | |
docker: | |
- image: circleci/python:3.6.1 | |
working_directory: ~/repo | |
steps: | |
- checkout |
$ git clone https://github.com/big-data-europe/docker-hive
$ cd docker-hive
$ docker-compose up -d
Open a shell inside the hive-server Docker container:
$ docker-compose exec hive-server bash
Inside the container, connect to HiveServer2 over JDBC using the Beeline CLI:
# /opt/hive/bin/beeline -u jdbc:hive2://localhost:10000