Skip to content

Instantly share code, notes, and snippets.

import unittest
from sklearn.metrics import precision_score, recall_score
from src.train import prepare_data_and_train_model
class TestModelMetrics(unittest.TestCase):
def test_model_precision_score_should_be_above_threshold(self):
model, X_test, Y_test = prepare_data_and_train_model()
Y_pred = model.predict(X_test)
# bad example: ad-hoc inspection calls and commented-out code sit between
# the two statements that produce results (get_data and train_model).
df = get_data()
print(df)  # prints the whole frame
# do_other_stuff()
# do_some_more_stuff()
df.head()  # return value is not assigned or printed
print(df.columns)
# do_so_much_stuff()
model = train_model(df)
# bad example (a typical data science notebook)
See notebook: https://github.com/davified/clean-code-ml/blob/master/notebooks/titanic-notebook-1.ipynb
# good example: every preprocessing step is a named function that takes the
# frame and returns the frame to use for the next step.
df = impute_nans(
    df,
    categorical_columns=['Embarked'],
    continuous_columns=['Fare', 'Age'],
)
# The remaining steps take only the frame, so apply them in order.
preprocessing_steps = (
    add_derived_title,
    encode_title,
    add_is_alone_column,
    add_categorical_columns,
)
for step in preprocessing_steps:
    df = step(df)
# bad example: the quartile edges are computed, discarded, and then re-typed
# by hand as rounded constants.
# With retbins=True, qcut returns (binned values, bin edges); [1] selects the
# edges. The expression's value is not assigned to anything.
pd.qcut(df['Fare'], q=4, retbins=True)[1] # returns array([0., 7.8958, 14.4542, 31.275, 512.3292])
# Overwrite 'Fare' in place with a band code 0-3, one band per statement.
# The cut-offs (7.90, 14.454, 31) are hand-copied approximations of the edges
# listed in the comment above (7.8958, 14.4542, 31.275).
df.loc[ df['Fare'] <= 7.90, 'Fare'] = 0
df.loc[(df['Fare'] > 7.90) & (df['Fare'] <= 14.454), 'Fare'] = 1
df.loc[(df['Fare'] > 14.454) & (df['Fare'] <= 31), 'Fare'] = 2
df.loc[ df['Fare'] > 31, 'Fare'] = 3
# Band codes are stored as integers.
df['Fare'] = df['Fare'].astype(int)
# 'FareBand' is a copy of the banded column; df['Fare'] no longer holds the
# original fare amounts at this point.
df['FareBand'] = df['Fare']
# good example (after refactoring into functions)
def unexpectedly_mutate_original_data(cats):
    """Add a ``'new'`` key to every dict in *cats* and return *cats*.

    This is a demonstration of a shallow-copy pitfall: the function copies
    the list before modifying its elements, yet the caller's data still
    changes, because the copy holds references to the very same dicts.

    Args:
        cats: A list of dicts. Each dict is modified in place.

    Returns:
        The list object that was passed in, whose dicts now each carry
        ``'new': 'something new'``.
    """
    # list.copy() method is a shallow copy. see https://docs.python.org/3/tutorial/datastructures.html
    _cats = cats.copy()
    # Iterating the copy still reaches the caller's dicts: only the outer
    # list was duplicated, not the elements inside it.
    for cat in _cats:
        cat['new'] = 'something new'
    return cats


original = [{'name': 'brownie'}, {'name': 'fluffy'}]
result = unexpectedly_mutate_original_data(original)
print(original)  # see that original value is mutated
@davified
davified / config.yml
Created February 27, 2019 14:02
CircleCI + Docker + Kubernetes + caching docker images on CircleCI
# .circleci/config.yml
# CircleCI config for deploying docker containers to kubernetes + manual caching on CircleCI
version: 2
jobs:
build_and_test:
working_directory: ~/repo
docker:
- image: google/cloud-sdk
steps:
- checkout
@davified
davified / heroku_docker_deployment.md
Created February 24, 2019 08:42
Steps for deploying a docker image to heroku

Steps for deploying a docker image to heroku

  • docker build ...
  • heroku create david-docker-staging
  • docker login --username=_ --password=$(heroku auth:token) registry.heroku.com
  • docker push registry.heroku.com/<app_name>/web
  • heroku container:release web -a app_name
  • heroku open
# .circleci/config.yml
# a script for deploying https://github.com/davified/ci-workshop-app to GCP App Engine
version: 2
jobs:
train_and_test:
docker:
- image: circleci/python:3.6.1
working_directory: ~/repo
steps:
- checkout
@davified
davified / heroku.config.yml
Last active February 13, 2019 06:40
Sample CircleCI config for python ML project
# .circleci/config.yml
# A circleci config for deploying https://github.com/davified/ci-workshop-app to heroku
version: 2
jobs:
train_and_test:
docker:
- image: circleci/python:3.6.1
working_directory: ~/repo
steps:
- checkout
@davified
davified / hive-docker-instructions.md
Last active September 19, 2018 01:35
hive-docker-instructions
$ git clone https://github.com/big-data-europe/docker-hive
$ cd docker-hive
$ docker-compose up -d

connect to hive-server docker container
$ docker-compose exec hive-server bash

connect to HiveServer2 using the beeline CLI (run inside the hive-server container)
# /opt/hive/bin/beeline -u jdbc:hive2://localhost:10000