import argparse
import os
from zipfile import ZipFile

import joblib
import numpy as np
from ds_helpers import aws

from helpers.model_helpers import create_x_y_split
from data.db import get_training_data
from modeling.config import TARGET


def main(s3_bucket):
    """
    Takes a model from S3, retrains it on new data, and uploads it to S3.

    The model archive is expected to be named ``model.zip`` and to contain a
    ``model.pkl`` file at its root. The retrained model is written back under
    the same names, so the archive in the bucket is overwritten.

    :param s3_bucket: S3 bucket where the model lives
    """
    df = get_training_data()
    # Use the ``columns`` keyword: passing ``axis`` positionally (``drop(..., 1)``)
    # was deprecated in pandas 1.x and is rejected by pandas 2.0+.
    df = df.drop(columns=['client_id', 'id', 'meta__inserted_at'])
    # Encode the target as 1 for 'yes' and 0 for anything else.
    df[TARGET] = np.where(df[TARGET] == 'yes', 1, 0)
    x, y = create_x_y_split(df, TARGET)

    aws.download_file_from_s3('model.zip', s3_bucket)
    # exist_ok so that a re-run on the same machine does not fail on the
    # directory left behind by a previous run.
    os.makedirs('original_model', exist_ok=True)
    with ZipFile('model.zip', 'r') as zip_file:
        zip_file.extractall('original_model')

    # SECURITY NOTE(review): joblib.load unpickles the file and can execute
    # arbitrary code. Only point this script at a bucket whose contents are
    # trusted.
    original_model = joblib.load(os.path.join('original_model', 'model.pkl'))
    # presumably a scikit-learn style estimator whose fit() returns the fitted
    # model - TODO confirm against the model that is actually stored.
    retrained_model = original_model.fit(x, y)

    joblib.dump(retrained_model, 'model.pkl')
    # Mode 'w' replaces the downloaded archive with one holding only the
    # retrained model.
    with ZipFile('model.zip', 'w') as zip_file:
        zip_file.write('model.pkl')
    aws.upload_file_to_s3('model.zip', s3_bucket)


if __name__ == "__main__":
    # main() requires the bucket name, so read it from the command line
    # instead of calling main() with no arguments (which raises TypeError).
    parser = argparse.ArgumentParser(
        description='Retrain the model stored in S3 and upload the result.'
    )
    parser.add_argument('s3_bucket', help='S3 bucket where the model lives')
    args = parser.parse_args()
    main(args.s3_bucket)