stephenleo/00_make_money_on_the_side_with_data_science.md

## 00_make_money_on_the_side_with_data_science.md

      
    Raw
  

              00_make_money_on_the_side_with_data_science.md
            
          
    Make Money on the side with Data Science!

Code for the accompanying Medium post (the original hyperlink was not preserved in this copy).

  
## 01_main.py
import pandas as pd
import numpy as np
import os
import re
from typing import List
from tensorflow.keras.models import load_model

import uvicorn
from fastapi import FastAPI
from fastapi.logger import logger

from preprocess import preprocess

# Load the model once, at import time; the /predict handler below reads the
# module-level `pred_model`. The path is resolved relative to this file's
# directory rather than the current working directory.
model_path = os.path.join(os.path.dirname(__file__), "models/boyorgirl.h5")
pred_model = load_model(model_path)

# Instantiate the app; routes are registered on it via the @app.post decorator.
app = FastAPI()

# Predict endpoint
@app.post("/predict")
def predict(names: List[str]):
    """Classify each name token in the request as "boy" or "girl".

    Args:
        names: Raw strings from the request body. Each string is split into
            word tokens, so "Joe Biden" contributes two names.

    Returns:
        ``{"response": [...]}`` where every record has the keys ``name``,
        ``boy_or_girl`` and ``probability`` (the score of the predicted
        class, rounded to 2 decimals). Only the first 10 tokens are scored
        and duplicate records are dropped. When the request contains no
        word tokens at all the list is empty.
    """
    # Step 1: Input is a list of names
    logger.info(names)

    # Step 2: Split on every non-word character. Note that \w keeps letters,
    # digits and underscores, not just alphabetic characters.
    names = [token for name in names for token in re.findall(r"\w+", name)]

    # Step 3: Keep only first 10 names
    names = names[:10]

    # Guard: an empty list (or input made only of punctuation) leaves nothing
    # to score. Return early instead of pushing an empty batch through
    # preprocessing and the model.
    # NOTE(review): the unguarded path presumably surfaced as a 500 - confirm.
    if not names:
        return {"response": []}

    # Convert to dataframe
    pred_df = pd.DataFrame({"name": names})

    # Step 4: Preprocess the names
    pred_df = preprocess(pred_df)

    # Step 5: Run predictions (one score per name after dropping axis 1)
    encoded_batch = np.asarray(pred_df["name"].values.tolist())
    scores = pred_model.predict(encoded_batch).squeeze(axis=1)

    # Step 6: Convert the probabilities to predictions. A score above 0.5
    # means "boy"; the reported probability is always that of the chosen class.
    pred_df["boy_or_girl"] = ["boy" if score > 0.5 else "girl" for score in scores]
    pred_df["probability"] = [
        score if score > 0.5 else 1.0 - score for score in scores
    ]

    # Step 7: Format the output. Restore the original (un-encoded) tokens,
    # since preprocess() replaced the "name" column with numeric encodings.
    pred_df["name"] = names
    pred_df["probability"] = pred_df["probability"].round(2)
    pred_df.drop_duplicates(inplace=True)

    return {"response": pred_df.to_dict(orient="records")}


if __name__ == "__main__":
    # Serve on all interfaces; the port comes from $PORT, defaulting to 8080.
    listen_port = int(os.environ.get("PORT", 8080))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)

## 02_preprocess.py
def preprocess(names_df):
    """Encode the "name" column as fixed-length lists of numeric codes.

    The frame is modified in place and also returned. Each name is
    lower-cased, split into characters, right-padded with spaces and cut to
    50 characters, then every character is mapped to ``ord(char) - 96``
    clipped below at 0.0 (so "a" -> 1.0 and a space -> 0.0).
    """
    width = 50
    padding = [" "] * width

    # Lower-case first so the letters a-z land on codes 1-26.
    names_df["name"] = names_df["name"].str.lower()

    encoded_names = []
    for raw_name in names_df["name"]:
        # Pad generously, then slice: short names are filled with spaces and
        # long names are truncated to exactly `width` characters.
        chars = (list(raw_name) + padding)[:width]
        encoded_names.append([max(0.0, ord(char) - 96.0) for char in chars])

    names_df["name"] = encoded_names
    return names_df

## 03_Dockerfile
# Use the official lightweight Python image.
# https://hub.docker.com/_/python
FROM python:3.8-slim

# Allow statements and log messages to immediately appear in the Knative logs
# (key=value form; the space-separated ENV syntax is a legacy format).
ENV PYTHONUNBUFFERED=True

# Copy local code to the container image.
ENV APP_HOME=/app
WORKDIR $APP_HOME
COPY . ./

# Install production dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Run the web service on container startup. Here we use the gunicorn
# webserver with one uvicorn worker process, bound to the port given in $PORT
# (which must be provided at runtime, e.g. by Cloud Run or `docker run -e`).
# For environments with multiple CPU cores, increase the number of workers
# to be equal to the cores available.
# NOTE(review): gunicorn documents --threads as applying to the gthread worker
# type; with the UvicornWorker class it is likely ignored - confirm.
# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
CMD exec gunicorn -k uvicorn.workers.UvicornWorker --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app

## 04_test_docker_image.sh
# Build the image from the Dockerfile in the current directory.
docker build --tag boy_or_girl:latest .
# Run it with PORT=8080 inside the container, published on host port 8080;
# the container is removed when it exits.
docker run --rm --publish 8080:8080 --env PORT=8080 boy_or_girl:latest

## 05_test_docker.py
import requests
import pprint

# Smoke test: POST two sample full names to the locally running service and
# pretty-print the JSON it sends back.
payload = ['Joe Biden', 'Kamala Harris']
url = 'http://127.0.0.1:8080/predict'

response = requests.post(url=url, json=payload)
pprint.pprint(response.json())

## 06_gcp_cloud_run_deploy.sh
# Deploy the current directory as the Cloud Run service "boyorgirl" in us-west1.
gcloud run deploy boyorgirl --source=. --region=us-west1
	import pandas as pd
	import numpy as np
	import os
	import re
	from typing import List
	from tensorflow.keras.models import load_model

	import uvicorn
	from fastapi import FastAPI
	from fastapi.logger import logger

	from preprocess import preprocess

	# Load the model once, at import time; the /predict handler below reads the
	# module-level `pred_model`. The path is resolved relative to this file's
	# directory rather than the current working directory.
	model_path = os.path.join(os.path.dirname(__file__), "models/boyorgirl.h5")
	pred_model = load_model(model_path)

	# Instantiate the app; routes are registered on it via the @app.post decorator.
	app = FastAPI()

	# Predict endpoint
	@app.post("/predict")
	def predict(names: List[str]):
	    """Classify each name token in the request as "boy" or "girl".

	    Args:
	        names: Raw strings from the request body. Each string is split
	            into word tokens, so "Joe Biden" contributes two names.

	    Returns:
	        ``{"response": [...]}`` where every record has the keys ``name``,
	        ``boy_or_girl`` and ``probability`` (the score of the predicted
	        class, rounded to 2 decimals). Only the first 10 tokens are
	        scored and duplicate records are dropped. When the request
	        contains no word tokens at all the list is empty.
	    """
	    # Step 1: Input is a list of names
	    logger.info(names)

	    # Step 2: Split on every non-word character. Note that \w keeps
	    # letters, digits and underscores, not just alphabetic characters.
	    names = [token for name in names for token in re.findall(r"\w+", name)]

	    # Step 3: Keep only first 10 names
	    names = names[:10]

	    # Guard: an empty list (or input made only of punctuation) leaves
	    # nothing to score. Return early instead of pushing an empty batch
	    # through preprocessing and the model.
	    # NOTE(review): the unguarded path presumably surfaced as a 500 - confirm.
	    if not names:
	        return {"response": []}

	    # Convert to dataframe
	    pred_df = pd.DataFrame({"name": names})

	    # Step 4: Preprocess the names
	    pred_df = preprocess(pred_df)

	    # Step 5: Run predictions (one score per name after dropping axis 1)
	    encoded_batch = np.asarray(pred_df["name"].values.tolist())
	    scores = pred_model.predict(encoded_batch).squeeze(axis=1)

	    # Step 6: Convert the probabilities to predictions. A score above 0.5
	    # means "boy"; the reported probability is that of the chosen class.
	    pred_df["boy_or_girl"] = ["boy" if score > 0.5 else "girl" for score in scores]
	    pred_df["probability"] = [
	        score if score > 0.5 else 1.0 - score for score in scores
	    ]

	    # Step 7: Format the output. Restore the original (un-encoded) tokens,
	    # since preprocess() replaced the "name" column with numeric encodings.
	    pred_df["name"] = names
	    pred_df["probability"] = pred_df["probability"].round(2)
	    pred_df.drop_duplicates(inplace=True)

	    return {"response": pred_df.to_dict(orient="records")}


	if __name__ == "__main__":
	    # Serve on all interfaces; the port comes from $PORT, defaulting to 8080.
	    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 8080)))
	def preprocess(names_df):
	    """Encode the "name" column as fixed-length lists of numeric codes.

	    The frame is modified in place and also returned. Each name is
	    lower-cased, split into characters, right-padded with spaces and cut
	    to 50 characters, then every character is mapped to ``ord(char) - 96``
	    clipped below at 0.0 (so "a" -> 1.0 and a space -> 0.0).
	    """
	    # Step 1: Lowercase
	    names_df["name"] = names_df["name"].str.lower()

	    # Step 2: Split individual characters
	    names_df["name"] = [list(name) for name in names_df["name"]]

	    # Step 3: Pad names with spaces to make all names same length
	    # (pad generously, then slice, so long names are truncated too)
	    name_length = 50
	    names_df["name"] = [
	        (name + [" "] * name_length)[:name_length] for name in names_df["name"]
	    ]

	    # Step 4: Encode Characters to Numbers
	    names_df["name"] = [
	        [max(0.0, ord(char) - 96.0) for char in name] for name in names_df["name"]
	    ]

	    return names_df
	# Use the official lightweight Python image.
	# https://hub.docker.com/_/python
	FROM python:3.8-slim

	# Allow statements and log messages to immediately appear in the Knative logs
	# (key=value form; the space-separated ENV syntax is a legacy format).
	ENV PYTHONUNBUFFERED=True

	# Copy local code to the container image.
	ENV APP_HOME=/app
	WORKDIR $APP_HOME
	COPY . ./

	# Install production dependencies.
	RUN pip install --no-cache-dir -r requirements.txt

	# Run the web service on container startup. Here we use the gunicorn
	# webserver with one uvicorn worker process, bound to the port given in $PORT
	# (which must be provided at runtime, e.g. by Cloud Run or `docker run -e`).
	# For environments with multiple CPU cores, increase the number of workers
	# to be equal to the cores available.
	# NOTE(review): gunicorn documents --threads as applying to the gthread worker
	# type; with the UvicornWorker class it is likely ignored - confirm.
	# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
	CMD exec gunicorn -k uvicorn.workers.UvicornWorker --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
	# Build the image from the Dockerfile in the current directory.
	docker build --tag boy_or_girl:latest .
	# Run it with PORT=8080 inside the container, published on host port 8080;
	# the container is removed when it exits.
	docker run --rm --publish 8080:8080 --env PORT=8080 boy_or_girl:latest
	import requests
	import pprint

	# Smoke test: POST two sample full names to the locally running service and
	# pretty-print the JSON it sends back.
	payload = ['Joe Biden', 'Kamala Harris']
	url = 'http://127.0.0.1:8080/predict'

	response = requests.post(url=url, json=payload)
	pprint.pprint(response.json())