Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save stephenleo/848b83a9ac2cfc38e25acbe3d34543e1 to your computer and use it in GitHub Desktop.
Save stephenleo/848b83a9ac2cfc38e25acbe3d34543e1 to your computer and use it in GitHub Desktop.
[Medium] Make Money on the side with Data Science!

Make Money on the side with Data Science!

Code for the accompanying Medium post (link).

import pandas as pd
import numpy as np
import os
import re
from typing import List
from tensorflow.keras.models import load_model
import uvicorn
from fastapi import FastAPI
from fastapi.logger import logger
from preprocess import preprocess
# Load the trained model once at import time; the path is resolved relative
# to this file rather than the current working directory.
model_path = os.path.join(
    os.path.dirname(__file__),
    "models/boyorgirl.h5",
)
pred_model = load_model(model_path)
# Create the FastAPI application that the route decorators below attach to
app = FastAPI()
# Predict endpoint
@app.post("/predict")
def predict(names: List[str]):
    """Predict "boy" or "girl" for every name token in the request body.

    Each input string is broken into word tokens, only the first ten tokens
    are kept, and each token is scored by the loaded model.

    Args:
        names: Raw strings from the request. A single string may contain
            several names (for example "Joe Biden" yields "Joe" and "Biden").

    Returns:
        A dict of the form ``{"response": [...]}`` where every record has the
        keys ``name``, ``boy_or_girl`` and ``probability`` (rounded to two
        decimals). Duplicate records are removed. The list is empty when the
        input contains no word tokens.
    """
    # Step 1: Input is a list of names
    logger.info(names)

    # Step 2: Extract runs of word characters from every input string.
    # Note that \w also matches digits and underscores, so tokens are not
    # guaranteed to be purely alphabetic.
    tokens = [token for name in names for token in re.findall(r"\w+", name)]

    # Step 3: Keep only first 10 names
    tokens = tokens[:10]

    # Nothing to score: return early instead of building an empty, shape (0,)
    # array that the squeeze(axis=1) below cannot handle.
    if not tokens:
        return {"response": []}

    # Convert to dataframe
    pred_df = pd.DataFrame({"name": tokens})

    # Step 4: Preprocess the names into fixed-length numeric sequences
    pred_df = preprocess(pred_df)

    # Step 5: Run predictions; the model output is squeezed to one score per name
    encoded = np.asarray(pred_df["name"].values.tolist())
    scores = pred_model.predict(encoded).squeeze(axis=1)
    # Work in float64 so the rounded probabilities serialise as e.g. 0.87
    # rather than a float32 approximation (assumes the model may emit float32).
    scores = np.asarray(scores, dtype=float)

    # Step 6: Convert the scores to labels; a score above 0.5 means "boy".
    # The reported probability is always that of the predicted label.
    pred_df["boy_or_girl"] = ["boy" if score > 0.5 else "girl" for score in scores]
    pred_df["probability"] = [
        score if score > 0.5 else 1.0 - score for score in scores
    ]

    # Step 7: Format the output, restoring the readable names that
    # preprocessing replaced with numeric encodings
    pred_df["name"] = tokens
    pred_df["probability"] = pred_df["probability"].round(2)
    pred_df.drop_duplicates(inplace=True)
    return {"response": pred_df.to_dict(orient="records")}
if __name__ == "__main__":
    # Listen on the port named by the PORT environment variable, or 8080 if unset
    listen_port = int(os.environ.get("PORT", 8080))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
def preprocess(names_df, name_length: int = 50):
    """Encode the ``name`` column as fixed-length lists of character codes.

    Every name is lowercased, split into characters, right-padded with spaces
    (or truncated) to ``name_length`` characters, and each character is mapped
    to ``max(0.0, ord(char) - 96.0)``. Lowercase ``a``-``z`` therefore become
    1.0-26.0, and the space used for padding becomes 0.0.

    Args:
        names_df: DataFrame with a string column called ``name``. The column
            is overwritten in place.
        name_length: Number of characters every encoded name is padded or
            truncated to. Defaults to 50.

    Returns:
        The same ``names_df`` object, with ``name`` holding one list of
        ``name_length`` floats per row.

    Raises:
        ValueError: If ``name_length`` is negative.
    """
    if name_length < 0:
        raise ValueError("name_length must be non-negative")

    # Appending a full-width run of spaces and slicing pads short names and
    # truncates long ones in a single expression.
    padding = [" "] * name_length

    # Step 1: Lowercase
    lowered = names_df["name"].str.lower()

    # Steps 2-4: Split into characters, pad/truncate, encode characters to numbers
    names_df["name"] = [
        [max(0.0, ord(char) - 96.0) for char in (list(name) + padding)[:name_length]]
        for name in lowered
    ]
    return names_df
# Use the official lightweight Python image.
# https://hub.docker.com/_/python
FROM python:3.8-slim
# Allow statements and log messages to immediately appear in the Knative logs
ENV PYTHONUNBUFFERED=True
# Copy local code to the container image.
ENV APP_HOME=/app
WORKDIR $APP_HOME
COPY . ./
# Install production dependencies.
RUN pip install --no-cache-dir -r requirements.txt
# Run the web service on container startup. Here we use the gunicorn
# webserver with the uvicorn worker class, one worker process and 8 threads.
# NOTE(review): gunicorn documents --threads as affecting only the gthread
# worker type; confirm whether it has any effect with UvicornWorker.
# For environments with multiple CPU cores, increase the number of workers
# to be equal to the cores available.
# Timeout is set to 0 to disable the timeouts of the workers to allow Cloud Run to handle instance scaling.
# The shell form is used so that $PORT is expanded when the container starts.
CMD exec gunicorn -k uvicorn.workers.UvicornWorker --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app
# Build the image from the Dockerfile in the current directory
docker build --tag boy_or_girl:latest .
# Run it locally on port 8080; the container is removed when it exits
docker run --rm --publish 8080:8080 --env PORT=8080 boy_or_girl:latest
import requests
import pprint
# Prediction endpoint of the service listening locally on port 8080
url = 'http://127.0.0.1:8080/predict'
# Each string may contain several names; the service splits them into tokens
payload = ['Joe Biden', 'Kamala Harris']
# Bound the wait so the script fails instead of hanging forever when the
# server is unreachable or never answers (requests has no default timeout).
response = requests.post(url, json=payload, timeout=30)
pprint.pprint(response.json())
# Deploy the service named "boyorgirl" from the source in the current directory
gcloud run deploy boyorgirl --source=. --region=us-west1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment