Skip to content

Instantly share code, notes, and snippets.

@salrashid123
Last active August 18, 2023 11:43
Show Gist options
  • Save salrashid123/1509254ac4cef16dcc8e9e2f257599cd to your computer and use it in GitHub Desktop.
Save salrashid123/1509254ac4cef16dcc8e9e2f257599cd to your computer and use it in GitHub Desktop.
Deterministic builds with cog and bazel (ttps://github.com/replicate/cog/issues/1250)

using bazel to build deterministic cog image

the following will build an image hash of

sha256:3db6542dc746aeabaa39d902570430e1d50c416e7fc20b875c10578aa5e62875

(more or less unless copy+paste from gist may add newline, whitespace to the .py files, sources..; but it should create the same hash if copyied exactly)

for me the files were

$ sha256sum *
a582c0418d8662ed74d9951c519ca54e362a994f04214ffab745c02ebca3f181  BUILD.bazel
2a176d9028a0776b1445bf7f4ef88c81fba7a236c592a1b45a33ebe75d1271d8  WORKSPACE
f0e7b0d7a0e60155e15026c9b12288a883862d43d390d5e5de7f3fbcfa39b924  cog.yaml
1ea5964e200315e7cb81c512a79094cf5e5deb3c3ca8e42f9522dd8b615b6525  main.py
77f69552a52a8d59de15799ea10922e0d868b03ef9b8888117fcb4026455910f  predict.py
2e5c26a71f21550adc358f39da9364dbf576a82c53deb1a9d496ef00a2103641  requirements.txt

7011d39ea4f61f4ddb8da99c4addf3fae4209bfda7828adb4698b16283258fbe  resnet50_weights_tf_dim_ordering_tf_kernels.h5

to use make a copy of all the files here (including newlines at end of files blocks below)

  • main.py
  • predict.py
  • requirements.txt
  • WORKSPACE
  • BUILD.bazel
  • cog.yaml

create dev instance on gcp

$ gcloud compute instances create instance-bazel-dev --zone=us-central1-a --machine-type=e2-standard-4 --network-interface=network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=default --maintenance-policy=MIGRATE --provisioning-model=STANDARD  --create-disk=auto-delete=yes,boot=yes,device-name=instance-bazel-dev,image=projects/debian-cloud/global/images/debian-12-bookworm-v20230814,mode=rw,size=50,type=projects/core-eso/zones/us-central1-a/diskTypes/pd-balanced --no-shielded-secure-boot --shielded-vtpm --shielded-integrity-monitoring --labels=goog-ec-src=vm_add-gcloud --reservation-affinity=any


$ gcloud compute scp cog_bazel.tar instance-bazel-dev:

$ gcloud compute ssh instance-bazel-dev

~/cog_bazel$ tree
.
├── BUILD.bazel
├── cog.yaml
├── main.py
├── predict.py
├── requirements.txt
└── WORKSPACE

# bazel
wget https://github.com/bazelbuild/bazel/releases/download/6.2.1/bazel-6.2.1-linux-x86_64 -O bazel

sudo apt-get install gcc cmake build-essential python3-distutils -y

# python
sudo ln -s  /usr/bin/python3.11 /usr/bin/python

## docker
# follow https://docs.docker.com/engine/install/debian/
#        https://docs.docker.com/engine/install/linux-postinstall/

$ python -V
Python 3.11.2


$ bazel --version
bazel 6.2.1

$ docker --version
Docker version 24.0.5, build ced0996


$ docker login

create repo called docker.io/[your_repo]/cogdemo


# run standalone
wget https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5

bazel run :main

# build local image
## first edit BUILD.bazel and replace with repository= values with [your_repo]
bazel run :server_image
  docker run -p 5000:5000 -t docker.io/salrashid123/cogdemo:server_image


## push to dockerhub
bazel run  :push

then to test an api call

 curl http://localhost:5000/predictions -X POST     -H 'Content-Type: application/json'     -d '{"input": {"image": "https://gist.githubusercontent.com/bfirsh/3c2115692682ae260932a67d93fd94a8/raw/56b19f53f7643bb6c0b822c410c366c3a6244de2/mystery.jpg"}}'


  • main.py
#/usr/bin/python    
# https://github.com/replicate/cog/blob/main/docs/python.md


from cog.server.http import Server

import subprocess
 
process = subprocess.Popen('python -m cog.server.http', shell=True)
process.wait()

# import cog.server.http
# import threading

# print('start')
# from cog.server.http import create_app

# from cog.server.runner import (
#     PredictionRunner,
# )


# runner = PredictionRunner(
#     predictor_ref="predict.py:Predictor", shutdown_event=threading.Event()
# )
# try:
#     runner.setup().get(60)
# finally:
#     runner.shutdown()
# print('done')

  • predict.py
#!/usr/bin/python
from typing import Any
from cog import BasePredictor, Input, Path
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image as keras_image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np

from tensorflow.keras.utils import get_file


WEIGHTS_PATH="https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5"

class Predictor(BasePredictor):
    def setup(self):
        """Load the model into memory to make running multiple predictions efficient"""
      
        weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels.h5',
                                        WEIGHTS_PATH,
                                        cache_subdir='models',
                                        md5_hash='2cb95161c43110f7111970584f804107')
        
    
        self.model = ResNet50(weights=weights_path) 

        ## seems to download a file; its better to use a file thats provided 
        # self.model = ResNet50(weights='imagenet') 
        
        

    # Define the arguments and types the model takes as input
    def predict(self, image: Path = Input(description="Image to classify")) -> Any:
        """Run a single prediction on the model"""
        # Preprocess the image
        img = keras_image.load_img(image, target_size=(224, 224))
        x = keras_image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)
        # Run the prediction
        preds = self.model.predict(x)
        # Return the top 3 predictions
        return decode_predictions(preds, top=3)[0]

  • requirements.txt
pillow==9.5.0
tensorflow==2.13.0
cog==0.8.6


anyio==3.7.1
attrs==23.1.0
certifi==2023.7.22
charset-normalizer==3.2.0
click==8.1.6
fastapi==0.98.0
h11==0.14.0
httptools==0.6.0
idna==3.4
pydantic==1.10.12
python-dotenv==1.0.0
PyYAML==6.0.1
requests==2.31.0
sniffio==1.3.0
starlette==0.27.0
structlog==23.1.0
typing_extensions==4.7.1
urllib3==2.0.4
uvicorn==0.23.2
uvloop==0.17.0
watchfiles==0.19.0
websockets==11.0.3

wrapt==1.15.0
packaging==23.1
grpcio==1.57.0
opt-einsum==3.3.0
numpy==1.25.2
tensorflow-estimator==2.13.0
protobuf==4.24.0
jax==0.4.14
flatbuffers==23.5.26
setuptools==68.0.0
ml-dtypes==0.2.0
gast==0.5.4
scipy==1.11.1
six==1.16.0
libclang==16.0.6
termcolor==2.3.0
absl-py==1.4.0
google-pasta==0.2.0
keras==2.13.1
tensorboard==2.14.0
h5py==3.9.0
google-auth-oauthlib==1.0.0
tensorboard-data-server==0.7.1
wheel==0.41.1
requests-oauthlib==1.3.1
google-auth==2.22.0
cachetools==5.3.1
pyasn1-modules==0.3.0
pyasn1==0.5.0
rsa==4.9
oauthlib==3.2.2
tensorflow-io-gcs-filesystem==0.33.0
Markdown==3.4.4
Werkzeug==2.3.6
MarkupSafe==2.1.3
astunparse==1.6.3
lit==16.0.6
cmake==3.27.2
importlib-metadata==6.8.0
zipp==3.16.2
exceptiongroup==1.1.3

  • WORKSPACE
workspace(name = "qs")

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

http_archive(
    name = "rules_python",
    sha256 = "a644da969b6824cc87f8fe7b18101a8a6c57da5db39caa6566ec6109f37d2141",
    strip_prefix = "rules_python-0.20.0",
    url = "https://github.com/bazelbuild/rules_python/releases/download/0.20.0/rules_python-0.20.0.tar.gz",
)


load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains")
py_repositories()

python_register_toolchains(
    name = "python311",
    python_version = "3.11",
)


load("@rules_python//python:pip.bzl", "pip_parse")

pip_parse(
   name = "my_deps",
   requirements_lock = "//:requirements.txt",
)
load("@my_deps//:requirements.bzl", "install_deps", "requirement")
install_deps()


# ---------------------------------

http_archive(
    name = "io_bazel_rules_docker",
    sha256 = "b1e80761a8a8243d03ebca8845e9cc1ba6c82ce7c5179ce2b295cd36f7e394bf",
    urls = ["https://github.com/bazelbuild/rules_docker/releases/download/v0.25.0/rules_docker-v0.25.0.tar.gz"],
)

load(
    "@io_bazel_rules_docker//repositories:repositories.bzl",
    container_repositories = "repositories",
)

container_repositories()

load("@io_bazel_rules_docker//repositories:deps.bzl", container_deps = "deps")

container_deps()


load("@io_bazel_rules_docker//python3:image.bzl", _py_image_repos = "repositories")
_py_image_repos()


load("@io_bazel_rules_docker//container:pull.bzl", "container_pull")

container_pull(
    name = "distroless_base",
    digest = "sha256:75f63d4edd703030d4312dc7528a349ca34d48bec7bd754652b2d47e5a0b7873",
    registry = "gcr.io",
    repository = "distroless/base",
)


container_pull(
    name = "distroless_python3-debian11",
    digest = "sha256:57dbab565d405ce5ae9c7a8c781c95fa229655cb8381d0e5db4ece28661fa687",
    registry = "gcr.io",
    repository = "distroless/python3-debian11",
)

container_pull(
    name = "python3",
    digest = "sha256:7d57b1fef9b7fda8bf331b971a3ca96c3214313666fafdf26d33f1d0e6399222",
    registry = "docker.io",
    repository = "python",
)


# container_pull(
#     name = "nvidia-cuda",
#     digest = "sha256:c1d07892979445e720a5cf1f5abe6a910f45c6d638bf9997d6a807924eee5190",
#     registry = "docker.io",
#     repository = "nvidia/cuda:12.2.0-devel-ubuntu20.04",
# )


  • BUILD.bazel
load("@rules_python//python:defs.bzl", "py_binary")
load("@io_bazel_rules_docker//python3:image.bzl", "py3_image")

load("@my_deps//:requirements.bzl", "requirement")
load("@io_bazel_rules_docker//container:container.bzl", "container_image", "container_push")


exports_files(["cog.yaml", "resnet50_weights_tf_dim_ordering_tf_kernels.h5"])

LIBS = [

    requirement("pillow"),
    requirement("tensorflow"),

    requirement("cog"),

    requirement("requests"),
    requirement("wrapt"),
    requirement("packaging"),
    requirement("grpcio"),
    requirement("opt-einsum"),
    requirement("numpy"),
    requirement("tensorflow-estimator"),
    requirement("protobuf"),    
    requirement("jax"),       
    requirement("flatbuffers"),
    requirement("setuptools"),
    requirement("ml-dtypes"),  
    requirement("gast"),    
    requirement("scipy"),
    requirement("six"),
    requirement("libclang"),          
    requirement("termcolor"), 
    requirement("absl-py"),  
    requirement("google-pasta"), 
    requirement("keras"),       
    requirement("tensorboard"),
    requirement("h5py"),      
    requirement("google-auth-oauthlib"), 
    requirement("tensorboard-data-server"), 
    requirement("wheel"),                             
    requirement("requests-oauthlib"),    
    requirement("google-auth"),  
    requirement("cachetools"),
    requirement("pyasn1-modules"),    
    requirement("pyasn1"), 
    requirement("rsa"),     
    requirement("oauthlib"),
    requirement("tensorflow-io-gcs-filesystem"),    
    requirement("markdown"),   
    requirement("Werkzeug"),  
    requirement("MarkupSafe"),  
    requirement("astunparse"), 
    requirement("importlib-metadata"), 
    requirement("zipp"), 
    requirement("anyio"), 
    requirement("exceptiongroup"), 


]

container_push(
   name = "push",
   image = ":server_image",
   format = "Docker",
   registry = "docker.io",
   repository = "salrashid123/cogdemo",
   tag = "server_image",
)

container_image(
    name = "server_image",
    base = ":py_image",
    ports = ["5000"],  
    repository = "docker.io/salrashid123/cogdemo",
    symlinks = { "/usr/bin/python": "/usr/local/bin/python"},   
)

py3_image(
    name="py_image",
    srcs = ["main.py", "predict.py"],     
    main="main.py",
    deps = LIBS,
    data = [":cog.yaml", ":resnet50_weights_tf_dim_ordering_tf_kernels.h5"],
    base = "@python3//image",
)


py_binary(
    name = "main",
    srcs = ["main.py", "predict.py"],
    deps = LIBS,
    data = [":cog.yaml", ":resnet50_weights_tf_dim_ordering_tf_kernels.h5"],
    main="main.py",
)


  • cog.yaml
build:
  python_version: "3.11"
  python_packages:
    - pillow==9.5.0
    - tensorflow==2.12.0
predict: "predict.py:Predictor"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment