jhw/.BERT_WORD_ALIGNMENT.md

## .BERT_WORD_ALIGNMENT.md

      
    Raw
  

              .BERT_WORD_ALIGNMENT.md
            
          
    I am basically totally fed up with Docker and Sagemaker at this point
I have finally managed to create a local Docker container which conforms to the expected Sagemaker interface - i.e. it has /ping [GET] and /invocations [POST] endpoints, and has a serve executable in /usr/local/bin which starts a webserver
I then created a Sagemaker model/endpoint_config/endpoint combo using boto3
However endpoint creation fails due to
exec /usr/local/bin/serve: exec format error

which seems to indicate a compatibility issue between the Mac M1 (arm64) on which the container was built, and the Amazon Linux (x86_64) machine on which it runs
So much for write once, run anywhere!

Docker seems to have a solution for this called buildx (multi-platform builds) -
docker buildx version
docker buildx create --name mybuilder --use
docker buildx inspect --bootstrap
docker buildx build --platform linux/amd64 -t bert-word-alignment . --load
docker image inspect bert-word-alignment --format '{{.Architecture}}'

However this seems to want to suck in an enormous amount of Python nvidia cuda dependencies, and the resulting image is gigantic; so much so that the webserver seems to run out of memory when the image is run

So I'm going off elsewhere to see if I can build the image on Amazon Linux via Codebuild

As a parting shot I tried to replace the model/endpoint_config/endpoint creation routines with a Cloudformation file; but ran into some errors regarding the maximum permitted length of entity name; so giving up and moving on

The good bits here are that this gives a decent overview of Docker, and also managed to build a working container with webserver; one that is probably compatible with Sagemaker, if only the container can be built on a compatible OS

  
## .gitignore
*.pyc
__pycache__
env
tmp
model
tokenizer

## app.py
"""
Custom images must provide their own webserver, with port 8080 and GET ping and POST invocations endpoints

This webserver calls inference.py which follows standard sagemaker entry_point conventions, but as a custom webserver you could define your own convention
"""

from flask import Flask, request, Response

from inference import model_fn, input_fn, predict_fn, output_fn

# WSGI application object; serve.sh starts gunicorn against "app:app".
app = Flask(__name__)

# Model artifacts returned by model_fn(); filled in lazily by load_model().
model = None

@app.before_request
def load_model():
    """Populate the module-level ``model`` before a request is handled.

    Runs ahead of every request; once ``model`` is truthy it returns
    immediately, so ``model_fn`` is only called while nothing is loaded.
    """
    global model
    if model:
        return
    model = model_fn('.')

@app.route('/ping', methods=['GET'])
def ping():
    """Health-check endpoint: reply with an empty body and HTTP 200."""
    body, status_code = '', 200
    return body, status_code

@app.route('/invocations', methods=['POST'])
def invocations():
    """Run inference on the posted payload and return the result as JSON.

    Returns:
        503 if no model has been loaded,
        400 if ``input_fn`` rejects the request (unsupported content type or
            a body that cannot be decoded),
        406 if ``output_fn`` rejects the requested Accept type,
        200 with an ``application/json`` body otherwise.
    """
    if not model:
        return Response(response='Model not loaded', status=503)
    # input_fn/output_fn signal bad client input with ValueError; report those
    # as client errors instead of letting them surface as an HTTP 500.
    try:
        input_data = input_fn(request.data, request.content_type)
    except ValueError as error:
        return Response(response=str(error), status=400)
    predictions = predict_fn(input_data, model)
    try:
        output = output_fn(predictions, request.accept_mimetypes)
    except ValueError as error:
        return Response(response=str(error), status=406)
    return Response(output,
                    status=200,
                    mimetype='application/json')

# Direct execution starts Flask's built-in server on all interfaces, port 8080.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)

## create_endpoint.py
import boto3, sys

if __name__ == "__main__":
    # Usage: create_endpoint.py <endpoint config name> <endpoint name>
    try:
        cli_args = sys.argv[1:3]
        if len(cli_args) < 2:
            raise RuntimeError("please enter endpoint config name, endpoint name")
        config_name, name = cli_args
        client = boto3.client('sagemaker')
        response = client.create_endpoint(EndpointName=name,
                                          EndpointConfigName=config_name)
        print(response)
    except RuntimeError as exc:
        print(f"Error: {exc}")

## create_endpoint_config.py
import boto3, sys

if __name__ == "__main__":
    # Usage: create_endpoint_config.py <model name> <endpoint config name>
    try:
        cli_args = sys.argv[1:3]
        if len(cli_args) < 2:
            raise RuntimeError("please enter model name, endpoint config name")
        target_model, config_name = cli_args
        client = boto3.client('sagemaker')
        # A single variant that receives all traffic on one ml.m5.large instance.
        variant = {
            'VariantName': 'AllTraffic',
            'ModelName': target_model,
            'InitialInstanceCount': 1,
            'InstanceType': 'ml.m5.large',
            'InitialVariantWeight': 1,
        }
        response = client.create_endpoint_config(
            EndpointConfigName=config_name,
            ProductionVariants=[variant],
        )
        print(response)
    except RuntimeError as exc:
        print(f"Error: {exc}")

## create_model.py
import boto3, os, sys

def repository_uri(repo_name):
    """Return the ECR repository URI for ``repo_name``.

    The account id and region come from the ``AWS_ACCOUNT_ID`` and
    ``AWS_REGION`` environment variables; a missing variable raises KeyError.
    """
    account = os.environ["AWS_ACCOUNT_ID"]
    region_name = os.environ["AWS_REGION"]
    return "{}.dkr.ecr.{}.amazonaws.com/{}".format(account, region_name, repo_name)

if __name__ == "__main__":
    # Usage: create_model.py <model name> <role arn>
    # The ECR repository is expected to carry the same name as the model.
    try:
        cli_args = sys.argv[1:3]
        if len(cli_args) < 2:
            raise RuntimeError("please enter model name, role arn")
        name, execution_role = cli_args
        tag = "latest"
        image = "{}:{}".format(repository_uri(name), tag)
        client = boto3.client('sagemaker')
        response = client.create_model(ModelName=name,
                                       ExecutionRoleArn=execution_role,
                                       PrimaryContainer={'Image': image})
        print(response)
    except RuntimeError as exc:
        print(f"Error: {exc}")


## create_repo.py
import boto3, sys

if __name__ == "__main__":
    # Usage: create_repo.py <repo name>
    try:
        cli_args = sys.argv[1:]
        if not cli_args:
            raise RuntimeError("please enter repo name")
        client = boto3.client("ecr")
        response = client.create_repository(repositoryName=cli_args[0])
        print(response)
    except RuntimeError as exc:
        print(f"Error: {exc}")

## create_role.py
import boto3, json, sys

# Trust policy document: lets the SageMaker service principal assume the role.
TrustRelationship = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Service": "sagemaker.amazonaws.com"
            },
            "Action": "sts:AssumeRole"
        }
    ]
}

"""
Not sure Cloudwatch strictly required as logging seems to be a Cloudformation- like process in which Sagemaker keeps its own logs and yields them in an call to describe_xxx
"""

# ARNs of the AWS-managed policies attached to the role once it is created.
SagemakerFullAccessPolicyArn = 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'

CloudwatchFullAccessPolicyArn = 'arn:aws:iam::aws:policy/CloudWatchFullAccess'

if __name__ == "__main__":
    # Usage: create_role.py <model name>
    # Creates "<model name>-sagemaker-model-role" and attaches the managed policies.
    try:
        cli_args = sys.argv[1:]
        if not cli_args:
            raise RuntimeError("please enter model name")
        role = "{}-sagemaker-model-role".format(cli_args[0])
        client = boto3.client('iam')
        created = client.create_role(
            RoleName=role,
            AssumeRolePolicyDocument=json.dumps(TrustRelationship),
            MaxSessionDuration=3600,
        )
        print(created)
        managed_policies = (SagemakerFullAccessPolicyArn,
                            CloudwatchFullAccessPolicyArn)
        for arn in managed_policies:
            attached = client.attach_role_policy(RoleName=role, PolicyArn=arn)
            print(attached)
    except RuntimeError as exc:
        print(f"Error: {exc}")

## delete_endpoint.py
import boto3, sys

if __name__ == "__main__":
    # Usage: delete_endpoint.py <endpoint name>
    try:
        cli_args = sys.argv[1:]
        if not cli_args:
            raise RuntimeError("please enter endpoint name")
        client = boto3.client('sagemaker')
        response = client.delete_endpoint(EndpointName=cli_args[0])
        print(response)
    except RuntimeError as exc:
        print(f"Error: {exc}")

## delete_endpoint_config.py
import boto3, sys

if __name__ == "__main__":
    # Usage: delete_endpoint_config.py <endpoint config name>
    try:
        cli_args = sys.argv[1:]
        if not cli_args:
            raise RuntimeError("please enter endpoint config name")
        client = boto3.client('sagemaker')
        response = client.delete_endpoint_config(EndpointConfigName=cli_args[0])
        print(response)
    except RuntimeError as exc:
        print(f"Error: {exc}")


## delete_model.py
import boto3, sys

if __name__ == "__main__":
    # Usage: delete_model.py <model name>
    try:
        cli_args = sys.argv[1:]
        if not cli_args:
            raise RuntimeError("please enter model name")
        client = boto3.client('sagemaker')
        response = client.delete_model(ModelName=cli_args[0])
        print(response)
    except RuntimeError as exc:
        print(f"Error: {exc}")


## delete_repo.py
import boto3, sys

if __name__ == "__main__":
    # Usage: delete_repo.py <repo name>
    try:
        cli_args = sys.argv[1:]
        if not cli_args:
            raise RuntimeError("please enter repo name")
        client = boto3.client("ecr")
        # force=True removes the repository even when it still holds images.
        response = client.delete_repository(repositoryName=cli_args[0],
                                            force=True)
        print(response)
    except RuntimeError as exc:
        print(f"Error: {exc}")

## delete_role.py
import boto3, sys

"""
Attached policies have to be deleted before a role can be deleted, but inline policies will be deleted as part of the role
"""

def detach_attached_policies(iam, role_name):
    """Detach every managed policy currently attached to ``role_name``.

    Args:
        iam: boto3 IAM client.
        role_name: name of the role whose attached policies are detached.
    """
    pages = iam.get_paginator('list_attached_role_policies').paginate(
        RoleName=role_name)
    # Lazy generator: each page is fetched only once the previous one is done.
    attached_arns = (entry['PolicyArn']
                     for page in pages
                     for entry in page['AttachedPolicies'])
    for arn in attached_arns:
        iam.detach_role_policy(RoleName=role_name, PolicyArn=arn)

if __name__ == "__main__":
    # Usage: delete_role.py <role name>
    try:
        cli_args = sys.argv[1:]
        if not cli_args:
            raise RuntimeError("please enter role name")
        target_role = cli_args[0]
        client = boto3.client('iam')
        # Attached policies must be detached before the role can be deleted.
        detach_attached_policies(client, target_role)
        response = client.delete_role(RoleName=target_role)
        print(response)
    except RuntimeError as exc:
        print(f"Error: {exc}")

## Dockerfile
# Use Python 3.10 slim image from the Python Software Foundation
FROM python:3.10-slim

# Upgrade pip (no pip cache kept in the layer, to keep the image small)
RUN pip install --no-cache-dir --upgrade pip

# Run all following instructions from /app inside the image
WORKDIR /app

# Install the CPU-only torch build first. The default Linux wheel bundles the
# NVIDIA CUDA libraries, which is what makes the image enormous.
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu

# Create requirements.txt file (printf handles \n the same in every shell,
# unlike echo)
RUN printf 'torch\ntransformers\nflask\ngunicorn\n' > requirements.txt

# Install packages from requirements.txt (torch is already satisfied above).
# Done before copying the code so this layer is reused when only code changes.
RUN pip install --no-cache-dir -r requirements.txt

# Copy files into the image
COPY pipeline.py .
COPY inference.py .
COPY app.py .
COPY serve.sh .

# Copy directories into the image
COPY model model/
COPY tokenizer tokenizer/

# Expose port 8080
EXPOSE 8080

# Make the serve script executable
RUN chmod +x serve.sh

# Make a symbolic link to ensure 'serve' is available globally
RUN ln -s /app/serve.sh /usr/local/bin/serve

# Command to run when starting the container
CMD ["serve"]

## dump_model.py
import os, transformers, warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

if __name__ == "__main__":
    # Download the pretrained multilingual BERT model and tokenizer and save
    # them into ./model and ./tokenizer.
    scratch_dir = "tmp"
    if not os.path.exists(scratch_dir):
        os.mkdir(scratch_dir)
    bert = transformers.BertModel.from_pretrained('bert-base-multilingual-cased')
    bert.save_pretrained('./model')
    bert_tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-multilingual-cased')
    bert_tokenizer.save_pretrained('./tokenizer')


## inference.py
"""
inference.py follows the standard interface pattern accepted by sagemaker

Pre-built images contain their own webserver and inference.py can be specified using the sagemaker.model.Model entry_point parameter, slotting in neatly as a request handler

Custom images need to define their own webserver, again following the pattern acceptable to sagemaker, but accordingly don't necessarily need to follow the same format for inference.py

However it can be useful to follow the same pattern by way of convention; if the community were ever to read this code, or if AWS were ever to provide a custom image containing BertModel and BertTokenizer
"""

from transformers import BertModel, BertTokenizer

from pipeline import run_pipeline

import json, os

def model_fn(model_dir):
    """Load the BERT model and tokenizer stored under ``model_dir``.

    Args:
        model_dir: directory containing ``model`` and ``tokenizer`` sub-directories.

    Returns:
        dict with keys ``"model"`` and ``"tokenizer"``.
    """
    model_path, tokenizer_path = (os.path.join(model_dir, name)
                                  for name in ("model", "tokenizer"))
    artifacts = {}
    artifacts["model"] = BertModel.from_pretrained(model_path)
    artifacts["tokenizer"] = BertTokenizer.from_pretrained(tokenizer_path)
    return artifacts

def input_fn(request_body, request_content_type):
    """Decode a JSON request body.

    The content type is compared on its media type only, case-insensitively,
    so values carrying parameters such as ``application/json; charset=utf-8``
    are accepted.

    Args:
        request_body: raw request payload (``str`` or ``bytes``).
        request_content_type: value of the Content-Type header, or None.

    Returns:
        The deserialized JSON value.

    Raises:
        ValueError: if the content type is not JSON, or the body is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    media_type = (request_content_type or "").split(";", 1)[0].strip().lower()
    if media_type != 'application/json':
        raise ValueError("Unsupported content type: {}".format(request_content_type))
    return json.loads(request_body)

def predict_fn(input_data, model_artifacts):
    """Run the alignment pipeline on one request.

    Args:
        input_data: mapping with ``"src"`` and ``"tgt"`` entries.
        model_artifacts: mapping with ``"model"`` and ``"tokenizer"`` entries,
            as returned by ``model_fn``.

    Returns:
        Whatever ``run_pipeline`` returns for the given inputs.
    """
    pipeline_kwargs = {
        "model": model_artifacts['model'],
        "tokenizer": model_artifacts['tokenizer'],
        "src": input_data["src"],
        "tgt": input_data["tgt"],
    }
    return run_pipeline(**pipeline_kwargs)

def output_fn(prediction_output, accept):
    """Serialize the prediction as JSON if the client accepts JSON.

    ``accept`` may be a plain string or any object whose ``str()`` is an
    Accept header value. A missing/empty value is treated as "anything is
    acceptable", and ``*/*`` / ``application/*`` wildcards as well as
    comma-separated lists with parameters (``;q=...``) are honoured.

    Args:
        prediction_output: JSON-serializable prediction.
        accept: requested response type(s), or None/empty.

    Returns:
        The prediction encoded as a JSON string.

    Raises:
        ValueError: if none of the accepted media types covers JSON.
    """
    # Reduce "a/b;q=0.9, c/d" to the bare, lower-cased media types.
    accepted = {part.split(";", 1)[0].strip().lower()
                for part in str(accept or "").split(",")}
    accepted.discard("")
    json_compatible = {"application/json", "application/*", "*/*"}
    if accepted and not accepted & json_compatible:
        raise ValueError("Unsupported accept type: {}".format(accept))
    return json.dumps(prediction_output)

# Nothing to do when run directly; this module is imported by app.py.
if __name__ == "__main__":
    pass

## list_custom_policies.py
import boto3, os

if __name__ == "__main__":
    # List customer-managed IAM policies as "<name>\t<arn>".
    # Scope='Local' asks IAM for the account's own policies only, instead of
    # paging through every AWS-managed policy and filtering on the account id.
    iam = boto3.client('iam')
    paginator = iam.get_paginator('list_policies')
    for page in paginator.paginate(Scope='Local'):
        for policy in page['Policies']:
            print (f"{policy['PolicyName']}\t{policy['Arn']}")

## list_endpoint_configs.py
import boto3

if __name__ == "__main__":
    # Print the name of every SageMaker endpoint config, one per line.
    client = boto3.client("sagemaker")
    pages = client.get_paginator('list_endpoint_configs').paginate()
    config_names = (entry['EndpointConfigName']
                    for page in pages
                    for entry in page['EndpointConfigs'])
    for config_name in config_names:
        print(f"Endpoint Config Name: {config_name}")

## list_endpoints.py
import boto3

if __name__ == "__main__":
    # Print "<endpoint name>\t<status>" once per endpoint.
    # The list_endpoints summaries already carry EndpointStatus, so no
    # describe_endpoint / describe_endpoint_config round trips are needed.
    # The previous per-variant loop repeated the same line for every variant
    # and failed when the endpoint config had already been deleted.
    sagemaker = boto3.client("sagemaker")
    paginator = sagemaker.get_paginator('list_endpoints')
    for page in paginator.paginate():
        for endpoint in page['Endpoints']:
            print(f"{endpoint['EndpointName']}\t{endpoint['EndpointStatus']}")

## list_images.py
import boto3, sys

if __name__ == "__main__":
    # Usage: list_images.py <repo name>
    # Prints the tag of every tagged image in the repository. Uses a paginator
    # so repositories with more images than one page are listed completely.
    try:
        if len(sys.argv) < 2:
            raise RuntimeError("please enter repo name")
        repo_name = sys.argv[1]
        ecr = boto3.client("ecr")
        paginator = ecr.get_paginator('list_images')
        for page in paginator.paginate(repositoryName = repo_name):
            for image in page.get("imageIds", []):
                # Untagged images have no "imageTag" entry; skip them.
                if "imageTag" in image:
                    print (image["imageTag"])
    except RuntimeError as error:
        print (f"Error: {error}")

## list_models.py
import boto3

if __name__ == "__main__":
    # Print the name of every SageMaker model. A single list_models() call
    # only returns the first page of results, so iterate with a paginator
    # like the other list_* scripts do.
    sagemaker = boto3.client("sagemaker")
    paginator = sagemaker.get_paginator('list_models')
    for page in paginator.paginate():
        for model in page["Models"]:
            print (model["ModelName"])

## list_repos.py
import boto3

if __name__ == "__main__":
    # Print the name of every ECR repository, one per line.
    client = boto3.client("ecr")
    pages = client.get_paginator('describe_repositories').paginate()
    for page in pages:
        for entry in page['repositories']:
            repo_name = entry['repositoryName']
            print(f"{repo_name}")

## list_role_policies.py
import sys
import boto3

def list_attached_policies(iam, role_name):
    """Print a header followed by the names of the policies attached to a role.

    Args:
        iam: boto3 IAM client.
        role_name: role whose attached (managed) policies are listed.
    """
    print ("--- attached policies ---")
    pages = iam.get_paginator('list_attached_role_policies').paginate(
        RoleName=role_name)
    for page in pages:
        for entry in page['AttachedPolicies']:
            name = entry['PolicyName']
            print(f"{name}")

def list_inline_policies(iam, role_name):
    """Print a header followed by the names of a role's inline policies.

    Args:
        iam: boto3 IAM client.
        role_name: role whose inline policies are listed.
    """
    print ("--- inline policies ---")
    pages = iam.get_paginator('list_role_policies').paginate(RoleName=role_name)
    for page in pages:
        for name in page['PolicyNames']:
            print(f"{name}")

if __name__ == "__main__":
    # Usage: list_role_policies.py <role name>  (exactly one argument)
    try:
        if len(sys.argv) != 2:
            raise RuntimeError("please enter role name")
        _, target_role = sys.argv
        client = boto3.client('iam')
        for lister in (list_attached_policies, list_inline_policies):
            lister(client, target_role)
    except RuntimeError as exc:
        print (f"Error: {exc}")


## list_roles.py
import boto3

if __name__ == "__main__":
    # Print "<role name>\t<role arn>" for every IAM role.
    client = boto3.client('iam')
    pages = client.get_paginator('list_roles').paginate()
    for page in pages:
        for entry in page['Roles']:
            name, arn = entry['RoleName'], entry['Arn']
            print (f"{name}\t{arn}")

## NOTES.md

      
    Raw
  

              NOTES.md
            
          
    workflow 13/05/24


create local container
test local container
create ecr repo
push image to ecr repo
create role for model creation
create sagemaker model
create sagemaker endpoint config
create sagemaker endpoint

docker debugging 13/05/24


docker history bert-word-alignment
docker save -o bert-word-alignment.tar bert-word-alignment

endpoint creation failure 13/05/24

(env) jhw@Justins-Air 76a2b92c89a951552e9e04f98843485c % aws sagemaker describe-endpoint --endpoint-name bwa-endpoint

|                                                                             DescribeEndpoint                                                                              |
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+
|  CreationTime      |  1715610551.535                                                                                                                                      |
|  EndpointArn       |  arn:aws:sagemaker:eu-west-1:119552584133:endpoint/bwa-endpoint                                                                                      |
|  EndpointConfigName|  bwa-endpoint-config                                                                                                                                 |
|  EndpointName      |  bwa-endpoint                                                                                                                                        |
|  EndpointStatus    |  Failed                                                                                                                                              |
|  FailureReason     |  CannotStartContainerError. Please ensure the model container for variant AllTraffic starts correctly when invoked with 'docker run <image> serve'   |
|  LastModifiedTime  |  1715610825.116                                                                                                                                      |
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------+
docker image management 11/05/24


docker build -t bert-word-alignment .
docker images
docker run -p 8080:8080 bert-word-alignment serve # <-- following sagemaker process
docker ps
docker stop [process id]
docker ps -a
docker container prune
docker rmi bert-word-alignment

dockerfile 11/05/24

I want you to create a Dockerfile for me
Start with a Python 3.10 slim image from the PSF
Install and upgrade pip within the image
Set the current directory as the root directory
Copy the following files into the image - pipeline.py, inference.py, app.py
Copy the following directories into the image, including their contents - model, tokenizer
Create a requirements.txt file with the following packages - pytorch, transformers, flask, gunicorn
pip- install requirements.txt within the image
Export port 8080 from within the image
Add a command such that when the image is run, a webserver is automatically started with the following command - gunicorn -b 0.0.0.0:8080 "app:app"
docker 11/05/24


create Dockerfile
build image -> docker build -f MyDockerfile -t my-python-app .
docker run my-python-app

workflow 09/05/24


docker container to contain models and inference.py script


inference.py loads models, contains pipeline logic


requirements.txt lists torch, transformers etc


need script to build docker container


test docker container locally

maybe flask version first as easier to test?


consider adding sagemaker and ecr to deployment profile


script to push docker image to ecr


script to create sagemaker model using sagemaker.model.Model

requires role with sagemaker permissions


script to create sagemaker endpoint


script to test pinging sagemaker endpoint


resources 03/05/24


https://stackoverflow.com/questions/59615759/high-precision-word-alignment-algorithm-in-pytho
https://stackoverflow.com/a/66662014/124179
https://colab.research.google.com/drive/1205ubqebM0OsZa1nRgbGJBtitgHqIVv6?usp=sharing#scrollTo=FfDM0w2kfHyJ


## ping_container.py
import boto3, json, urllib.request

def translate_text(text, source_language_code = "ru", target_language_code = "en"):
    """Translate ``text`` with the AWS Translate service.

    Args:
        text: text to translate.
        source_language_code: language code of ``text`` (default ``"ru"``).
        target_language_code: language code to translate into (default ``"en"``).

    Returns:
        The ``TranslatedText`` field of the service response.
    """
    client = boto3.client('translate')
    request = {
        "Text": text,
        "SourceLanguageCode": source_language_code,
        "TargetLanguageCode": target_language_code,
    }
    return client.translate_text(**request)['TranslatedText']

if __name__ == "__main__":
    # Usage: ping_container.py <Russian text>
    # Translates the text, posts source and translation to the local container
    # and prints the aligned pairs in two fixed-width columns.
    try:
        import sys
        if len(sys.argv) < 2:
            raise RuntimeError("please enter Russian text")
        src = sys.argv[1]
        tgt = translate_text(src)
        request_body = json.dumps({
            "src": src,
            "tgt": tgt,
        })
        req = urllib.request.Request("http://localhost:8080/invocations",
                                     data=request_body.encode("utf-8"),
                                     headers={"Content-Type": "application/json",
                                              "Accept": "application/json"},
                                     method='POST')
        # Close the HTTP response deterministically once the body is read.
        with urllib.request.urlopen(req) as response:
            pairs = json.loads(response.read())
        def format_text(text, n = 48):
            # Truncate or right-pad to exactly n characters. The previous
            # join-based padding produced n-1 characters, so short and long
            # cells ended up with different widths.
            return text[:n].ljust(n)
        for text, translation in pairs:
            print (f"{format_text(text)}\t{format_text(translation)}")
    except RuntimeError as error:
        print ("Error: %s" % str(error))


## pipeline.py
"""
https://colab.research.google.com/drive/1205ubqebM0OsZa1nRgbGJBtitgHqIVv6?usp=sharing#scrollTo=FfDM0w2kfHyJ
"""

import itertools, torch

def init_ids(tokenizer, wid):
    """Build the model input ids for one sentence.

    Args:
        tokenizer: tokenizer exposing ``prepare_for_model`` and ``model_max_length``.
        wid: list of per-word lists of sub-word ids.

    Returns:
        The ``input_ids`` entry of ``tokenizer.prepare_for_model`` called on the
        flattened ids (requested as PyTorch tensors, with truncation enabled).
    """
    flat_ids = [sub_id for word_ids in wid for sub_id in word_ids]
    encoded = tokenizer.prepare_for_model(
        flat_ids,
        return_tensors='pt',
        model_max_length=tokenizer.model_max_length,
        truncation=True,
    )
    return encoded['input_ids']

def init_sub2word_map(token):
    """Map every sub-word position to the index of the word it came from.

    Args:
        token: list of per-word sub-word lists.

    Returns:
        A flat list holding, for each sub-word in order, its word's index.
    """
    return [word_index
            for word_index, pieces in enumerate(token)
            for _ in pieces]

def preprocess(tokenizer, src, tgt):
    """Tokenize both sentences and collect what alignment needs.

    Args:
        tokenizer: tokenizer exposing ``tokenize`` and ``convert_tokens_to_ids``.
        src: source sentence (whitespace-separated words).
        tgt: target sentence (whitespace-separated words).

    Returns:
        dict with ``"sent"`` (word lists), ``"ids"`` (model input ids) and
        ``"sub2word_map"`` (sub-word to word index), each keyed by
        ``"src"`` / ``"tgt"``.
    """
    sides = ("src", "tgt")
    sent = {side: text.strip().split()
            for side, text in zip(sides, (src, tgt))}
    token = {}
    for side in sides:
        token[side] = [tokenizer.tokenize(word) for word in sent[side]]
    wid = {}
    for side in sides:
        wid[side] = [tokenizer.convert_tokens_to_ids(pieces)
                     for pieces in token[side]]
    ids = {}
    for side in sides:
        ids[side] = init_ids(tokenizer, wid[side])
    sub2word_map = {}
    for side in sides:
        sub2word_map[side] = init_sub2word_map(token[side])
    return dict(sent=sent, ids=ids, sub2word_map=sub2word_map)

def run_model(model, struct, align_layer = 8, threshold = 1e-3):
    """Find sub-word pairs that align in both directions.

    The model is put in eval mode and run without gradients on each side's
    ids. Hidden states of layer ``align_layer`` are taken with the first and
    last positions dropped (presumably the special tokens - confirm against
    the tokenizer). A pair is kept when its softmax score exceeds
    ``threshold`` both along the target axis and along the source axis.

    Args:
        model: callable model whose output, indexed with ``[2]``, holds the
            per-layer hidden states.
        struct: dict with an ``"ids"`` entry keyed by ``"src"`` / ``"tgt"``.
        align_layer: index of the hidden layer to use.
        threshold: minimum softmax probability in each direction.

    Returns:
        Tensor of ``(src_index, tgt_index)`` rows, as given by ``torch.nonzero``.
    """
    model.eval()
    with torch.no_grad():
        hidden = {}
        for side in ("src", "tgt"):
            batch = struct["ids"][side].unsqueeze(0)
            layers = model(batch, output_hidden_states=True)[2]
            hidden[side] = layers[align_layer][0, 1:-1]
        scores = torch.matmul(hidden["src"], hidden["tgt"].transpose(-1, -2))
        src_to_tgt = torch.softmax(scores, dim=-1)
        tgt_to_src = torch.softmax(scores, dim=-2)
        both_ways = (src_to_tgt > threshold) & (tgt_to_src > threshold)
    return torch.nonzero(both_ways, as_tuple=False)

def align_words(struct, align_subwords):
    """Turn sub-word alignments into a de-duplicated list of word pairs.

    Args:
        struct: dict with ``"sub2word_map"`` and ``"sent"`` entries, each keyed
            by ``"src"`` / ``"tgt"``.
        align_subwords: iterable of ``(src_subword_index, tgt_subword_index)``.

    Returns:
        List of ``(src_word, tgt_word)`` tuples ordered by word indices.
    """
    index_pairs = {
        (struct["sub2word_map"]["src"][sub_src],
         struct["sub2word_map"]["tgt"][sub_tgt])
        for sub_src, sub_tgt in align_subwords
    }
    result = []
    for word_src, word_tgt in sorted(index_pairs):
        result.append((struct["sent"]["src"][word_src],
                       struct["sent"]["tgt"][word_tgt]))
    return result

class Buffer:
    """Accumulates the words of one term and of its translation."""

    def __init__(self, term = None, translation = None):
        """Create a buffer.

        Args:
            term: a single word, a list of words, or None for an empty buffer.
            translation: a single word, a list of words, or None for empty.

        The defaults are None rather than ``[]``: a mutable default list is
        created once and shared by every instance that relies on it, so
        appending to one buffer would leak into all later ones.
        """
        if term is None:
            term = []
        if translation is None:
            translation = []
        self.term = term if isinstance(term, list) else [term]
        self.translation = translation if isinstance(translation, list) else [translation]

    def matches_term(self, term):
        """Return True if ``term`` equals the last buffered term word."""
        return (self.term != [] and
                self.term[-1] == term)

    def matches_translation(self, translation):
        """Return True if ``translation`` equals the last buffered translation word."""
        return (self.translation != [] and
                self.translation[-1] == translation)

    @property
    def is_populated(self):
        """True when both the term and the translation hold at least one word."""
        return (self.term != [] and
                self.translation != [])

    def render(self):
        """Return ``(term, translation)`` with the words of each joined by spaces."""
        return (" ".join(self.term),
                " ".join(self.translation))

class Terms(list):
    """List of rendered ``(term, translation)`` pairs plus a pending buffer.

    Consecutive pairs that repeat the buffer's last term or last translation
    are merged into the buffer; any other pair flushes the buffer into the
    list (if populated) and starts a new one.
    """

    def __init__(self):
        super().__init__()
        self.buf = Buffer()

    def add(self, term, translation):
        """Feed one aligned word pair into the clustering."""
        same_term = self.buf.matches_term(term)
        same_translation = self.buf.matches_translation(translation)
        if same_term and same_translation:
            # Exact repeat of the previous pair: nothing to record.
            return
        if same_translation:
            self.buf.term.append(term)
            return
        if same_term:
            self.buf.translation.append(translation)
            return
        # Unrelated pair: emit what was buffered and start over.
        if self.buf.is_populated:
            self.append(self.buf.render())
        self.buf = Buffer(term = term,
                          translation = translation)

def cluster_terms(terms):
    """Cluster aligned word pairs into multi-word term/translation pairs.

    Args:
        terms: iterable of ``(text, translation)`` pairs.

    Returns:
        A ``Terms`` list of rendered pairs, including the final pending buffer
        when it is populated.
    """
    grouped = Terms()
    for pair in terms:
        source_word, target_word = pair
        grouped.add(source_word, target_word)
    pending = grouped.buf
    if pending.is_populated:
        grouped.append(pending.render())
    return grouped

def run_pipeline(tokenizer, model, src, tgt):
    """Align ``src`` with ``tgt`` and return the clustered term pairs.

    Chains preprocess -> run_model -> align_words -> cluster_terms.
    """
    prepared = preprocess(tokenizer, src, tgt)
    subword_pairs = run_model(model, prepared)
    word_pairs = align_words(prepared, subword_pairs)
    return cluster_terms(word_pairs)

# Nothing to do when run directly; inference.py imports run_pipeline from here.
if __name__ == "__main__":
    pass


## push_image.py
import boto3, base64, os, subprocess, sys

def list_repos(ecr):
    """Return the names of all ECR repositories visible to the ``ecr`` client."""
    pages = ecr.get_paginator('describe_repositories').paginate()
    return [entry['repositoryName']
            for page in pages
            for entry in page['repositories']]

def create_repo(ecr, repo_name):
    """Create an ECR repository called ``repo_name``; the response is discarded."""
    request = {"repositoryName": repo_name}
    ecr.create_repository(**request)

def get_auth_token(ecr):
    """Return the password part of the ECR authorization token.

    The first ``authorizationData`` entry is base64-decoded and split on
    ``":"``; the second field is returned.
    """
    first_entry = ecr.get_authorization_token()['authorizationData'][0]
    decoded = base64.b64decode(first_entry['authorizationToken']).decode()
    fields = decoded.split(':')
    return fields[1]

def repository_uri(repo_name):
    """Build the ECR URI of ``repo_name`` from the environment.

    Reads ``AWS_ACCOUNT_ID`` and ``AWS_REGION``; raises KeyError if either
    is not set.
    """
    account = os.environ["AWS_ACCOUNT_ID"]
    region_name = os.environ["AWS_REGION"]
    registry = "%s.dkr.ecr.%s.amazonaws.com" % (account, region_name)
    return "{}/{}".format(registry, repo_name)

def login(image_name, auth_token):
    """Log the local Docker client in to the ECR registry of ``image_name``.

    The password is sent on stdin (``--password-stdin``) rather than as a
    ``--password`` argument, so it does not appear in the process list.

    Args:
        image_name: image/repository name used to derive the registry URI.
        auth_token: ECR password, as returned by ``get_auth_token``.

    Raises:
        RuntimeError: with docker's stderr if the login command fails.
    """
    repo_uri = repository_uri(image_name)
    login_command = [
        "docker", "login",
        "--username", "AWS",
        "--password-stdin",
        repo_uri
    ]
    result = subprocess.run(login_command,
                            input=auth_token,
                            capture_output=True,
                            text=True)
    if result.returncode != 0 :
        raise RuntimeError(str(result.stderr))

def tag_and_push_image(image_name, version_tag='latest'):
    """Tag the local image for ECR and push it.

    Args:
        image_name: local image name, also used as the repository name.
        version_tag: tag to apply in the repository.

    Raises:
        RuntimeError: with docker's stderr if tagging or pushing fails.
    """
    target = "{}:{}".format(repository_uri(image_name), version_tag)
    steps = (
        ['docker', 'tag', image_name, target],
        ['docker', 'push', target],
    )
    # Run the steps in order and stop at the first one that fails.
    for command in steps:
        completed = subprocess.run(command, capture_output=True, text=True)
        if completed.returncode != 0:
            raise RuntimeError(str(completed.stderr))

if __name__ == "__main__":
    # Usage: push_image.py <image name>
    # Creates the ECR repository if it is missing, logs docker in, then tags
    # and pushes the local image.
    try:
        cli_args = sys.argv[1:]
        if not cli_args:
            raise RuntimeError("please enter image name")
        image = cli_args[0]
        client = boto3.client('ecr')
        token = get_auth_token(client)
        print ("listing repos")
        existing = list_repos(client)
        if image not in existing:
            print ("creating repo")
            create_repo(ecr=client, repo_name=image)
        print ("logging in")
        login(image_name=image, auth_token=token)
        print ("tagging and pushing image")
        tag_and_push_image(image)
        print ("done")
    except RuntimeError as exc:
        print (f"Error: {exc}")


## requirements.txt
awscli
boto3
botocore
flask
gunicorn
torch
transformers

## run_server.sh
#!/usr/bin/env bash

# Start gunicorn with its default worker count, serving the Flask app on
# port 8080 of every interface.
# Multi-worker alternative: gunicorn -w 4 -b 0.0.0.0:8080 "app:app"

gunicorn --bind 0.0.0.0:8080 "app:app"

## search_policies.py
import boto3, re, sys

if __name__ == "__main__":
    # Usage: search_policies.py <regex>
    # Prints "<name>\t<arn>" for policies whose name matches, ignoring case.
    try:
        cli_args = sys.argv[1:]
        if not cli_args:
            raise RuntimeError("please enter search term")
        pattern = cli_args[0]
        client = boto3.client('iam')
        for page in client.get_paginator('list_policies').paginate():
            matching = (entry for entry in page['Policies']
                        if re.search(pattern, entry["PolicyName"], re.I))
            for entry in matching:
                print (f"{entry['PolicyName']}\t{entry['Arn']}")
    except RuntimeError as exc:
        print (f"Error: {exc}")

## search_roles.py
import boto3, re, sys

if __name__ == "__main__":
    # Usage: search_roles.py <regex>
    # Prints "<name>\t<arn>" for roles whose name matches, ignoring case.
    try:
        cli_args = sys.argv[1:]
        if not cli_args:
            raise RuntimeError("please enter search term")
        pattern = cli_args[0]
        client = boto3.client('iam')
        for page in client.get_paginator('list_roles').paginate():
            matching = (entry for entry in page['Roles']
                        if re.search(pattern, entry["RoleName"], re.I))
            for entry in matching:
                print (f"{entry['RoleName']}\t{entry['Arn']}")
    except RuntimeError as exc:
        print (f"Error: {exc}")

## serve.sh
#!/bin/bash

# Server startup script: Gunicorn serving the Flask application on port 8080.
#
# The worker count comes from $NUM_WORKERS and defaults to 4. Every worker
# loads its own copy of the model, so lower it on memory-constrained hosts
# (e.g. NUM_WORKERS=1).

# exec replaces the shell so that signals go straight to gunicorn
exec gunicorn -w "${NUM_WORKERS:-4}" -b 0.0.0.0:8080 app:app

## setenv.sh
#!/usr/bin/env bash

# Environment for the helper scripts; source this file (". setenv.sh") so the
# exports reach the calling shell.

# Named AWS credentials profile and default output format for the aws CLI.
export AWS_PROFILE=woldeploy
export AWS_DEFAULT_OUTPUT=table
# Read by repository_uri() in create_model.py / push_image.py to build the ECR URI.
export AWS_REGION=eu-west-1
export AWS_ACCOUNT_ID=119552584133


## show_endpoint.sh
#!/bin/bash

# Print the SageMaker description of the endpoint named bwa-endpoint.
aws sagemaker describe-endpoint \
    --endpoint-name bwa-endpoint


## TODO.md

      
    Raw
  

              TODO.md
            
          
    short

exec /usr/local/bin/serve: exec format error


ping endpoint


README to show full workflow


medium

thoughts


back-check returned tokens against sent tokens?

seems to be more trouble than it is worth


test switching src and target?

makes no difference


done


remove path
copy serve.sh to /usr/local/bin/serve
test locally
clean and redeploy

The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint.

https://eu-west-1.console.aws.amazon.com/cloudwatch/home?region=eu-west-1#logsV2:log-groups/log-group/%2Faws%2Fsagemaker%2FEndpoints%2Fbwa-endpoint


script to debug model

CannotStartContainerError. Please ensure the model container for variant AllTraffic starts correctly when invoked with 'docker run <image> serve'   |


test creation of endpoint config
add cloudwatch to role
separate creation of endpoint config and endpoint
add cloudwatch logs to model creation role
create endpoint
delete endpoint
clean up endpoint configs
show endpoint config, show endpoint
delete model
list endpoints
create model w/ role
create model policy
list role policies
detach policies and delete role
list models
push_image.py to check for and create repo
delete_image.py
list_images.py
test_login.py
list_repos.py
create_repo.py
delete_repo.py
move to gists/aws
add workers to gunicorn start, with note re $NUM_WORKERS
create dockerfile
gunicorn
run_inference.py
concatenate same-token results
complete pipeline refactoring
move translations into separate pipeline
clean up word_alignment code
translate using AWS
pass input text from command line
	"""
	Custom images must provide their own webserver, with port 8080 and GET ping and POST invocations endpoints

	This webserver calls inference.py which follows standard sagemaker entry_point conventions, but as a custom webserver you could define your own convention
	"""

	from flask import Flask, request, Response

	from inference import model_fn, input_fn, predict_fn, output_fn

	app = Flask(__name__)

	model = None

	@app.before_request
	def load_model():
	    """Populate the module-level `model` before a request is handled.

	    The artifacts are loaded by `model_fn('.')` the first time this hook
	    runs; later requests find `model` already set and skip the load.
	    """
	    global model
	    if not model:
	        model = model_fn('.')

	@app.route('/ping', methods=['GET'])
	def ping():
	    """Health check endpoint: always answers with an empty 200 response."""
	    return '', 200

	@app.route('/invocations', methods=['POST'])
	def invocations():
	    """Run inference on the POSTed body and return the serialised result.

	    The request passes through input_fn -> predict_fn -> output_fn.
	    Responds 503 when no model is loaded; otherwise 200 with mimetype
	    application/json.
	    """
	    if not model:
	        return Response(response='Model not loaded', status=503)
	    input_data = input_fn(request.data, request.content_type)
	    predictions = predict_fn(input_data, model)
	    output = output_fn(predictions, request.accept_mimetypes)
	    return Response(output,
	                    status=200,
	                    mimetype='application/json')

	if __name__ == '__main__':
	    # Direct execution: run Flask's built-in server on all interfaces, port 8080
	    app.run(host='0.0.0.0', port=8080)
	import boto3, sys

	if __name__ == "__main__":
	    # Create a SageMaker endpoint from an existing endpoint config.
	    # Usage: <script> <endpoint config name> <endpoint name>
	    try:
	        if len(sys.argv) < 3:
	            raise RuntimeError("please enter endpoint config name, endpoint name")
	        endpoint_config_name, endpoint_name = sys.argv[1:3]
	        sagemaker = boto3.client('sagemaker')
	        # Print the raw API response
	        print (sagemaker.create_endpoint(
	            EndpointName = endpoint_name,
	            EndpointConfigName = endpoint_config_name
	        ))
	    except RuntimeError as error:
	        print(f"Error: {error}")
	import boto3, os, sys

	def repository_uri(repo_name):
	    """Return the ECR repository URI for `repo_name`.

	    The account id and region are read from the AWS_ACCOUNT_ID and
	    AWS_REGION environment variables; a KeyError is raised if either
	    is not set.
	    """
	    account_id, region = (os.environ["AWS_ACCOUNT_ID"],
	                          os.environ["AWS_REGION"])
	    return f"{account_id}.dkr.ecr.{region}.amazonaws.com/{repo_name}"

	if __name__ == "__main__":
	    # Create a SageMaker model whose primary container is the ":latest"
	    # image in the ECR repository named after the model.
	    # Usage: <script> <model name> <role arn>
	    try:
	        if len(sys.argv) < 3:
	            raise RuntimeError("please enter model name, role arn")
	        model_name, role_arn = sys.argv[1:3]
	        # The repository name is taken to be the model name
	        repo_uri = repository_uri(model_name)
	        version_tag = "latest"
	        image_uri = f"{repo_uri}:{version_tag}"
	        sagemaker = boto3.client('sagemaker')
	        # Print the raw API response
	        print (sagemaker.create_model(
	            ModelName = model_name,
	            ExecutionRoleArn = role_arn,
	            PrimaryContainer={
	                'Image': image_uri
	            }
	        ))
	    except RuntimeError as error:
	        print (f"Error: {error}")
	import boto3, json, sys

	# Trust policy document allowing the SageMaker service to assume the role
	TrustRelationship = {
	    "Version": "2012-10-17",
	    "Statement": [
	        {
	            "Effect": "Allow",
	            "Principal": {
	                "Service": "sagemaker.amazonaws.com"
	            },
	            "Action": "sts:AssumeRole"
	        }
	    ]
	}

	"""
	Not sure Cloudwatch strictly required as logging seems to be a Cloudformation- like process in which Sagemaker keeps its own logs and yields them in a call to describe_xxx
	"""

	SagemakerFullAccessPolicyArn = 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess'

	CloudwatchFullAccessPolicyArn = 'arn:aws:iam::aws:policy/CloudWatchFullAccess'

	if __name__ == "__main__":
	    # Create the role "<model name>-sagemaker-model-role" with the trust
	    # policy above, then attach the two managed policies to it.
	    # Usage: <script> <model name>
	    try:
	        if len(sys.argv) < 2:
	            raise RuntimeError("please enter model name")
	        model_name = sys.argv[1]
	        role_name = f"{model_name}-sagemaker-model-role"
	        iam = boto3.client('iam')
	        print (iam.create_role(
	            RoleName=role_name,
	            AssumeRolePolicyDocument=json.dumps(TrustRelationship),
	            MaxSessionDuration=3600
	        ))
	        for policy_arn in [SagemakerFullAccessPolicyArn,
	                           CloudwatchFullAccessPolicyArn]:
	            print (iam.attach_role_policy(
	                RoleName=role_name,
	                PolicyArn=policy_arn
	            ))
	    except RuntimeError as error:
	        print(f"Error: {error}")
	import boto3, sys

	"""
	Attached policies have to be deleted before a role can be deleted, but inline policies will be deleted as part of the role
	"""

	def detach_attached_policies(iam, role_name):
	    """Detach every policy attached to `role_name`.

	    `iam` is the IAM client used for both listing and detaching. All pages
	    of list_attached_role_policies are walked and each listed policy is
	    detached by its ARN.
	    """
	    paginator = iam.get_paginator('list_attached_role_policies')
	    for response in paginator.paginate(RoleName=role_name):
	        for policy in response['AttachedPolicies']:
	            iam.detach_role_policy(RoleName=role_name,
	                                   PolicyArn=policy['PolicyArn'])

	if __name__ == "__main__":
	    # Detach the role's attached policies, then delete the role.
	    # Usage: <script> <role name>
	    try:
	        if len(sys.argv) < 2:
	            raise RuntimeError("please enter role name")
	        role_name = sys.argv[1]
	        iam = boto3.client('iam')
	        # Attached policies must be detached before the role can be deleted
	        detach_attached_policies(iam, role_name)
	        print (iam.delete_role(RoleName=role_name))
	    except RuntimeError as error:
	        print (f"Error: {error}")
	# Use Python 3.10 slim image from the Python Software Foundation
	# NOTE: when building on a non-amd64 host, pass `--platform linux/amd64` to
	# `docker buildx build` so the resulting image matches the machine that runs it
	FROM python:3.10-slim

	# Upgrade pip (no pip cache is kept, to keep the layer small)
	RUN pip install --no-cache-dir --upgrade pip

	# Set the working directory for the application files
	WORKDIR /app

	# Install the CPU-only torch wheel first, so the requirements install below
	# finds torch already satisfied and does not pull in the CUDA packages
	RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu

	# Create requirements.txt file (printf handles "\n" the same way in every shell)
	RUN printf 'torch\ntransformers\nflask\ngunicorn\n' > requirements.txt

	# Install packages from requirements.txt
	RUN pip install --no-cache-dir -r requirements.txt

	# Copy files into the image; done after the dependency install so that a
	# code change does not invalidate the cached dependency layers
	COPY pipeline.py .
	COPY inference.py .
	COPY app.py .
	COPY serve.sh .

	# Copy directories into the image
	COPY model model/
	COPY tokenizer tokenizer/

	# Expose port 8080
	EXPOSE 8080

	# Make the serve script executable
	RUN chmod +x serve.sh

	# Make a symbolic link to ensure 'serve' is available globally
	RUN ln -s /app/serve.sh /usr/local/bin/serve

	# Command to run when starting the container
	CMD ["serve"]
	import os, transformers, warnings

	# Silence FutureWarning messages for the whole script
	warnings.simplefilter(action='ignore', category=FutureWarning)

	if __name__ == "__main__":
	    # Fetch the pretrained multilingual BERT model and tokenizer and save
	    # them under ./model and ./tokenizer respectively.
	    if not os.path.exists("tmp"):
	        os.mkdir("tmp")
	    model = transformers.BertModel.from_pretrained('bert-base-multilingual-cased')
	    model.save_pretrained('./model')
	    tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-multilingual-cased')
	    tokenizer.save_pretrained('./tokenizer')
	"""
	inference.py follows the standard interface pattern accepted by sagemaker

	Pre-built images contain their own webserver and inference.py can be specified using the sagemaker.model.Model entry_point parameter, slotting in neatly as a request handler

	Custom images need to define their own webserver, again following the pattern acceptable to sagemaker, but accordingly don't necessarily need to follow the same format for inference.py

	However it can be useful to follow the same pattern by way of convention; if the community were ever to read this code, or if AWS were ever to provide a custom image containing BertModel and BertTokenizer
	"""

	from transformers import BertModel, BertTokenizer

	from pipeline import run_pipeline

	import json, os

	def model_fn(model_dir):
	    """Load the BERT model and tokenizer stored under `model_dir`.

	    The model is read from `<model_dir>/model` and the tokenizer from
	    `<model_dir>/tokenizer`. Returns a dict with the keys "model" and
	    "tokenizer".
	    """
	    model_path = os.path.join(model_dir, "model")
	    tokenizer_path = os.path.join(model_dir, "tokenizer")
	    model = BertModel.from_pretrained(model_path)
	    tokenizer = BertTokenizer.from_pretrained(tokenizer_path)
	    return {"model": model,
	            "tokenizer": tokenizer}

	def input_fn(request_body, request_content_type):
	    """Deserialise a request body.

	    Only the content type 'application/json' is accepted, in which case
	    the JSON-decoded body is returned. Any other content type raises
	    ValueError.
	    """
	    if request_content_type == 'application/json':
	        input_data = json.loads(request_body)
	        return input_data
	    else:
	        raise ValueError("Unsupported content type: {}".format(request_content_type))

	def predict_fn(input_data, model_artifacts):
	    """Run the pipeline on one request and return its result.

	    `input_data` must provide the keys "src" and "tgt"; `model_artifacts`
	    must provide the keys "model" and "tokenizer" (the dict returned by
	    model_fn).
	    """
	    return run_pipeline(model = model_artifacts['model'],
	                        tokenizer = model_artifacts['tokenizer'],
	                        src = input_data["src"],
	                        tgt = input_data["tgt"])

	def output_fn(prediction_output, accept):
	    """Serialise the prediction as a JSON string.

	    `accept` is compared through its string form, which must be exactly
	    "application/json"; anything else raises ValueError.
	    """
	    if str(accept) == "application/json":
	        response = prediction_output
	        return json.dumps(response)
	    else:
	        raise ValueError("Unsupported accept type: {}".format(accept))

	if __name__ == "__main__":
	    # Nothing to do when run directly; the functions above are called by the webserver
	    pass
	import boto3, os

	if __name__ == "__main__":
	    # Print "<PolicyName>\t<Arn>" for every IAM policy whose ARN contains
	    # the account id taken from the AWS_ACCOUNT_ID environment variable.
	    iam = boto3.client('iam')
	    paginator = iam.get_paginator('list_policies')
	    account_id = os.environ["AWS_ACCOUNT_ID"]
	    for page in paginator.paginate():
	        for policy in page['Policies']:
	            if account_id in policy["Arn"]:
	                print (f"{policy['PolicyName']}\t{policy['Arn']}")
	import boto3

	if __name__ == "__main__":
	sagemaker = boto3.client("sagemaker")
	paginator = sagemaker.get_paginator('list_endpoint_configs')
	for page in paginator.paginate():
	for config in page['EndpointConfigs']:
	print(f"Endpoint Config Name: {config['EndpointConfigName']}")