Consume Azure AI Pay As You Go (PAYG) Open Model endpoint (Llama 2, ...)
# These endpoints don't use the usual Azure OpenAI scheme; they use the OpenAI scheme.
# They also take the model field to route to the proper deployment, but I haven't verified this works.
# Tested with openai 1.13.3
from openai import OpenAI
import logging

logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s:%(lineno)d - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')

# Replace this with the endpoint target
endpoint_url = ''
# Replace this with the endpoint key
api_key = ''

if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

base_url = endpoint_url + '/v1'

client = OpenAI(
    base_url=base_url,
    api_key=api_key,
)

response = client.chat.completions.create(
    model="Llama-2-7b-chat-gmqyf",  # model = "deployment_name"
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
        {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
        {"role": "user", "content": "Do other Azure AI services support this too?"}
    ]
)

print(response.choices[0].message.content)
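If the endpoint honors the OpenAI streaming protocol, the same client can stream tokens as they are generated. This is a minimal sketch, assuming the PAYG endpoint supports the stream parameter (not verified here):

# Hedged sketch: streaming with the same OpenAI client.
# Assumes the PAYG endpoint supports stream=True, which I haven't verified.
stream = client.chat.completions.create(
    model="Llama-2-7b-chat-gmqyf",  # deployment name, as above
    messages=[{"role": "user", "content": "Tell me about Azure AI."}],
    stream=True,
)
for chunk in stream:
    # Each chunk carries an incremental delta; content may be None on some chunks
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
print()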
# Variant using the requests library, with HTTP-level debug logging.
import json
import os
import ssl
import requests
import logging

# These two lines enable debugging at httplib level (requests -> urllib3 -> http.client).
# You will see the REQUEST, including HEADERS and DATA, and the RESPONSE with HEADERS but without DATA.
# The only thing missing will be the response body, which is not logged.
try:
    import http.client as http_client
except ImportError:
    # Python 2
    import httplib as http_client
http_client.HTTPConnection.debuglevel = 1

# You must initialize logging, otherwise you won't see debug output.
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True

def allowSelfSignedHttps(allowed):
    # Bypass server certificate verification on the client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True)  # This line is needed if you use a self-signed certificate in your scoring service.

# Request data goes here.
# The example below assumes JSON formatting, which may need to be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script

# AzureML scoring payload format, kept for reference; it is overwritten by the
# OpenAI-style payload below.
data = {
    "input_data": {
        "input_string": ["I believe the meaning of life is"],
        "parameters": {
            "top_p": 0.9,
            "temperature": 0.6,
            "max_new_tokens": 96,
            "do_sample": True
        }
    }
}

# OpenAI-style chat payload; this is the one actually sent.
data = {
    "model": "llama-2-7b-hf",
    "messages": [
        {"role": "user", "content": "Can you tell me about your jackets?"}
    ],
    "n": 1,
    "top_p": 1.0,
    "temperature": 1.0,
    "max_new_tokens": 500,
    "max_tokens": 500
}

body = json.dumps(data)

url = ''
# Replace this with the primary/secondary key or AMLToken for the endpoint
api_key = ''
model_deployment = None  # Replace this with a deployment name if need be
api_type = "chat"  # "chat" or other

if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

def sanitize_endpoint_url(endpoint_url: str, api_type: str):
    if api_type.lower() == "chat":
        if not endpoint_url.endswith("/v1/chat/completions"):
            return endpoint_url + "/v1/chat/completions"
    else:
        if not endpoint_url.endswith("/v1/completions"):
            return endpoint_url + "/v1/completions"
    return endpoint_url
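# For example, with a hypothetical endpoint URL:
#   sanitize_endpoint_url("https://my-endpoint.region.inference.ml.azure.com", "chat")
#   -> "https://my-endpoint.region.inference.ml.azure.com/v1/chat/completions"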
# The azureml-model-deployment header forces the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules.
headers = {
    'Content-Type': 'application/json',
    'Authorization': ('Bearer ' + api_key),
}
if model_deployment is not None:
    headers['azureml-model-deployment'] = model_deployment

endpoint_url = sanitize_endpoint_url(url, api_type)
print("Calling " + endpoint_url)

try:
    result = requests.post(endpoint_url, data=body, headers=headers)
    result.raise_for_status()
    print(result.text)
except requests.exceptions.HTTPError as error:
    print("The request failed with status code: " + str(error.response.status_code))
    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.response.headers)
    print(error.response.text)
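For the non-chat path (api_type "other"), which routes to /v1/completions, the payload would follow the OpenAI completions scheme with a prompt instead of messages. A hedged sketch, not verified against these endpoints:

# Hedged sketch: payload for the /v1/completions route (api_type "other").
# Field names follow the OpenAI completions scheme; not verified against PAYG endpoints.
completion_data = {
    "model": "llama-2-7b-hf",  # deployment name, as above
    "prompt": "I believe the meaning of life is",
    "temperature": 0.6,
    "top_p": 0.9,
    "max_tokens": 96,
}
# body = json.dumps(completion_data)  # then POST to sanitize_endpoint_url(url, "other")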
# Variant using urllib.request from the standard library.
import urllib.request
import urllib.error
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    # Bypass server certificate verification on the client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True)  # This line is needed if you use a self-signed certificate in your scoring service.

# Request data goes here.
# The example below assumes JSON formatting, which may need to be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {
    "model": "Llama-2-7b-chat-gmqyf",
    "messages": [
        {"role": "system", "content": "You're a useful assistant"},
        {"role": "user", "content": "Can you tell me about your jackets?"}
    ],
    "n": 1,
    "top_p": 1.0,
    "temperature": 1.0,
    "max_new_tokens": 500,
    "max_tokens": 500
}

url = ''
# Replace this with the primary/secondary key or AMLToken for the endpoint
api_key = ''
api_type = "chat"  # "chat" or other

if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

def sanitize_endpoint_url(endpoint_url: str, api_type: str):
    if api_type.lower() == "chat":
        if not endpoint_url.endswith("/v1/chat/completions"):
            return endpoint_url + "/v1/chat/completions"
    else:
        if not endpoint_url.endswith("/v1/completions"):
            return endpoint_url + "/v1/completions"
    return endpoint_url

body = str.encode(json.dumps(data))

# The azureml-model-deployment header would force the request to go to a specific deployment.
# It is omitted here so the request observes the endpoint traffic rules.
headers = {
    'Content-Type': 'application/json',
    'Authorization': ('Bearer ' + api_key),
}

endpoint_url = sanitize_endpoint_url(url, api_type)

req = urllib.request.Request(endpoint_url, body, headers)
try:
    response = urllib.request.urlopen(req)
    result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))
    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))
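Assuming the endpoint returns an OpenAI-style chat completion body, the raw bytes read above can be decoded to extract just the assistant message. A minimal sketch:

# Minimal sketch: pull the assistant message out of an OpenAI-style response.
# Only valid on the success path above, where result holds the raw JSON bytes.
payload = json.loads(result)
print(payload["choices"][0]["message"]["content"])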