Skip to content

Instantly share code, notes, and snippets.

@ChrisRomp
Last active February 6, 2024 18:45
Show Gist options
  • Save ChrisRomp/f038cc233d99eaf578065723bade2a26 to your computer and use it in GitHub Desktop.
Save ChrisRomp/f038cc233d99eaf578065723bade2a26 to your computer and use it in GitHub Desktop.
APIM Load Balancing Policy - Round Robin with Azure OpenAI (AOAI)
<!-- This shows the policy as implemented with references to {{named values}} and fragments -->
<!-- Named values: https://learn.microsoft.com/en-us/azure/api-management/api-management-howto-properties -->
<!-- Policy fragments: https://learn.microsoft.com/en-us/azure/api-management/policy-fragments -->
<policies>
    <inbound>
        <base />
        <!-- Authenticate to AOAI with the APIM managed identity. This requires enabling the managed identity on APIM and granting it access to AOAI. -->
        <authentication-managed-identity resource="https://cognitiveservices.azure.com" output-token-variable-name="msi-access-token" ignore-error="false" />
        <set-header name="Authorization" exists-action="override">
            <value>@("Bearer " + (string)context.Variables["msi-access-token"])</value>
        </set-header>
        <!-- Get the set of backend URLs from named values -->
        <set-variable name="backend0" value="{{aoai-backend-0}}" />
        <set-variable name="backend1" value="{{aoai-backend-1}}" />
        <set-variable name="backend2" value="{{aoai-backend-2}}" />
        <!-- Check the cache for the round-robin counter -->
        <cache-lookup-value key="backend-counter" variable-name="backend-counter" />
        <choose>
            <!-- Cache miss, so initialize the counter and cache it -->
            <when condition="@(!context.Variables.ContainsKey("backend-counter"))">
                <set-variable name="backend-counter" value="0" />
                <cache-store-value key="backend-counter" value="0" duration="100" />
            </when>
        </choose>
        <!-- Policy fragment that chooses the backend for the first attempt based on the cached index, then advances the index in the cache. See effective policy. -->
        <include-fragment fragment-id="aoai-select-backend-cached" />
    </inbound>
    <backend>
        <!-- This retry policy will fire on any response code >= 400 (including 429), calling the next AOAI instance -->
        <retry condition="@(context.Response.StatusCode >= 400)" count="3" interval="5" first-fast-retry="true">
            <!-- The retry policy runs its child policies once before any retry happens. Only pick a new backend when a previous attempt has already failed; otherwise the backend chosen in the inbound section would be discarded and the counter advanced twice per request. -->
            <choose>
                <when condition="@(context.Response != null ? context.Response.StatusCode >= 400 : false)">
                    <!-- Re-read the counter so the retry sees updates made by other requests -->
                    <cache-lookup-value key="backend-counter" variable-name="backend-counter" />
                    <!-- Same policy fragment as in the inbound section -->
                    <include-fragment fragment-id="aoai-select-backend-cached" />
                </when>
            </choose>
            <!-- Buffer the request body so it can be sent again on a retry -->
            <forward-request buffer-request-body="true" />
        </retry>
    </backend>
    <outbound>
        <base />
    </outbound>
    <on-error>
        <base />
    </on-error>
</policies>
<!-- This shows the effective policy, including inheritance and the outputs of named values and policy fragments -->
<policies>
    <inbound>
        <authentication-managed-identity resource="https://cognitiveservices.azure.com" output-token-variable-name="msi-access-token" ignore-error="false" />
        <set-header name="Authorization" exists-action="override">
            <value>@("Bearer " + (string)context.Variables["msi-access-token"])</value>
        </set-header>
        <set-variable name="backend0" value="https://cr1-openai-ncus1.openai.azure.com/openai/" />
        <set-variable name="backend1" value="https://cr1-openai-eastus-02.openai.azure.com/openai/" />
        <set-variable name="backend2" value="https://cr1-openai-canadaeast1.openai.azure.com/openai/" />
        <cache-lookup-value key="backend-counter" variable-name="backend-counter" />
        <choose>
            <when condition="@(!context.Variables.ContainsKey("backend-counter"))">
                <set-variable name="backend-counter" value="0" />
                <cache-store-value key="backend-counter" value="0" duration="100" />
            </when>
        </choose>
        <!--include-fragment: Begin aoai-select-backend-cached policy fragment scope-->
        <choose>
            <when condition="@(Convert.ToInt32(context.Variables["backend-counter"]) == 0)">
                <set-backend-service base-url="@((string)context.Variables["backend0"])" />
                <set-variable name="backend-counter" value="1" />
                <cache-store-value key="backend-counter" value="1" duration="100" />
            </when>
            <when condition="@(Convert.ToInt32(context.Variables["backend-counter"]) == 1)">
                <set-backend-service base-url="@((string)context.Variables["backend1"])" />
                <set-variable name="backend-counter" value="2" />
                <cache-store-value key="backend-counter" value="2" duration="100" />
            </when>
            <otherwise>
                <set-backend-service base-url="@((string)context.Variables["backend2"])" />
                <set-variable name="backend-counter" value="0" />
                <cache-store-value key="backend-counter" value="0" duration="100" />
            </otherwise>
        </choose>
        <!--include-fragment: End aoai-select-backend-cached policy fragment scope-->
    </inbound>
    <backend>
        <retry condition="@(context.Response.StatusCode >= 400)" count="3" interval="5" first-fast-retry="true">
            <choose>
                <when condition="@(context.Response != null ? context.Response.StatusCode >= 400 : false)">
                    <cache-lookup-value key="backend-counter" variable-name="backend-counter" />
                    <!--include-fragment: Begin aoai-select-backend-cached policy fragment scope-->
                    <choose>
                        <when condition="@(Convert.ToInt32(context.Variables["backend-counter"]) == 0)">
                            <set-backend-service base-url="@((string)context.Variables["backend0"])" />
                            <set-variable name="backend-counter" value="1" />
                            <cache-store-value key="backend-counter" value="1" duration="100" />
                        </when>
                        <when condition="@(Convert.ToInt32(context.Variables["backend-counter"]) == 1)">
                            <set-backend-service base-url="@((string)context.Variables["backend1"])" />
                            <set-variable name="backend-counter" value="2" />
                            <cache-store-value key="backend-counter" value="2" duration="100" />
                        </when>
                        <otherwise>
                            <set-backend-service base-url="@((string)context.Variables["backend2"])" />
                            <set-variable name="backend-counter" value="0" />
                            <cache-store-value key="backend-counter" value="0" duration="100" />
                        </otherwise>
                    </choose>
                    <!--include-fragment: End aoai-select-backend-cached policy fragment scope-->
                </when>
            </choose>
            <forward-request buffer-request-body="true" />
        </retry>
    </backend>
    <outbound />
    <on-error />
</policies>
@ChrisRomp
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment