llama-index starter tutorial with OVHcloud AI Endpoints
#!/usr/bin/env python3
### Llama-index starter tutorial with OVHcloud AI Endpoints
import os
import requests
import time
import logging
import sys
from llama_index.llms.openai_like import OpenAILike
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
from typing import Any, List, Optional
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.bridge.pydantic import PrivateAttr
#logging.basicConfig(stream=sys.stdout, level=logging.INFO)
#logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
"""
Usage:
OVH_AI_ENDPOINTS_ACCESS_TOKEN="your-token" python3 llama-index_starter.py
NB: Make sure you are using a valid token. In the contrary, document indexing will be long due to rate-limite
"""
class OVHcloudAIEEmbeddings(BaseEmbedding):
    _api_key: str = PrivateAttr()
    _api_base: str = PrivateAttr()

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_base: str = "https://multilingual-e5-base.endpoints.kepler.ai.cloud.ovh.net/api/text2vec",
        **kwargs: Any,
    ) -> None:
        # Initialize the pydantic base model first, then set private attributes
        super().__init__(**kwargs)
        self._api_key = api_key or os.environ.get("OVH_AI_ENDPOINTS_ACCESS_TOKEN", None)
        self._api_base = api_base

    @classmethod
    def class_name(cls) -> str:
        return "ovhcloud ai endpoints embedding"

    def _generate_embedding(self, text: str) -> List[float]:
        """Generate embeddings from OVHcloud AI Endpoints.

        Args:
            text: str. An input text sentence or document.

        Returns:
            embeddings: a list of float numbers. Embeddings correspond to your given text.
        """
        headers = {
            "content-type": "text/plain",
            "Authorization": f"Bearer {self._api_key}",
        }
        session = requests.Session()
        while True:
            response = session.post(
                self._api_base,
                headers=headers,
                data=text.encode("utf-8"),  # encode explicitly so non-ASCII input is sent as UTF-8
            )
            if response.status_code != 200:
                if response.status_code == 429:
                    # Rate limit exceeded: wait for the reset window
                    reset_time = int(response.headers.get("RateLimit-Reset", 0))
                    logging.info("Rate limit exceeded. Waiting %d seconds.", reset_time)
                    if reset_time > 0:
                        time.sleep(reset_time)
                        continue
                    else:
                        # Rate limit reset time has passed, retry immediately
                        continue
                # Handle other non-200 status codes
                raise ValueError(
                    f"Request failed with status code {response.status_code}: {response.text}"
                )
            return response.json()

    async def _aget_query_embedding(self, query: str) -> List[float]:
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        return self._get_text_embedding(text)

    def _get_text_embedding(self, text: str) -> List[float]:
        """Get text embedding."""
        return self._generate_embedding(text)

    def _get_query_embedding(self, query: str) -> List[float]:
        """Get query embedding."""
        return self._generate_embedding(query)

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get text embeddings."""
        return [self._generate_embedding(text) for text in texts]
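
# Optional sanity check (sketch): the embedding class can be exercised on its
# own before wiring it into llama-index; "Hello world" is an arbitrary test string.
#   emb = OVHcloudAIEEmbeddings()
#   vec = emb.get_text_embedding("Hello world")
#   print(len(vec))  # multilingual-e5-base vectors are expected to be 768-dimensional
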
Settings.embed_model = OVHcloudAIEEmbeddings()
Settings.chunk_size = 512
Settings.context_window = 4096
Settings.num_output = 256
Settings.llm = OpenAILike(
    model="Mixtral-8x7B-Instruct-v0.1",
    api_base="https://mixtral-8x7b-instruct-v01.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1",
    api_key=os.environ.get("OVH_AI_ENDPOINTS_ACCESS_TOKEN", None),
    temperature=0.1,
    max_tokens=Settings.num_output,
)
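# Note: OpenAILike works with any OpenAI-compatible API. A quick smoke test of
# the LLM endpoint (optional sketch):
#   print(Settings.llm.complete("Say hello"))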
# mkdir data
# curl --output-dir data -O https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)
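# Optional (sketch): persist the index to disk to avoid re-embedding on every run,
# e.g. index.storage_context.persist(persist_dir="./storage").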
response = index.as_query_engine().query("What did the author do growing up?")
print(response)
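
# Optional (sketch): inspect the retrieved chunks the answer was grounded in.
#   for node_with_score in response.source_nodes:
#       print(node_with_score.score, node_with_score.node.get_content()[:80])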