Last active
July 26, 2023 08:37
-
-
Save harshsinghal/912ea48e554f2d328d9d3bc1ca017c79 to your computer and use it in GitHub Desktop.
Accompanying code to go with a series of posts on Retrieval Augmented Generation published on LinkedIn by Harsh Singhal https://www.linkedin.com/in/harshsinghal/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#          ___
#         / ()\
#       _|_____|_
#      | | === | |
#      |_|  O  |_|
#       ||  O  ||
#       ||__*__||
#      |~ \___/ ~|
#      /=\ /=\ /=\
# ______[_]_[_]_[_]______
"""Build a vector index over local documents and query it with GPT-3.5-turbo.

Reads documents from ./aws_case_documents/, builds a GPTVectorStoreIndex
with llama_index, persists the index to disk, then runs one sample query
and prints the response.
"""

# Import necessary classes and functions from the llama_index and langchain libraries.
# StorageContext / load_index_from_storage are unused here but kept for the
# companion script that reloads the persisted index from disk.
from llama_index import (
    GPTVectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
    LLMPredictor,
    load_index_from_storage,
)
from langchain.chat_models import ChatOpenAI

# Import the openai library and os module to read the API key from the environment
import openai
import os

# SECURITY: never hard-code API keys in source. Read the key from the
# environment and fail fast with a clear message when it is missing.
if 'OPENAI_API_KEY' not in os.environ:
    raise EnvironmentError(
        "Set the OPENAI_API_KEY environment variable before running this script."
    )
openai.api_key = os.environ['OPENAI_API_KEY']

# Notify the user that the document loading process has begun
print("started the loading document process...")

# Read the data from the specified directory. Change './aws_case_documents/' to your desired path.
documents = SimpleDirectoryReader('./aws_case_documents/').load_data()

# Initialize the LLMPredictor with the gpt-3.5-turbo model; temperature=0 keeps output deterministic
llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo"))

# Create a ServiceContext using the initialized predictor
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

# Notify the user that the indexing process has begun
print("started the indexing process...")

# Create an index using the loaded documents and the created service context
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)

# Store the created index to the disk at the specified location
print("storing the index to disk")
index.storage_context.persist(persist_dir="./aws_case_documents_index")

# Notify the user that we are querying the index
print("Querying the index...")

# Query the index for the provided question and store the response
response = index.as_query_engine().query("Write a detailed summary of AWS Personalize?")

# Print the received response
print(response)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Set the OPENAI_API_KEY environment variable to your actual OpenAI API key.
Install the required libraries/packages.
Adjust the document and index directory paths ('./aws_case_documents/' and './aws_case_documents_index') according to your directory structure.
Run the script using:
python script_name.py