Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save skeptrunedev/fe29c6e1d4072df529031ec4cd1acdbe to your computer and use it in GitHub Desktop.
Save skeptrunedev/fe29c6e1d4072df529031ec4cd1acdbe to your computer and use it in GitHub Desktop.
Create a Trieve Dataset and Upload Chunks
import json
import os
import requests
from dotenv import load_dotenv
load_dotenv()
# Credentials and identifiers come from the process environment; each one is
# None when its variable is not set.
api_key, dataset_id, organization_id = (
    os.environ.get(variable)
    for variable in ("API_KEY", "DATASET_ID", "ORGANIZATION_ID")
)

# Sample text fragments; each list element is uploaded as its own chunk.
chunks_ = [
    "As an experienced cardiologist with a rich history of helping patients achieve heart health, I'll",
    "guide you through the intricate world of cardiology. In this comprehensive guide, we'll explore",
    "cardiovascular health, common heart conditions, prevention strategies, and more. Let's embark on a",
    "journey to understand and nurture our most vital organ – the heart. ### 1. Introduction to",
]
def create_trieve_dataset(dataset_name, timeout=30):
    """
    Create a new dataset in your Trieve organization.

    Uses the module-level ``api_key`` and ``organization_id`` for the
    request headers and body.

    Args:
        dataset_name: Name to give the new dataset.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded JSON response body when the API answers with HTTP 200.
        In every failure case (transport error, non-200 status, or a body
        that is not valid JSON) a dict with a single ``"error"`` key holding
        a human-readable description.
    """
    try:
        response = requests.post(
            "https://api.trieve.ai/api/dataset",
            headers={
                "Authorization": f"{api_key}",
                "TR-Organization": f"{organization_id}",
                "Content-Type": "application/json",
            },
            json={
                "organization_id": organization_id,
                "dataset_name": dataset_name,
                "server_configuration": {},
                "client_configuration": {},
            },
            timeout=timeout,
        )
    except requests.exceptions.RequestException as e:
        # Report transport failures through the same error-dict channel as
        # HTTP failures so callers only have one failure shape to handle.
        return {"error": f"Error in create_trieve_dataset: {e}"}

    if response.status_code != 200:
        return {
            "error": f"Error in create_trieve_dataset: {response.status_code} - {response.text}"
        }

    try:
        return response.json()
    except ValueError as e:
        # response.json() raises a ValueError subclass on an undecodable body.
        return {
            "error": f"Error in create_trieve_dataset: invalid JSON in response - {e}"
        }
def insert_chunks_into_dataset(api_key, dataset_id, role, chunks, timeout=30):
    """
    Upload each chunk to a Trieve dataset, one POST request per chunk.

    Failures are printed and skipped so that one bad chunk does not stop the
    rest of the upload.

    Args:
        api_key: Value sent in the ``Authorization`` header.
        dataset_id: Value sent in the ``TR-Dataset`` header.
        role: Tag attached to every chunk (sent as the only ``tag_set`` entry).
        chunks: Iterable of chunk bodies; each is sent as ``chunk_html``.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        None.
    """
    # Nothing to upload: avoid opening a session for no work.
    if not chunks:
        return

    url = "https://api.trieve.ai/api/chunk"
    headers = {
        "Content-Type": "application/json",
        "TR-Dataset": dataset_id,
        "Authorization": api_key,
    }

    # A single session reuses the underlying connection across all chunks
    # instead of opening a new one per request.
    with requests.Session() as session:
        session.headers.update(headers)
        for chunk in chunks:
            payload = {
                "chunk_html": chunk,
                "tag_set": [role],
            }
            try:
                response = session.post(url, json=payload, timeout=timeout)
            except requests.exceptions.RequestException as e:
                print(f"Error in insert_chunks_into_dataset: {e}")
                continue

            # Only report success when the API actually accepted the chunk;
            # a 4xx/5xx answer is a failed upload, not a created chunk.
            if response.ok:
                print(f"created chunk: {response.status_code}")
            else:
                print(
                    f"Error in insert_chunks_into_dataset: {response.status_code} - {response.text}"
                )
role = "umar"
# Create a dataset for the role
dataset_name = f"{role}_dataset"
dataset_response = create_trieve_dataset(dataset_name)
if isinstance(dataset_response, dict) and "id" in dataset_response:
    # Upload into the dataset that was just created, replacing any
    # DATASET_ID value read from the environment.
    dataset_id = dataset_response["id"]
    # Insert chunks into the dataset
    insert_chunks_into_dataset(api_key, dataset_id, role, chunks_)
    print(f"Inserted chunks for {role} into dataset {dataset_id}")
else:
    # A successful HTTP response may still lack both "id" and "error", or
    # may not be a dict at all; fall back to printing the raw response
    # rather than failing with a KeyError.
    if isinstance(dataset_response, dict):
        error_detail = dataset_response.get("error", dataset_response)
    else:
        error_detail = dataset_response
    print(f"Error creating dataset for {role}: {error_detail}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment