Created
March 1, 2024 22:41
-
-
Save skeptrunedev/fe29c6e1d4072df529031ec4cd1acdbe to your computer and use it in GitHub Desktop.
Create a Trieve Dataset and Upload Chunks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import requests | |
from dotenv import load_dotenv | |
# Load variables from a .env file (if one is found) into the process
# environment before they are read below.
load_dotenv()

# Credentials and identifiers; each is None when its variable is unset.
api_key = os.environ.get("API_KEY")
dataset_id = os.environ.get("DATASET_ID")
organization_id = os.environ.get("ORGANIZATION_ID")

# Sample text fragments; each list element is uploaded as its own chunk.
chunks_ = [
    "As an experienced cardiologist with a rich history of helping patients achieve heart health, I'll",
    "guide you through the intricate world of cardiology. In this comprehensive guide, we'll explore",
    "cardiovascular health, common heart conditions, prevention strategies, and more. Let's embark on a",
    "journey to understand and nurture our most vital organ – the heart. ### 1. Introduction to",
]
def create_trieve_dataset(dataset_name, timeout=30):
    """
    Create a new dataset in your Trieve organization.

    Uses the module-level ``api_key`` and ``organization_id`` for the
    request headers and body.

    Args:
        dataset_name: Name to give the new dataset.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The decoded JSON response body when the server answers with
        HTTP 200. In every other case (non-200 status, network failure,
        undecodable body) a dict with a single ``"error"`` key describing
        the problem. This function does not raise for request failures.
    """
    try:
        response = requests.post(
            "https://api.trieve.ai/api/dataset",
            headers={
                "Authorization": f"{api_key}",
                "TR-Organization": f"{organization_id}",
                "Content-Type": "application/json",
            },
            data=json.dumps(
                {
                    "organization_id": organization_id,
                    "dataset_name": dataset_name,
                    "server_configuration": {},
                    "client_configuration": {},
                }
            ),
            timeout=timeout,
        )
    except requests.exceptions.RequestException as e:
        # Report transport failures in the same shape as HTTP failures so
        # callers only have to check for the "error" key.
        return {"error": f"Error in create_trieve_dataset: {e}"}

    if response.status_code != 200:
        return {
            "error": f"Error in create_trieve_dataset: {response.status_code} - {response.text}"
        }

    try:
        return json.loads(response.text)
    except ValueError:
        # json.JSONDecodeError subclasses ValueError; a 200 with a non-JSON
        # body is still a failure from the caller's point of view.
        return {
            "error": f"Error in create_trieve_dataset: {response.status_code} - {response.text}"
        }
def insert_chunks_into_dataset(api_key, dataset_id, role, chunks, timeout=30):
    """
    Upload each chunk to a Trieve dataset, one request per chunk.

    A failure on one chunk is printed and does not stop the remaining
    chunks from being attempted.

    Args:
        api_key: Value sent in the ``Authorization`` header.
        dataset_id: Value sent in the ``TR-Dataset`` header.
        role: Tag attached to every chunk (sent as the only ``tag_set`` entry).
        chunks: Iterable of chunk contents, each sent as ``chunk_html``.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        The number of chunks the server accepted (non-error HTTP status).
    """
    url = "https://api.trieve.ai/api/chunk"
    headers = {
        "Content-Type": "application/json",
        "TR-Dataset": dataset_id,
        "Authorization": api_key,
    }

    created = 0
    for chunk in chunks:
        payload = {
            "chunk_html": chunk,
            "tag_set": [role],
        }
        try:
            response = requests.post(
                url, headers=headers, json=payload, timeout=timeout
            )
        except requests.exceptions.RequestException as e:
            print(f"Error in insert_chunks_into_dataset: {e}")
            continue

        # Only report success for non-error statuses; a 4xx/5xx reply means
        # the chunk was not created.
        if response.ok:
            created += 1
            print(f"created chunk: {response.status_code}")
        else:
            print(
                f"Error in insert_chunks_into_dataset: {response.status_code} - {response.text}"
            )

    return created
role = "umar"

# Create a dataset for the role
dataset_name = f"{role}_dataset"
dataset_response = create_trieve_dataset(dataset_name)

if isinstance(dataset_response, dict) and "id" in dataset_response:
    dataset_id = dataset_response["id"]

    # Insert chunks into the dataset
    insert_chunks_into_dataset(api_key, dataset_id, role, chunks_)
    print(f"Inserted chunks for {role} into dataset {dataset_id}")
else:
    # A successful response that lacks "id" has no "error" key either, so
    # fall back to showing the raw response instead of raising KeyError.
    if isinstance(dataset_response, dict):
        reason = dataset_response.get("error", dataset_response)
    else:
        reason = dataset_response
    print(f"Error creating dataset for {role}: {reason}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment