skeptrunedev/trieve_create_dataset_and_add_chunks.py

## trieve_create_dataset_and_add_chunks.py
import json
import os
import requests
from dotenv import load_dotenv

# Load variables from a .env file (if any) before reading the environment.
load_dotenv()

# Trieve credentials and identifiers; each one is None when its variable is unset.
api_key = os.getenv("API_KEY")
dataset_id = os.getenv("DATASET_ID")
organization_id = os.getenv("ORGANIZATION_ID")

# Sample text fragments; each entry is uploaded as its own chunk.
chunks_ = [
    "As an experienced cardiologist with a rich history of helping patients achieve heart health, I'll",
    "guide you through the intricate world of cardiology. In this comprehensive guide, we'll explore",
    "cardiovascular health, common heart conditions, prevention strategies, and more. Let's embark on a",
    "journey to understand and nurture our most vital organ – the heart.  ### 1. Introduction to",
]


def create_trieve_dataset(dataset_name):
    """
    Create a new dataset in your Trieve organization.

    Authentication uses the module-level ``api_key`` and ``organization_id``.

    Args:
        dataset_name: Name sent as ``dataset_name`` in the request body.

    Returns:
        The decoded JSON body when the API answers with HTTP 200. In every
        other case (missing credentials, transport failure, non-200 status,
        or a body that is not valid JSON) a dict with a single ``"error"``
        key describing the problem.
    """
    if not api_key or not organization_id:
        # Formatting an unset value into a header would send the literal
        # string "None"; report the real cause without a network round trip.
        return {
            "error": "Error in create_trieve_dataset: API_KEY and ORGANIZATION_ID must be set"
        }

    try:
        response = requests.post(
            "https://api.trieve.ai/api/dataset",
            headers={
                "Authorization": api_key,
                "TR-Organization": organization_id,
                "Content-Type": "application/json",
            },
            data=json.dumps(
                {
                    "organization_id": organization_id,
                    "dataset_name": dataset_name,
                    "server_configuration": {},
                    "client_configuration": {},
                }
            ),
            # requests has no default timeout; bound the wait so an
            # unresponsive server cannot hang the script forever.
            timeout=30,
        )
    except requests.exceptions.RequestException as e:
        # Keep the error-dict contract for connection/timeout failures too.
        return {"error": f"Error in create_trieve_dataset: {e}"}

    if response.status_code != 200:
        return {
            "error": f"Error in create_trieve_dataset: {response.status_code} - {response.text}"
        }

    try:
        return json.loads(response.text)
    except ValueError as e:
        # json.JSONDecodeError is a ValueError subclass.
        return {
            "error": f"Error in create_trieve_dataset: invalid JSON in response - {e}"
        }


def insert_chunks_into_dataset(api_key, dataset_id, role, chunks):
    """
    Upload chunks to the Trieve chunk endpoint, one POST request per chunk.

    Args:
        api_key: Value sent in the ``Authorization`` header.
        dataset_id: Value sent in the ``TR-Dataset`` header.
        role: Tag placed, as the only entry, in every chunk's ``tag_set``.
        chunks: Iterable of strings; each one is sent as ``chunk_html``.

    Returns:
        None. The outcome of every request is printed, and a failed chunk
        does not stop the remaining uploads.
    """
    url = "https://api.trieve.ai/api/chunk"
    headers = {
        "Content-Type": "application/json",
        "TR-Dataset": dataset_id,
        "Authorization": api_key,
    }

    for chunk in chunks:
        payload = {
            "chunk_html": chunk,
            "tag_set": [role],
        }

        try:
            # requests has no default timeout; bound the wait per chunk.
            response = requests.post(url, headers=headers, json=payload, timeout=30)
        except requests.exceptions.RequestException as e:
            print(f"Error in insert_chunks_into_dataset: {e}")
            continue

        if response.ok:
            print(f"created chunk: {response.status_code}")
        else:
            # A 4xx/5xx answer means the chunk was not created; say so
            # instead of reporting it as created.
            print(
                f"Error in insert_chunks_into_dataset: {response.status_code} - {response.text}"
            )


role = "umar"
# Create a dataset for the role
dataset_name = f"{role}_dataset"
dataset_response = create_trieve_dataset(dataset_name)

if "id" in dataset_response:
    dataset_id = dataset_response["id"]
    # Insert chunks into the dataset
    insert_chunks_into_dataset(api_key, dataset_id, role, chunks_)
    print(f"Inserted chunks for {role} into dataset {dataset_id}")
else:
    # On HTTP 200 create_trieve_dataset returns the raw JSON body, which has
    # no "error" key; fall back to showing the whole response instead of
    # raising KeyError when "id" is missing from it.
    if isinstance(dataset_response, dict):
        error_detail = dataset_response.get("error", dataset_response)
    else:
        error_detail = dataset_response
    print(f"Error creating dataset for {role}: {error_detail}")
	import json
	import os
	import requests
	from dotenv import load_dotenv

	# Load variables from a .env file (if any) before reading the environment.
	load_dotenv()

	# Trieve credentials and identifiers; each one is None when its variable is unset.
	api_key = os.getenv("API_KEY")
	dataset_id = os.getenv("DATASET_ID")
	organization_id = os.getenv("ORGANIZATION_ID")

	# Sample text fragments; each entry is uploaded as its own chunk.
	chunks_ = [
	    "As an experienced cardiologist with a rich history of helping patients achieve heart health, I'll",
	    "guide you through the intricate world of cardiology. In this comprehensive guide, we'll explore",
	    "cardiovascular health, common heart conditions, prevention strategies, and more. Let's embark on a",
	    "journey to understand and nurture our most vital organ – the heart. ### 1. Introduction to",
	]


	def create_trieve_dataset(dataset_name):
	    """
	    Create a new dataset in your Trieve organization.

	    Authentication uses the module-level ``api_key`` and ``organization_id``.

	    Args:
	        dataset_name: Name sent as ``dataset_name`` in the request body.

	    Returns:
	        The decoded JSON body when the API answers with HTTP 200. In every
	        other case (missing credentials, transport failure, non-200 status,
	        or a body that is not valid JSON) a dict with a single ``"error"``
	        key describing the problem.
	    """
	    if not api_key or not organization_id:
	        # Formatting an unset value into a header would send the literal
	        # string "None"; report the real cause without a network round trip.
	        return {
	            "error": "Error in create_trieve_dataset: API_KEY and ORGANIZATION_ID must be set"
	        }

	    try:
	        response = requests.post(
	            "https://api.trieve.ai/api/dataset",
	            headers={
	                "Authorization": api_key,
	                "TR-Organization": organization_id,
	                "Content-Type": "application/json",
	            },
	            data=json.dumps(
	                {
	                    "organization_id": organization_id,
	                    "dataset_name": dataset_name,
	                    "server_configuration": {},
	                    "client_configuration": {},
	                }
	            ),
	            # requests has no default timeout; bound the wait so an
	            # unresponsive server cannot hang the script forever.
	            timeout=30,
	        )
	    except requests.exceptions.RequestException as e:
	        # Keep the error-dict contract for connection/timeout failures too.
	        return {"error": f"Error in create_trieve_dataset: {e}"}

	    if response.status_code != 200:
	        return {
	            "error": f"Error in create_trieve_dataset: {response.status_code} - {response.text}"
	        }

	    try:
	        return json.loads(response.text)
	    except ValueError as e:
	        # json.JSONDecodeError is a ValueError subclass.
	        return {
	            "error": f"Error in create_trieve_dataset: invalid JSON in response - {e}"
	        }


	def insert_chunks_into_dataset(api_key, dataset_id, role, chunks):
	    """
	    Upload chunks to the Trieve chunk endpoint, one POST request per chunk.

	    Args:
	        api_key: Value sent in the ``Authorization`` header.
	        dataset_id: Value sent in the ``TR-Dataset`` header.
	        role: Tag placed, as the only entry, in every chunk's ``tag_set``.
	        chunks: Iterable of strings; each one is sent as ``chunk_html``.

	    Returns:
	        None. The outcome of every request is printed, and a failed chunk
	        does not stop the remaining uploads.
	    """
	    url = "https://api.trieve.ai/api/chunk"
	    headers = {
	        "Content-Type": "application/json",
	        "TR-Dataset": dataset_id,
	        "Authorization": api_key,
	    }

	    for chunk in chunks:
	        payload = {
	            "chunk_html": chunk,
	            "tag_set": [role],
	        }

	        try:
	            # requests has no default timeout; bound the wait per chunk.
	            response = requests.post(url, headers=headers, json=payload, timeout=30)
	        except requests.exceptions.RequestException as e:
	            print(f"Error in insert_chunks_into_dataset: {e}")
	            continue

	        if response.ok:
	            print(f"created chunk: {response.status_code}")
	        else:
	            # A 4xx/5xx answer means the chunk was not created; say so
	            # instead of reporting it as created.
	            print(
	                f"Error in insert_chunks_into_dataset: {response.status_code} - {response.text}"
	            )


	role = "umar"
	# Create a dataset for the role
	dataset_name = f"{role}_dataset"
	dataset_response = create_trieve_dataset(dataset_name)

	if "id" in dataset_response:
	    dataset_id = dataset_response["id"]
	    # Insert chunks into the dataset
	    insert_chunks_into_dataset(api_key, dataset_id, role, chunks_)
	    print(f"Inserted chunks for {role} into dataset {dataset_id}")
	else:
	    # On HTTP 200 create_trieve_dataset returns the raw JSON body, which has
	    # no "error" key; fall back to showing the whole response instead of
	    # raising KeyError when "id" is missing from it.
	    if isinstance(dataset_response, dict):
	        error_detail = dataset_response.get("error", dataset_response)
	    else:
	        error_detail = dataset_response
	    print(f"Error creating dataset for {role}: {error_detail}")