Skip to content

Instantly share code, notes, and snippets.

@msoftware
Created January 2, 2026 09:13
Show Gist options
  • Select an option

  • Save msoftware/25c0d206d6d1b8eb38cb0c234201f8b3 to your computer and use it in GitHub Desktop.

Select an option

Save msoftware/25c0d206d6d1b8eb38cb0c234201f8b3 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "dc2117fc-d903-47de-af0b-53d8fa03b332",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import time\n",
"import json\n",
"import ollama\n",
"import base64\n",
"import mariadb\n",
"import sys"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "bf811701-eb16-4873-9138-983497793c65",
"metadata": {},
"outputs": [],
"source": [
"host = 'localhost'\n",
"port = 3306\n",
"user = 'user'\n",
"password = 'password'\n",
"database = 'test_database'\n",
"\n",
"conn = mariadb.connect(user=user, password=password, host=host, port=port, database=database)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "681667c4-21b8-48e5-a53f-668eac746539",
"metadata": {},
"outputs": [],
"source": [
"def get_embeddings(text_to_embed, model_name=\"qwen3-embedding:8b\"):\n",
" \"\"\"\n",
" Generates embeddings for a given text using an Ollama model.\n",
"\n",
" Args:\n",
" text_to_embed (str): The text for which to generate embeddings.\n",
" model_name (str): The name of the Ollama model to use (default: \"qwen3-embedding:8b\").\n",
"\n",
" Returns:\n",
" list: A list of floats representing the embedding vector, or None if an error occurs.\n",
" \"\"\"\n",
" try:\n",
" response = ollama.embeddings(\n",
" model=model_name,\n",
" prompt=text_to_embed\n",
" )\n",
" return response['embedding']\n",
" except Exception as e:\n",
" print(f\"Error generating embeddings: {e}\")\n",
" return None"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "03742106-e2b4-4a24-9249-1bc0ef701c38",
"metadata": {},
"outputs": [],
"source": [
"# --- MariaDB Insertion Function ---\n",
"def insert_embedding_into_mariadb(embedding_vector):\n",
" \"\"\"\n",
" Inserts an embedding vector into the MariaDB vector table.\n",
"\n",
" Args:\n",
" embedding_vector (list): The embedding vector as a list of floats.\n",
" db_config (dict): Dictionary containing MariaDB connection details.\n",
" \"\"\"\n",
" if not embedding_vector:\n",
" print(\"No embedding vector provided to insert.\")\n",
" return\n",
"\n",
" cursor = None\n",
" try:\n",
" cursor = conn.cursor()\n",
"\n",
" rounded_embedding = [round(float(x), 4) for x in embedding_vector] # Ensure x is float just in case\n",
" embedding_json_string = json.dumps(rounded_embedding) # Use the rounded embedding\n",
"\n",
" # embedding_json_string = json.dumps(embedding_vector)\n",
" \n",
" print(f\"DEBUG: JSON string being sent: {embedding_json_string[:200]}...\") # Print first 200 chars\n",
"\n",
" # SQL to insert into the vector table\n",
" # We're letting 'id' auto-increment\n",
" sql = \"INSERT INTO `qwen3-embedding-8b` (embedding) VALUES (VEC_FromText(?))\"\n",
"\n",
" # Execute the insert statement\n",
" cursor.execute(sql, (embedding_json_string,))\n",
"\n",
" # Commit the transaction\n",
" conn.commit()\n",
" print(f\"Successfully inserted embedding with ID: {cursor.lastrowid}\")\n",
"\n",
" except mariadb.Error as e:\n",
" print(f\"Error inserting into MariaDB: {e}\")\n",
" if conn:\n",
" conn.rollback() # Rollback on error\n",
" finally:\n",
" if cursor:\n",
" cursor.close()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7174559d-6b9c-4d55-86cf-b7507c93271e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Processing text 1: 'The cat sat on the mat.'\n",
"Generated embedding (first 5 elements): [-0.0026666224002838135, 0.013000323437154293, 0.002823619171977043, -0.03171399608254433, -0.010299142450094223]...\n",
"Embedding length: 4096\n",
"DEBUG: JSON string being sent: [-0.0027, 0.013, 0.0028, -0.0317, -0.0103, -0.0205, 0.0077, -0.0095, -0.0049, 0.0147, 0.0032, 0.0119, -0.0104, -0.0235, 0.0286, -0.0097, 0.0095, 0.0209, 0.0259, 0.0389, -0.0148, 0.0202, 0.0151, -0.039...\n",
"Successfully inserted embedding with ID: 1\n",
"\n",
"Processing text 2: 'Dogs love to chase balls in the park.'\n",
"Generated embedding (first 5 elements): [-0.014950787648558617, 0.0052628363482654095, -0.006423640064895153, -0.038704320788383484, 0.003801250597462058]...\n",
"Embedding length: 4096\n",
"DEBUG: JSON string being sent: [-0.015, 0.0053, -0.0064, -0.0387, 0.0038, 0.0355, -0.0064, -0.0069, 0.0098, 0.0521, -0.0112, -0.0127, -0.0026, -0.0042, 0.0051, 0.0235, 0.0172, 0.0338, 0.0282, 0.0305, 0.0117, -0.0134, -0.0045, -0.02...\n",
"Successfully inserted embedding with ID: 2\n",
"\n",
"Processing text 3: 'Artificial intelligence is transforming many industries.'\n",
"Generated embedding (first 5 elements): [0.012756548821926117, 0.03550448268651962, -0.016718043014407158, -0.024982227012515068, 0.07379638403654099]...\n",
"Embedding length: 4096\n",
"DEBUG: JSON string being sent: [0.0128, 0.0355, -0.0167, -0.025, 0.0738, -0.0007, -0.0172, -0.0006, 0.0161, 0.0276, -0.0306, 0.0278, 0.0406, 0.0143, 0.051, 0.009, 0.0084, -0.0144, -0.0012, -0.0082, -0.0137, 0.0227, 0.0034, 0.0038, ...\n",
"Successfully inserted embedding with ID: 3\n",
"\n",
"Processing text 4: 'A quantum computer can solve certain problems much faster.'\n",
"Generated embedding (first 5 elements): [0.0023580908309668303, 0.01199082937091589, 0.021898211911320686, 0.00322919525206089, 0.04967232048511505]...\n",
"Embedding length: 4096\n",
"DEBUG: JSON string being sent: [0.0024, 0.012, 0.0219, 0.0032, 0.0497, -0.0074, -0.0221, -0.0398, 0.0328, 0.0069, -0.0609, 0.032, 0.0097, 0.0115, 0.0484, -0.0092, 0.0177, -0.0069, 0.0132, -0.0035, -0.0012, 0.0353, 0.0107, 0.0, -0.0...\n",
"Successfully inserted embedding with ID: 4\n",
"\n",
"Insertion process complete.\n"
]
}
],
"source": [
"texts_to_process = [\n",
" \"The cat sat on the mat.\",\n",
" \"Dogs love to chase balls in the park.\",\n",
" \"Artificial intelligence is transforming many industries.\",\n",
" \"A quantum computer can solve certain problems much faster.\"\n",
"]\n",
"\n",
"for i, text in enumerate(texts_to_process):\n",
" print(f\"\\nProcessing text {i+1}: '{text}'\")\n",
" embedding = get_embeddings(text)\n",
"\n",
" if embedding:\n",
" print(f\"Generated embedding (first 5 elements): {embedding[:5]}...\")\n",
" print(f\"Embedding length: {len(embedding)}\")\n",
" if len(embedding) != 4096:\n",
" print(f\"Warning: Embedding length {len(embedding)} does not match table's VECTOR(4096). This might cause issues or be truncated.\")\n",
" insert_embedding_into_mariadb(embedding)\n",
" else:\n",
" print(f\"Could not generate embedding for text: '{text}'\")\n",
"\n",
"print(\"\\nInsertion process complete.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a87723e4-0d1f-499c-aa70-d8d53f66ce4b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment