Skip to content

Instantly share code, notes, and snippets.

@duarteocarmo
Created November 11, 2024 13:04
Show Gist options
  • Save duarteocarmo/4937eedc966a365ed2a518c1ff586c3d to your computer and use it in GitHub Desktop.
Save duarteocarmo/4937eedc966a365ed2a518c1ff586c3d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Table of contents**<a id='toc0_'></a> \n",
"- [NOTE: Inspired by this](#toc1_) \n",
"- [Load model](#toc2_) \n",
"- [Outlines choice mode](#toc3_) \n",
"- [Pydantic like tutorial](#toc4_) \n",
"- [No chat templates, slightly bigger model](#toc5_) \n",
"- [Batch?](#toc6_) \n",
"\n",
"<!-- vscode-jupyter-toc-config\n",
"\tnumbering=false\n",
"\tanchor=true\n",
"\tflat=false\n",
"\tminLevel=1\n",
"\tmaxLevel=6\n",
"\t/vscode-jupyter-toc-config -->\n",
"<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/duarteocarmo/Library/Caches/pypoetry/virtualenvs/gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import json\n",
"import outlines\n",
"import torch\n",
"from transformers import AutoTokenizer\n",
"from textwrap import dedent\n",
"from outlines.samplers import greedy, multinomial\n",
"from pydantic import BaseModel, Field, constr\n",
"from enum import Enum\n",
"import jupyter_black\n",
"import typing as t\n",
"from outlines import models, generate\n",
"import llama_cpp\n",
"\n",
"\n",
"jupyter_black.load()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# <a id='toc1_'></a>NOTE: Inspired by [this](https://github.com/willkurt/odsc-outlines-tutorial/blob/main/Answers/Exercise2-Solution.ipynb) [&#8593;](#toc0_)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"image/jpeg": "",
"text/html": [
"\n",
" <iframe\n",
" width=\"800\"\n",
" height=\"300\"\n",
" src=\"https://www.youtube.com/embed/2IkqM9k8swI\"\n",
" frameborder=\"0\"\n",
" allowfullscreen\n",
" \n",
" ></iframe>\n",
" "
],
"text/plain": [
"<IPython.lib.display.YouTubeVideo at 0x435412700>"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import YouTubeVideo\n",
"\n",
"YouTubeVideo(\"2IkqM9k8swI\", width=800, height=300)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# <a id='toc2_'></a>[Load model](#toc0_)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"model_name = \"HuggingFaceTB/SmolLM-1.7B-Instruct\"\n",
"\n",
"# Prefer Apple's Metal backend (the device this notebook was written for), but\n",
"# fall back to CUDA or CPU so the cell also runs on machines without MPS.\n",
"if torch.backends.mps.is_available():\n",
"    device = \"mps\"\n",
"elif torch.cuda.is_available():\n",
"    device = \"cuda\"\n",
"else:\n",
"    device = \"cpu\"\n",
"\n",
"model = outlines.models.transformers(\n",
"    model_name,\n",
"    device=device,\n",
"    model_kwargs={\n",
"        \"torch_dtype\": torch.bfloat16,\n",
"        # NOTE(review): trust_remote_code lets the model repository run its own\n",
"        # Python code locally; confirm this checkpoint needs it before keeping it.\n",
"        \"trust_remote_code\": True,\n",
"    },\n",
")\n",
"\n",
"# Separate tokenizer handle; the prompt helpers below use it to render chat templates.\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"class Sentiment(str, Enum):\n",
" POSITIVE = \"POSITIVE\"\n",
" NEGATIVE = \"NEGATIVE\"\n",
" NEUTRAL = \"NEUTRAL\"\n",
"\n",
"\n",
"class SentimentClassification(BaseModel):\n",
" sentiment: Sentiment"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def create_prompt(message: str) -> str:\n",
"    \"\"\"Render the sentiment-classification conversation as one prompt string.\n",
"\n",
"    The conversation consists of the instructions (sent as a user turn), a\n",
"    canned assistant acknowledgement, and finally `message` as the text to\n",
"    classify.\n",
"\n",
"    Args:\n",
"        message: The text whose sentiment should be classified.\n",
"\n",
"    Returns:\n",
"        The conversation formatted with the tokenizer's chat template, ending\n",
"        with the header of a new assistant turn so that generation starts with\n",
"        the model's answer.\n",
"    \"\"\"\n",
"    SYSTEM_PROMPT = \"\"\"\n",
"You are a seasoned data analyst.\n",
"Classify the following text into three sentiment classes, namely:\n",
"NEGATIVE: if the text has a negative tone\n",
"POSITIVE: if the text has a positive tone\n",
"NEUTRAL: if the text has a neutral tone\n",
"\n",
"Consider the overall sentiment of the text.\n",
"Only reply with 'POSITIVE', 'NEGATIVE', or 'NEUTRAL'.\n",
"\"\"\".strip()\n",
"\n",
"    messages = [\n",
"        {\n",
"            \"role\": \"user\",\n",
"            \"content\": SYSTEM_PROMPT,\n",
"        },\n",
"        {\n",
"            \"role\": \"assistant\",\n",
"            \"content\": \"I undersand and only reply with 'POSITIVE', 'NEGATIVE', or 'NEUTRAL'.\",\n",
"        },\n",
"        {\"role\": \"user\", \"content\": message},\n",
"    ]\n",
"    # add_generation_prompt=True appends the assistant-turn header. Without it the\n",
"    # prompt stops right after the user turn, so the constrained generation is not\n",
"    # conditioned on an open assistant reply.\n",
"    return tokenizer.apply_chat_template(\n",
"        messages, tokenize=False, add_generation_prompt=True\n",
"    )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# <a id='toc3_'></a>[Outlines choice mode](#toc0_)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/duarteocarmo/Library/Caches/pypoetry/virtualenvs/gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9/lib/python3.9/site-packages/transformers/generation/configuration_utils.py:590: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.0` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`. This was detected when initializing the generation config instance, which means the corresponding file may hold incorrect parameterization and should be fixed.\n",
" warnings.warn(\n",
"/Users/duarteocarmo/Library/Caches/pypoetry/virtualenvs/gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9/lib/python3.9/site-packages/transformers/generation/configuration_utils.py:590: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.0` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n",
" warnings.warn(\n",
"/Users/duarteocarmo/Library/Caches/pypoetry/virtualenvs/gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9/lib/python3.9/site-packages/transformers/pytorch_utils.py:325: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
" test_elements = torch.tensor(test_elements)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"POSITIVE\n"
]
}
],
"source": [
"classifier = generate.choice(\n",
" model, [\"POSITIVE\", \"NEGATIVE\", \"NEUTRAL\"], sampler=greedy()\n",
")\n",
"answer = classifier(\n",
" create_prompt(\n",
" \"I love the new medication, it has helped me so much. I feel so much better now.\"\n",
" )\n",
")\n",
"print(answer)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# <a id='toc4_'></a>[Pydantic like tutorial](#toc0_)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"msgs_to_classify = [\n",
" \"I love the new medication, it has helped me so much. I feel so much better now.\",\n",
" \"I am not sure about the new medication, it has not helped me at all.\",\n",
" \"The new medication is okay, it has helped me a little bit.\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def create_prompt(message: str) -> str:\n",
"    \"\"\"Render the JSON-extraction conversation for `message` as a prompt string.\n",
"\n",
"    NOTE: this rebinds the name `create_prompt`, replacing the version defined\n",
"    in the choice-mode section; cells that run after this one get this variant.\n",
"\n",
"    Args:\n",
"        message: The text to analyse.\n",
"\n",
"    Returns:\n",
"        The conversation (instructions, canned assistant acknowledgement, then\n",
"        `message`) formatted with the tokenizer's chat template, ending with\n",
"        the header of a new assistant turn so generation starts with the answer.\n",
"    \"\"\"\n",
"    # dedent + strip keep the source indentation and the surrounding blank lines\n",
"    # of the triple-quoted block out of the text the model sees.\n",
"    instructions = dedent(\n",
"        f\"\"\"\n",
"        You are a seasoned data analyst at a pharmaceutical company.\n",
"        Your aim is to process a message and return the following information in JSON format:\n",
"        {{\n",
"            'sentiment': <{\"|\".join([e.value for e in Sentiment])}>,\n",
"        }}\n",
"        \"\"\"\n",
"    ).strip()\n",
"\n",
"    complaint_messages = [\n",
"        {\"role\": \"user\", \"content\": instructions},\n",
"        {\n",
"            \"role\": \"assistant\",\n",
"            \"content\": \"I undersand and will process the message in the JSON format you described\",\n",
"        },\n",
"        {\"role\": \"user\", \"content\": message},\n",
"    ]\n",
"    # add_generation_prompt=True appends the assistant-turn header so that the\n",
"    # model generates its reply instead of continuing from the end of a user turn.\n",
"    return tokenizer.apply_chat_template(\n",
"        complaint_messages, tokenize=False, add_generation_prompt=True\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "ValidationError",
"evalue": "1 validation error for SentimentClassification\n__root__\n Extra data: line 1 column 26 (char 25) [type=value_error.jsondecode, input_value='{\"sentiment\": \"POSITIVE\"}AAAA', input_type=str]",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9/lib/python3.9/site-packages/pydantic/main.py:1187\u001b[0m, in \u001b[0;36mBaseModel.parse_raw\u001b[0;34m(cls, b, content_type, encoding, proto, allow_pickle)\u001b[0m\n\u001b[1;32m 1186\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1187\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[43mparse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_str_bytes\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1188\u001b[0m \u001b[43m \u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1189\u001b[0m \u001b[43m \u001b[49m\u001b[43mproto\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproto\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1190\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontent_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1191\u001b[0m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1192\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_pickle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_pickle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1193\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1194\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m exc:\n",
"File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9/lib/python3.9/site-packages/pydantic/deprecated/parse.py:49\u001b[0m, in \u001b[0;36mload_str_bytes\u001b[0;34m(b, content_type, encoding, proto, allow_pickle, json_loads)\u001b[0m\n\u001b[1;32m 48\u001b[0m b \u001b[38;5;241m=\u001b[39m b\u001b[38;5;241m.\u001b[39mdecode(encoding)\n\u001b[0;32m---> 49\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mjson_loads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m proto \u001b[38;5;241m==\u001b[39m Protocol\u001b[38;5;241m.\u001b[39mpickle:\n",
"File \u001b[0;32m~/.asdf/installs/python/3.9.20/lib/python3.9/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 344\u001b[0m parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 345\u001b[0m parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 347\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[0;32m~/.asdf/installs/python/3.9.20/lib/python3.9/json/decoder.py:340\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m end \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(s):\n\u001b[0;32m--> 340\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExtra data\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, end)\n\u001b[1;32m 341\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n",
"\u001b[0;31mJSONDecodeError\u001b[0m: Extra data: line 1 column 26 (char 25)",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[9], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m prompts \u001b[38;5;241m=\u001b[39m [create_prompt(text) \u001b[38;5;28;01mfor\u001b[39;00m text \u001b[38;5;129;01min\u001b[39;00m msgs_to_classify]\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m msg \u001b[38;5;129;01min\u001b[39;00m msgs_to_classify:\n\u001b[0;32m----> 5\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[43msentiment_classifier\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcreate_prompt\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmsg\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(msg)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(answer)\n",
"File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9/lib/python3.9/site-packages/outlines/generate/api.py:512\u001b[0m, in \u001b[0;36mSequenceGeneratorAdapter.__call__\u001b[0;34m(self, prompts, max_tokens, stop_at, seed, **model_specific_params)\u001b[0m\n\u001b[1;32m 500\u001b[0m generation_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_generation_parameters(\n\u001b[1;32m 501\u001b[0m max_tokens, stop_at, seed\n\u001b[1;32m 502\u001b[0m )\n\u001b[1;32m 504\u001b[0m completions \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mgenerate(\n\u001b[1;32m 505\u001b[0m prompts,\n\u001b[1;32m 506\u001b[0m generation_params,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 509\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_specific_params,\n\u001b[1;32m 510\u001b[0m )\n\u001b[0;32m--> 512\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_format\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcompletions\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9/lib/python3.9/site-packages/outlines/generate/api.py:488\u001b[0m, in \u001b[0;36mSequenceGeneratorAdapter._format\u001b[0;34m(self, sequences)\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_format(sequence) \u001b[38;5;28;01mfor\u001b[39;00m sequence \u001b[38;5;129;01min\u001b[39;00m sequences]\n\u001b[1;32m 487\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 488\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mformat_sequence\u001b[49m\u001b[43m(\u001b[49m\u001b[43msequences\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9/lib/python3.9/site-packages/outlines/generate/json.py:50\u001b[0m, in \u001b[0;36mjson.<locals>.<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 48\u001b[0m regex_str \u001b[38;5;241m=\u001b[39m build_regex_from_schema(schema, whitespace_pattern)\n\u001b[1;32m 49\u001b[0m generator \u001b[38;5;241m=\u001b[39m regex(model, regex_str, sampler)\n\u001b[0;32m---> 50\u001b[0m generator\u001b[38;5;241m.\u001b[39mformat_sequence \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[43mschema_object\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparse_raw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(schema_object):\n\u001b[1;32m 52\u001b[0m schema \u001b[38;5;241m=\u001b[39m pyjson\u001b[38;5;241m.\u001b[39mdumps(get_schema_from_signature(schema_object))\n",
"File \u001b[0;32m~/Library/Caches/pypoetry/virtualenvs/gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9/lib/python3.9/site-packages/pydantic/main.py:1214\u001b[0m, in \u001b[0;36mBaseModel.parse_raw\u001b[0;34m(cls, b, content_type, encoding, proto, allow_pickle)\u001b[0m\n\u001b[1;32m 1207\u001b[0m \u001b[38;5;66;03m# ctx is missing here, but since we've added `input` to the error, we're not pretending it's the same\u001b[39;00m\n\u001b[1;32m 1208\u001b[0m error: pydantic_core\u001b[38;5;241m.\u001b[39mInitErrorDetails \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 1209\u001b[0m \u001b[38;5;66;03m# The type: ignore on the next line is to ignore the requirement of LiteralString\u001b[39;00m\n\u001b[1;32m 1210\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtype\u001b[39m\u001b[38;5;124m'\u001b[39m: pydantic_core\u001b[38;5;241m.\u001b[39mPydanticCustomError(type_str, \u001b[38;5;28mstr\u001b[39m(exc)), \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 1211\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mloc\u001b[39m\u001b[38;5;124m'\u001b[39m: (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__root__\u001b[39m\u001b[38;5;124m'\u001b[39m,),\n\u001b[1;32m 1212\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m'\u001b[39m: b,\n\u001b[1;32m 1213\u001b[0m }\n\u001b[0;32m-> 1214\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m pydantic_core\u001b[38;5;241m.\u001b[39mValidationError\u001b[38;5;241m.\u001b[39mfrom_exception_data(\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, [error])\n\u001b[1;32m 1215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_validate(obj)\n",
"\u001b[0;31mValidationError\u001b[0m: 1 validation error for SentimentClassification\n__root__\n Extra data: line 1 column 26 (char 25) [type=value_error.jsondecode, input_value='{\"sentiment\": \"POSITIVE\"}AAAA', input_type=str]"
]
}
],
"source": [
"# Generator whose output must parse as a SentimentClassification object.\n",
"sentiment_classifier = outlines.generate.json(model, SentimentClassification)\n",
"\n",
"# Render every prompt once and reuse it below, rather than building an unused\n",
"# list and then calling create_prompt a second time inside the loop.\n",
"prompts = [create_prompt(text) for text in msgs_to_classify]\n",
"\n",
"for msg, prompt in zip(msgs_to_classify, prompts):\n",
"    answer = sentiment_classifier(prompt)\n",
"    print(msg)\n",
"    print(answer)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# <a id='toc5_'></a>[No chat templates, slightly bigger model](#toc0_)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"model = models.transformers(\"meta-llama/Llama-3.2-1B-Instruct\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"name='John' last_name='Smth' id=1\n"
]
}
],
"source": [
"class User(BaseModel):\n",
" name: str\n",
" last_name: str\n",
" id: int\n",
"\n",
"\n",
"generator = generate.json(model, User)\n",
"result = generator(\"Create a user profile with the fields name, last_name and id\")\n",
"print(result)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"sentiment='NEUTRAL'\n"
]
}
],
"source": [
"msg = \"This drug was the best thing I've ever taken. It completely changed my life.\"\n",
"\n",
"# Plain-text prompt template; `{msg}` is filled in with str.format before generation.\n",
"PROMPT = \"\"\"\n",
"You are a seasoned data analyst.\n",
"Classify the following medical-related text into three sentiment classes, namely:\n",
"NEGATIVE: if the text has a negative tone\n",
"POSITIVE: if the text has a positive tone\n",
"NEUTRAL: if the text has a neutral tone\n",
"Consider the overall sentiment of the text.\n",
"\n",
"<text_to_classify>\n",
"{msg}\n",
"</text_to_classify>\n",
"\"\"\".strip()\n",
"\n",
"\n",
"# Named `SentimentLabel` rather than `Sentiment` so that the `Sentiment` enum\n",
"# defined near the top of the notebook is not shadowed: the earlier prompt cell\n",
"# iterates over that enum and must keep working if it is re-run after this cell.\n",
"class SentimentLabel(BaseModel):\n",
"    sentiment: str = Field(pattern=\"^(POSITIVE|NEGATIVE|NEUTRAL)$\")\n",
"\n",
"\n",
"generator = generate.json(model, SentimentLabel)\n",
"result = generator(PROMPT.format(msg=msg))\n",
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# <a id='toc6_'></a>[Batch?](#toc0_)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Message: I have a complaint about the product; Sentiment: NEGATIVE\n",
"Message: I will never buy this product again; Sentiment: NEGATIVE\n",
"Message: I love this product, it is the best thing ever; Sentiment: NEUTRAL\n",
"Message: I am neutral about this product; Sentiment: NEUTRAL\n",
"Message: My mom recommended this product to me and I love it; Sentiment: POSITIVE\n",
"Message: I have had a great experience with this product; Sentiment: POSITIVE\n",
"Message: I have a complaint about the product; Sentiment: NEGATIVE\n",
"Message: I will never buy this product again; Sentiment: NEGATIVE\n",
"Message: I love this product, it is the best thing ever; Sentiment: NEUTRAL\n",
"Message: I am neutral about this product; Sentiment: POSITIVE\n",
"Message: My mom recommended this product to me and I love it; Sentiment: POSITIVE\n",
"Message: I have had a great experience with this product; Sentiment: POSITIVE\n",
"Message: I have a complaint about the product; Sentiment: NEGATIVE\n",
"Message: I will never buy this product again; Sentiment: NEGATIVE\n",
"Message: I love this product, it is the best thing ever; Sentiment: NEUTRAL\n",
"Message: I am neutral about this product; Sentiment: NEGATIVE\n",
"Message: My mom recommended this product to me and I love it; Sentiment: NEGATIVE\n",
"Message: I have had a great experience with this product; Sentiment: NEUTRAL\n",
"Message: I have a complaint about the product; Sentiment: NEGATIVE\n",
"Message: I will never buy this product again; Sentiment: NEGATIVE\n",
"Message: I love this product, it is the best thing ever; Sentiment: NEUTRAL\n",
"Message: I am neutral about this product; Sentiment: NEUTRAL\n",
"Message: My mom recommended this product to me and I love it; Sentiment: NEUTRAL\n",
"Message: I have had a great experience with this product; Sentiment: NEUTRAL\n",
"Message: I have a complaint about the product; Sentiment: NEGATIVE\n",
"Message: I will never buy this product again; Sentiment: NEGATIVE\n",
"Message: I love this product, it is the best thing ever; Sentiment: NEUTRAL\n",
"Message: I am neutral about this product; Sentiment: NEUTRAL\n",
"Message: My mom recommended this product to me and I love it; Sentiment: POSITIVE\n",
"Message: I have had a great experience with this product; Sentiment: NEUTRAL\n"
]
}
],
"source": [
"messages = [\n",
" \"I have a complaint about the product\",\n",
" \"I will never buy this product again\",\n",
" \"I love this product, it is the best thing ever\",\n",
" \"I am neutral about this product\",\n",
" \"My mom recommended this product to me and I love it\",\n",
" \"I have had a great experience with this product\",\n",
"] * 5\n",
"prompts = [PROMPT.format(msg=msg) for msg in messages]\n",
"results = generator(prompts)\n",
"\n",
"for m, r in zip(messages, results):\n",
" print(f\"Message: {m}; Sentiment: {r.sentiment}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "gtm-insights-intelligence-kfp-pipelines-Fe6-Kvub-py3.9",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.20"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment