virattt/exploring-llama2-7b.ipynb

## exploring-llama2-7b.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4",
      "authorship_tag": "ABX9TyN35HSozziwQR3AxdftlgbF",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/virattt/27af14b54c4ff4135f3071ecfb7bab67/exploring-llama2-7b.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Step 1. Install dependencies"
      ],
      "metadata": {
        "id": "4qN24LvhqR4o"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "dIYs_9BZiWHA"
      },
      "outputs": [],
      "source": [
        "# Build llama-cpp-python from source with the cuBLAS backend enabled (-DLLAMA_CUBLAS=on)\n",
        "# so that model layers can be offloaded to the GPU. Both versions are pinned, and\n",
        "# --force-reinstall/--no-cache-dir keep pip from reusing an existing or cached build.\n",
        "# NOTE(review): 0.1.78 looks pinned because the model used below is a GGML .bin file and\n",
        "# later llama-cpp-python releases expect GGUF -- confirm before bumping the version.\n",
        "!CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.78 numpy==1.23.4 --force-reinstall --upgrade --no-cache-dir --verbose\n",
        "\n",
        "# huggingface_hub supplies hf_hub_download, which fetches the model file.\n",
        "# No further installs of llama-cpp-python/numpy are needed: the command above already\n",
        "# installs both pinned versions, and a plain reinstall could hide a failed GPU build\n",
        "# behind a CPU-only one.\n",
        "!pip install huggingface_hub"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Step 2. Define model name"
      ],
      "metadata": {
        "id": "HC_VHz0zqU3Q"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from huggingface_hub import hf_hub_download\n",
        "from llama_cpp import Llama\n",
        "\n",
        "# Hugging Face Hub repository that hosts the model files.\n",
        "model_name_or_path = 'TheBloke/Llama-2-7B-Chat-GGML'\n",
        "\n",
        "# Name of the single file to fetch from that repository (a .bin file).\n",
        "model_basename = 'llama-2-7b-chat.ggmlv3.q2_K.bin'"
      ],
      "metadata": {
        "id": "eAEF4byiidHc"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Step 3. Download and load the model from Hugging Face"
      ],
      "metadata": {
        "id": "7CnHJLpAqYmD"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Fetch the selected model file from the Hub; the value returned by hf_hub_download\n",
        "# is kept in model_path and handed to Llama(model_path=...) afterwards.\n",
        "model_path = hf_hub_download(\n",
        "    filename=model_basename,\n",
        "    repo_id=model_name_or_path,\n",
        ")"
      ],
      "metadata": {
        "id": "-r-9cbS0ikda"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Instantiate the llama-cpp-python wrapper around the downloaded model file.\n",
        "llm = Llama(\n",
        "    n_gpu_layers=32,  # number of layers requested for GPU offload\n",
        "    n_batch=512,\n",
        "    n_threads=2,\n",
        "    model_path=model_path,\n",
        ")"
      ],
      "metadata": {
        "id": "y9ckVxyCimBj"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Step 4. Create prompt"
      ],
      "metadata": {
        "id": "ZE0KF-qeqd9y"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# The raw user question that the model is asked to rewrite.\n",
        "prompt = 'What language family is the language of the tribe of the man who instructed Jeff Ball in?'\n",
        "\n",
        "# Build the SYSTEM / USER / ASSISTANT prompt from adjacent string literals, which Python\n",
        "# concatenates into a single string; only the USER line interpolates `prompt`.\n",
        "prompt_template = (\n",
        "    'SYSTEM: Rewrite the following user query into a clear, specific, and\\n'\n",
        "    'formal request suitable for retrieving relevant information from a vector database.\\n'\n",
        "    'Keep in mind that your rewritten query will be sent to a vector database, which\\n'\n",
        "    'does similarity search for retrieving documents.\\n'\n",
        "    '\\n'\n",
        "    f'USER: {prompt}\\n'\n",
        "    '\\n'\n",
        "    'ASSISTANT:\\n'\n",
        ")"
      ],
      "metadata": {
        "id": "pbMRnQx2ioOq"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Step 5. Query the model"
      ],
      "metadata": {
        "id": "OO5co-BmqgPn"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import time\n",
        "\n",
        "# Time the completion call. perf_counter() is a monotonic, high-resolution clock meant\n",
        "# for measuring elapsed time; time.time() follows the wall clock, which can be adjusted\n",
        "# while the call is running.\n",
        "start = time.perf_counter()\n",
        "response = llm(\n",
        "    prompt=prompt_template,\n",
        "    max_tokens=125,\n",
        "    temperature=0.0,\n",
        "    top_p=0.95,\n",
        "    repeat_penalty=1.2,\n",
        "    top_k=150,\n",
        ")\n",
        "end = time.perf_counter()\n",
        "\n",
        "print(f\"Took {end - start} seconds to run.\")"
      ],
      "metadata": {
        "id": "SHOuiWAVit2J"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Show only the text field of the first entry under the response's choices key.\n",
        "print(response[\"choices\"][0][\"text\"])"
      ],
      "metadata": {
        "id": "pT9Kkff_ixuC"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"provenance": [],
	"gpuType": "T4",
	"authorship_tag": "ABX9TyN35HSozziwQR3AxdftlgbF",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	},
	"accelerator": "GPU"
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/virattt/27af14b54c4ff4135f3071ecfb7bab67/exploring-llama2-7b.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"# Step 1. Install dependencies"
	],
	"metadata": {
	"id": "4qN24LvhqR4o"
	}
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "dIYs_9BZiWHA"
	},
	"outputs": [],
	"source": [
	"# Build llama-cpp-python from source with the cuBLAS backend enabled (-DLLAMA_CUBLAS=on)\n",
	"# so that model layers can be offloaded to the GPU. Both versions are pinned, and\n",
	"# --force-reinstall/--no-cache-dir keep pip from reusing an existing or cached build.\n",
	"# NOTE(review): 0.1.78 looks pinned because the model used below is a GGML .bin file and\n",
	"# later llama-cpp-python releases expect GGUF -- confirm before bumping the version.\n",
	"!CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.78 numpy==1.23.4 --force-reinstall --upgrade --no-cache-dir --verbose\n",
	"\n",
	"# huggingface_hub supplies hf_hub_download, which fetches the model file.\n",
	"# No further installs of llama-cpp-python/numpy are needed: the command above already\n",
	"# installs both pinned versions, and a plain reinstall could hide a failed GPU build\n",
	"# behind a CPU-only one.\n",
	"!pip install huggingface_hub"
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"# Step 2. Define model name"
	],
	"metadata": {
	"id": "HC_VHz0zqU3Q"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"from huggingface_hub import hf_hub_download\n",
	"from llama_cpp import Llama\n",
	"\n",
	"# Hugging Face Hub repository that hosts the model files.\n",
	"model_name_or_path = 'TheBloke/Llama-2-7B-Chat-GGML'\n",
	"\n",
	"# Name of the single file to fetch from that repository (a .bin file).\n",
	"model_basename = 'llama-2-7b-chat.ggmlv3.q2_K.bin'"
	],
	"metadata": {
	"id": "eAEF4byiidHc"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"# Step 3. Download and load the model from Hugging Face"
	],
	"metadata": {
	"id": "7CnHJLpAqYmD"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"# Fetch the selected model file from the Hub; the value returned by hf_hub_download\n",
	"# is kept in model_path and handed to Llama(model_path=...) afterwards.\n",
	"model_path = hf_hub_download(\n",
	"    filename=model_basename,\n",
	"    repo_id=model_name_or_path,\n",
	")"
	],
	"metadata": {
	"id": "-r-9cbS0ikda"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Instantiate the llama-cpp-python wrapper around the downloaded model file.\n",
	"llm = Llama(\n",
	"    n_gpu_layers=32,  # number of layers requested for GPU offload\n",
	"    n_batch=512,\n",
	"    n_threads=2,\n",
	"    model_path=model_path,\n",
	")"
	],
	"metadata": {
	"id": "y9ckVxyCimBj"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"# Step 4. Create prompt"
	],
	"metadata": {
	"id": "ZE0KF-qeqd9y"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"# The raw user question that the model is asked to rewrite.\n",
	"prompt = 'What language family is the language of the tribe of the man who instructed Jeff Ball in?'\n",
	"\n",
	"# Build the SYSTEM / USER / ASSISTANT prompt from adjacent string literals, which Python\n",
	"# concatenates into a single string; only the USER line interpolates `prompt`.\n",
	"prompt_template = (\n",
	"    'SYSTEM: Rewrite the following user query into a clear, specific, and\\n'\n",
	"    'formal request suitable for retrieving relevant information from a vector database.\\n'\n",
	"    'Keep in mind that your rewritten query will be sent to a vector database, which\\n'\n",
	"    'does similarity search for retrieving documents.\\n'\n",
	"    '\\n'\n",
	"    f'USER: {prompt}\\n'\n",
	"    '\\n'\n",
	"    'ASSISTANT:\\n'\n",
	")"
	],
	"metadata": {
	"id": "pbMRnQx2ioOq"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"# Step 5. Query the model"
	],
	"metadata": {
	"id": "OO5co-BmqgPn"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"import time\n",
	"\n",
	"# Time the completion call. perf_counter() is a monotonic, high-resolution clock meant\n",
	"# for measuring elapsed time; time.time() follows the wall clock, which can be adjusted\n",
	"# while the call is running.\n",
	"start = time.perf_counter()\n",
	"response = llm(\n",
	"    prompt=prompt_template,\n",
	"    max_tokens=125,\n",
	"    temperature=0.0,\n",
	"    top_p=0.95,\n",
	"    repeat_penalty=1.2,\n",
	"    top_k=150,\n",
	")\n",
	"end = time.perf_counter()\n",
	"\n",
	"print(f\"Took {end - start} seconds to run.\")"
	],
	"metadata": {
	"id": "SHOuiWAVit2J"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Show only the text field of the first entry under the response's choices key.\n",
	"print(response[\"choices\"][0][\"text\"])"
	],
	"metadata": {
	"id": "pT9Kkff_ixuC"
	},
	"execution_count": null,
	"outputs": []
	}
	]
	}