Skip to content

Instantly share code, notes, and snippets.

@flyfir248
Created November 5, 2023 17:16
Show Gist options
  • Save flyfir248/d4b4113b362905a77664040cd8dcec43 to your computer and use it in GitHub Desktop.
Save flyfir248/d4b4113b362905a77664040cd8dcec43 to your computer and use it in GitHub Desktop.
Jina V2 Embeddings.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/flyfir248/d4b4113b362905a77664040cd8dcec43/jina-v2-embeddings.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "80eb6fba",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:22:20.546363Z",
"start_time": "2023-10-30T12:22:20.541655Z"
},
"id": "80eb6fba"
},
"outputs": [],
"source": [
"from transformers import AutoModel\n",
"from numpy.linalg import norm"
]
},
{
"cell_type": "markdown",
"id": "8e8c4d7f",
"metadata": {
"id": "8e8c4d7f"
},
"source": [
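"## Mathematical Formula\n",
"\n",
"Cosine similarity of two embedding vectors $a$ and $b$: $\\cos(a, b) = \\dfrac{a \\cdot b}{\\lVert a \\rVert \\, \\lVert b \\rVert}$"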
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d2e5b029",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:23:21.704602Z",
"start_time": "2023-10-30T12:23:21.699205Z"
},
"id": "d2e5b029"
},
"outputs": [],
"source": [
"def cos_sim(a, b):\n",
"    \"\"\"Return the cosine similarity of two embedding vectors.\n",
"\n",
"    Computes ``(a @ b.T) / (norm(a) * norm(b))``, i.e. the dot product divided\n",
"    by the product of the vector norms.\n",
"\n",
"    Intended for 1-D vectors: for 2-D inputs ``numpy.linalg.norm`` defaults to\n",
"    the Frobenius norm, so the result is not a row-wise similarity matrix.\n",
"    \"\"\"\n",
"    return (a @ b.T) / (norm(a) * norm(b))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "93800fa3",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:23:25.016348Z",
"start_time": "2023-10-30T12:23:25.005782Z"
},
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "93800fa3",
"outputId": "1e62d6bf-c172-4111-809d-fc3838d18c35"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<function __main__.<lambda>(a, b)>"
]
},
"metadata": {},
"execution_count": 3
}
],
"source": [
"cos_sim"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d8262e38",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:25:09.557983Z",
"start_time": "2023-10-30T12:24:20.638049Z"
},
"id": "d8262e38"
},
"outputs": [],
"source": [
"# Load the Jina v2 base English embedding model through transformers' AutoModel.\n",
"# NOTE(review): trust_remote_code=True allows Python code shipped in the model repository\n",
"# to run locally; consider pinning `revision=` to a reviewed commit.\n",
"model = AutoModel.from_pretrained(\"jinaai/jina-embeddings-v2-base-en\", trust_remote_code=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "92378ceb",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:26:40.764899Z",
"start_time": "2023-10-30T12:26:40.472577Z"
},
"id": "92378ceb"
},
"outputs": [],
"source": [
"# Encode two paraphrased weather questions in a single batch call.\n",
"# The recorded output of the next cell shows one float32 row per sentence.\n",
"embeddings = model.encode([\"How is the weather today?\", \"What is the current weather like today?\"])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9935b11d",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:26:46.049706Z",
"start_time": "2023-10-30T12:26:46.034203Z"
},
"id": "9935b11d",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "9e36f2e5-5169-49b7-e66d-6932e78cad2a"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[-0.348271 , -0.60091805, 0.60223645, ..., -0.25232717,\n",
" 0.23249912, -0.702648 ],\n",
" [-0.11724895, -0.89896125, 0.4500912 , ..., -0.02847639,\n",
" -0.2287147 , -0.4228289 ]], dtype=float32)"
]
},
"metadata": {},
"execution_count": 6
}
],
"source": [
"embeddings"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c760dd63",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:27:05.502760Z",
"start_time": "2023-10-30T12:27:05.491801Z"
},
"id": "c760dd63",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "084e48e4-e91d-403b-f8f8-f69a9ed3e542"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.9341315"
]
},
"metadata": {},
"execution_count": 7
}
],
"source": [
"# Cosine similarity between the two weather-question embeddings (rows 0 and 1 of `embeddings`).\n",
"cos_sim(embeddings[0], embeddings[1])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9233db51",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:29:18.261885Z",
"start_time": "2023-10-30T12:29:18.256849Z"
},
"id": "9233db51"
},
"outputs": [],
"source": [
"def compute_similarity(sentence1: str, sentence2: str, encoder=None):\n",
"    \"\"\"Return the cosine similarity between the embeddings of two sentences.\n",
"\n",
"    Args:\n",
"        sentence1: First sentence to embed.\n",
"        sentence2: Second sentence to embed.\n",
"        encoder: Optional object exposing ``encode(list_of_sentences)``.\n",
"            Defaults to the notebook-level ``model`` loaded in an earlier cell.\n",
"\n",
"    Returns:\n",
"        The value of ``cos_sim`` for the two sentence embeddings.\n",
"    \"\"\"\n",
"    if encoder is None:\n",
"        # Fall back to the globally loaded model so two-argument calls keep working.\n",
"        encoder = model\n",
"    # Encode both sentences in one batch; row 0 / row 1 follow the argument order.\n",
"    embeddings = encoder.encode([sentence1, sentence2])\n",
"    return cos_sim(embeddings[0], embeddings[1])\n",
"\n",
"\n",
"# Example usage:\n",
"# similarity = compute_similarity(\"How is the weather today?\", \"What is the current weather like today?\", model)\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "6d3d6aae",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:29:54.267613Z",
"start_time": "2023-10-30T12:29:54.096207Z"
},
"id": "6d3d6aae"
},
"outputs": [],
"source": [
"# Two related but different statements (different verb and different fruit).\n",
"similarity = compute_similarity(\"I love apple.\", \"I like Banana.\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f5020ba9",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:29:57.216251Z",
"start_time": "2023-10-30T12:29:57.209619Z"
},
"id": "f5020ba9",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "2d3a41b3-ec97-46d6-ab06-1877c5239589"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.77415395"
]
},
"metadata": {},
"execution_count": 10
}
],
"source": [
"similarity"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "cb62050e",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:31:34.917240Z",
"start_time": "2023-10-30T12:31:34.795440Z"
},
"id": "cb62050e"
},
"outputs": [],
"source": [
"# Sanity check: identical sentences should score ~1.\n",
"# The recorded 1.0000001 is presumably float32 rounding.\n",
"similarity = compute_similarity(\"I like OpenAI.\", \"I like OpenAI.\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "f51071bc",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:31:37.544817Z",
"start_time": "2023-10-30T12:31:37.537919Z"
},
"id": "f51071bc",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "a3499d1b-3665-486a-b947-9b72d4414b8f"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"1.0000001"
]
},
"metadata": {},
"execution_count": 12
}
],
"source": [
"similarity"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "923678ac",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:31:58.523252Z",
"start_time": "2023-10-30T12:31:58.394500Z"
},
"id": "923678ac"
},
"outputs": [],
"source": [
"# Negated sentence: the recorded score below is still high (0.897), suggesting the score\n",
"# tracks topical overlap more than agreement in meaning.\n",
"similarity = compute_similarity(\"I like OpenAI.\", \"I don't like OpenAI.\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "d69ad057",
"metadata": {
"ExecuteTime": {
"end_time": "2023-10-30T12:32:00.069996Z",
"start_time": "2023-10-30T12:32:00.064078Z"
},
"id": "d69ad057",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "3ea7b0a2-8219-45ea-d116-b9f6aba96044"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0.8971375"
]
},
"metadata": {},
"execution_count": 14
}
],
"source": [
"similarity"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "147df01f",
"metadata": {
"id": "147df01f",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "74834d6d-55f1-4e01-fcd3-7b9235f5cadd"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Cosine Similarity: 0.9341315\n"
]
}
],
"source": [
"# Recap: score the weather sentence pair end to end.\n",
"# Reuses the imports, `cos_sim`, `compute_similarity` and the already-loaded `model`\n",
"# from the earlier cells instead of re-importing, redefining the helper, and\n",
"# instantiating the model a second time.\n",
"similarity = compute_similarity(\n",
"    \"How is the weather today?\",\n",
"    \"What is the current weather like today?\",\n",
")\n",
"print(\"Cosine Similarity:\", similarity)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"colab": {
"provenance": [],
"include_colab_link": true
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment