{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "2e1c5458-c25a-4150-adc2-e783bcbe2b9c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'matches': [\n",
       " {'id': 'coaaSxys5so-t129.0',\n",
       " 'metadata': {\n",
       " 'end': 157.0,\n",
       " 'start': 129.0,\n",
       " 'text': \"Let's ask another question. So this one's \"\n",
       " \"not even really a question. I'm just going \"\n",
       " 'to say OpenAI Clip. And what I want to do '\n",
       " 'is just say okay can you summarize what '\n",
       " 'OpenAI Clip is.',\n",
       " 'title': 'How to build next-level Q&A with OpenAI',\n",
       " 'url': 'https://youtu.be/coaaSxys5so'},\n",
       " 'score': 33.6478119,\n",
       " 'values': []},\n",
       " {'id': 'coaaSxys5so-t147.0',\n",
       " 'metadata': {\n",
       " 'end': 183.0,\n",
       " 'start': 147.0,\n",
       " 'text': \"So we'll come down here. Let's see what it \"\n",
       " 'returns. Cool so OpenAI Clip is a '\n",
       " 'contrastive language image pre-training '\n",
       " 'model that uses pairs of images and text '\n",
       " 'and returns a matrix of cosine similarity '\n",
       " \"between text and each image. Okay that's \"\n",
       " 'cool. So written in PyTorch uses bcelas.',\n",
       " 'title': 'How to build next-level Q&A with OpenAI',\n",
       " 'url': 'https://youtu.be/coaaSxys5so'},\n",
       " 'score': 31.5986061,\n",
       " 'values': []},\n",
       " {'id': 'bVZJ_O_-t2085.44',\n",
       " 'metadata': {\n",
       " 'end': 2131.7599999999998,\n",
       " 'start': 2085.44,\n",
       " 'text': \"OpenAI clip VIT so it's the vision \"\n",
       " 'transformer this VIT you see here refers '\n",
       " 'to the the vision transformer which clip '\n",
       " 'is using or is based on at least the '\n",
       " 'vision aspect and we want to write base '\n",
       " \"patch 32. So I mean we'll go into more \"\n",
       " 'detail but the patch part of that is '\n",
       " 'referring to the way that the model almost '\n",
       " 'tokenizes your images it splits an image',\n",
       " 'title': 'Intro to Dense Vectors for NLP and Vision',\n",
       " 'url': 'https://youtu.be/bVZJ_O_-0RE'},\n",
       " 'score': 31.4537525,\n",
       " 'values': []},\n",
       " {'id': '989aKUVBfbk-t35.0',\n",
       " 'metadata': {\n",
       " 'end': 88.5,\n",
       " 'start': 35.0,\n",
       " 'text': 'During pre-training OpenAI trained the '\n",
       " 'model on pairs of images and text and it '\n",
       " 'trained them to both output embedding '\n",
       " 'vectors that are as close as possible to '\n",
       " 'each other. So the text transformer was '\n",
       " 'trained to output a single embedding 512 '\n",
       " 'dimensional embedding that was as close as '\n",
       " \"possible to the vision transformer's image \"\n",
       " 'embedding for the image text pair. So what '\n",
       " 'that means is that clip is able to take '\n",
       " 'both images and text and embed them both '\n",
       " 'into a similar vector space. And with that '\n",
       " 'we can do a lot of things.',\n",
       " 'title': 'Fast intro to multi-modal ML with '\n",
       " \"OpenAI's CLIP\",\n",
       " 'url': 'https://youtu.be/989aKUVBfbk'},\n",
       " 'score': 31.4496136,\n",
       " 'values': []},\n",
       " {'id': '989aKUVBfbk-t98.0',\n",
       " 'metadata': {\n",
       " 'end': 119.0,\n",
       " 'start': 98.0,\n",
       " 'text': 'OpenAI released a GitHub repository OpenAI '\n",
       " \"clip here. This contains clip but we're \"\n",
       " 'not going to use this implementation. '\n",
       " \"We're actually going to use this \"\n",
       " 'implementation of clip. So this is on '\n",
       " 'Hugging Face.',\n",
       " 'title': 'Fast intro to multi-modal ML with '\n",
       " \"OpenAI's CLIP\",\n",
       " 'url': 'https://youtu.be/989aKUVBfbk'},\n",
       " 'score': 29.3169785,\n",
       " 'values': []}],\n",
       " 'namespace': ''}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
"query = \"what is OpenAI's CLIP?\"\n", | |
"\n", | |
"xq = model.encode(query).tolist()\n", | |
"\n", | |
"index.query(xq, top_k=5, include_metadata=True)" | |
] | |
  },
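  {
   "cell_type": "markdown",
   "id": "response-note",
   "metadata": {},
   "source": [
    "The response above is a dictionary with a `matches` list; each match carries an `id`, a `score`, and the `metadata` stored at indexing time (`start`/`end` timestamps, `text`, `title`, `url`). A minimal sketch of pulling those fields out, assuming the response object supports the dict-style access implied by the printed output:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "response-sketch",
   "metadata": {},
   "outputs": [],
   "source": [
    "res = index.query(xq, top_k=5, include_metadata=True)\n",
    "\n",
    "# list the score, source video, and timestamp for each retrieved chunk\n",
    "for match in res['matches']:\n",
    "    meta = match['metadata']\n",
    "    print(f\"{match['score']:.2f} | {meta['title']} | {meta['url']} (t={meta['start']:.0f}s)\")"
   ]
  }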
 ],
 "metadata": {
  "environment": {
   "kernel": "python3",
   "name": "common-cu110.m95",
   "type": "gcloud",
   "uri": "gcr.io/deeplearning-platform-release/base-cu110:m95"
  },
  "kernelspec": {
   "display_name": "Python 3.9.12 ('ml')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "vscode": {
   "interpreter": {
    "hash": "b8e7999f96e1b425e2d542f21b571f5a4be3e97158b0b46ea1b2500df63956ce"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}