{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "2e1c5458-c25a-4150-adc2-e783bcbe2b9c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'matches': [\n",
       " {'id': 'coaaSxys5so-t129.0',\n",
       " 'metadata': {\n",
       " 'end': 157.0,\n",
       " 'start': 129.0,\n",
       " 'text': \"Let's ask another question. So this one's \"\n",
       " \"not even really a question. I'm just going \"\n",
       " 'to say OpenAI Clip. And what I want to do '\n",
       " 'is just say okay can you summarize what '\n",
       " 'OpenAI Clip is.',\n",
       " 'title': 'How to build next-level Q&A with OpenAI',\n",
       " 'url': 'https://youtu.be/coaaSxys5so'},\n",
       " 'score': 33.6478119,\n",
       " 'values': []},\n",
       " {'id': 'coaaSxys5so-t147.0',\n",
       " 'metadata': {\n",
       " 'end': 183.0,\n",
       " 'start': 147.0,\n",
       " 'text': \"So we'll come down here. Let's see what it \"\n",
       " 'returns. Cool so OpenAI Clip is a '\n",
       " 'contrastive language image pre-training '\n",
       " 'model that uses pairs of images and text '\n",
       " 'and returns a matrix of cosine similarity '\n",
       " \"between text and each image. Okay that's \"\n",
       " 'cool. So written in PyTorch uses bcelas.',\n",
       " 'title': 'How to build next-level Q&A with OpenAI',\n",
       " 'url': 'https://youtu.be/coaaSxys5so'},\n",
       " 'score': 31.5986061,\n",
       " 'values': []},\n",
       " {'id': 'bVZJ_O_-t2085.44',\n",
       " 'metadata': {\n",
       " 'end': 2131.7599999999998,\n",
       " 'start': 2085.44,\n",
       " 'text': \"OpenAI clip VIT so it's the vision \"\n",
       " 'transformer this VIT you see here refers '\n",
       " 'to the the vision transformer which clip '\n",
       " 'is using or is based on at least the '\n",
       " 'vision aspect and we want to write base '\n",
       " \"patch 32. So I mean we'll go into more \"\n",
       " 'detail but the patch part of that is '\n",
       " 'referring to the way that the model almost '\n",
       " 'tokenizes your images it splits an image',\n",
       " 'title': 'Intro to Dense Vectors for NLP and Vision',\n",
       " 'url': 'https://youtu.be/bVZJ_O_-0RE'},\n",
       " 'score': 31.4537525,\n",
       " 'values': []},\n",
       " {'id': '989aKUVBfbk-t35.0',\n",
       " 'metadata': {\n",
       " 'end': 88.5,\n",
       " 'start': 35.0,\n",
       " 'text': 'During pre-training OpenAI trained the '\n",
       " 'model on pairs of images and text and it '\n",
       " 'trained them to both output embedding '\n",
       " 'vectors that are as close as possible to '\n",
       " 'each other. So the text transformer was '\n",
       " 'trained to output a single embedding 512 '\n",
       " 'dimensional embedding that was as close as '\n",
       " \"possible to the vision transformer's image \"\n",
       " 'embedding for the image text pair. So what '\n",
       " 'that means is that clip is able to take '\n",
       " 'both images and text and embed them both '\n",
       " 'into a similar vector space. And with that '\n",
       " 'we can do a lot of things.',\n",
       " 'title': 'Fast intro to multi-modal ML with '\n",
       " \"OpenAI's CLIP\",\n",
       " 'url': 'https://youtu.be/989aKUVBfbk'},\n",
       " 'score': 31.4496136,\n",
       " 'values': []},\n",
       " {'id': '989aKUVBfbk-t98.0',\n",
       " 'metadata': {\n",
       " 'end': 119.0,\n",
       " 'start': 98.0,\n",
       " 'text': 'OpenAI released a GitHub repository OpenAI '\n",
       " \"clip here. This contains clip but we're \"\n",
       " 'not going to use this implementation. '\n",
       " \"We're actually going to use this \"\n",
       " 'implementation of clip. So this is on '\n",
       " 'Hugging Face.',\n",
       " 'title': 'Fast intro to multi-modal ML with '\n",
       " \"OpenAI's CLIP\",\n",
       " 'url': 'https://youtu.be/989aKUVBfbk'},\n",
       " 'score': 29.3169785,\n",
       " 'values': []}],\n",
       " 'namespace': ''}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
"query = \"what is OpenAI's CLIP?\"\n", | |
"\n", | |
"xq = model.encode(query).tolist()\n", | |
"\n", | |
"index.query(xq, top_k=5, include_metadata=True)" | |
] | |
  },
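  {
   "cell_type": "markdown",
   "id": "response-note",
   "metadata": {},
   "source": [
    "The response above is a dictionary with a `matches` list; each match carries an `id`, a `score`, and the `metadata` stored at indexing time (`start`/`end` timestamps, `text`, `title`, `url`). A minimal sketch of pulling those fields out, assuming the response object supports the dict-style access implied by the printed output:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "response-sketch",
   "metadata": {},
   "outputs": [],
   "source": [
    "res = index.query(xq, top_k=5, include_metadata=True)\n",
    "\n",
    "# list the score, source video, and timestamp for each retrieved chunk\n",
    "for match in res['matches']:\n",
    "    meta = match['metadata']\n",
    "    print(f\"{match['score']:.2f} | {meta['title']} | {meta['url']} (t={meta['start']:.0f}s)\")"
   ]
  }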
 ],
 "metadata": {
  "environment": {
   "kernel": "python3",
   "name": "common-cu110.m95",
   "type": "gcloud",
   "uri": "gcr.io/deeplearning-platform-release/base-cu110:m95"
  },
  "kernelspec": {
   "display_name": "Python 3.9.12 ('ml')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "vscode": {
   "interpreter": {
    "hash": "b8e7999f96e1b425e2d542f21b571f5a4be3e97158b0b46ea1b2500df63956ce"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}