Created
July 7, 2025 06:07
-
-
Save immortal3/6af71b0f9be87489d13a7e0f2cf68120 to your computer and use it in GitHub Desktop.
Comparing OpenAI and Qwen3-Embedding-8B embeddings with instruction-aware queries
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import openai | |
| import numpy as np | |
| from sklearn.metrics.pairwise import cosine_similarity | |
# Example queries. The demo's own test cases repeat these same two strings.
queries = [
    "I want to buy apple",
    "how can i erase object from image",
]

# Small retrieval corpus. Each entry has a 'title' and a 'content' field;
# the topics deliberately overlap (Apple the company, apples the fruit,
# consumer photo editing vs. research on object removal) so that a query
# can plausibly match more than one document.
documents = [
    dict(
        title='How to Buy Apple Stock',
        content='Apple Inc. (AAPL) is one of the most valuable publicly traded companies. To buy Apple stock, you can use online brokers like E*TRADE, Robinhood, or Charles Schwab.',
    ),
    dict(
        title='iPhone 15 Pro Purchase Guide',
        content='The iPhone 15 Pro is available for purchase at Apple Store, Best Buy, Amazon, and carrier stores like Verizon and AT&T. Prices start at $999 for the base model.',
    ),
    dict(
        title='Google Gemini Photo Editing Tutorial',
        content='Google Gemini AI can help edit photos by removing unwanted objects, enhancing image quality, and applying artistic filters. The Magic Eraser feature uses AI to intelligently remove objects from images.',
    ),
    dict(
        title='Deep Learning for Object Removal',
        content='Deep learning approaches for object removal in images include inpainting neural networks that can fill missing regions. CNNs are trained on large datasets to learn how to reconstruct plausible image content.',
    ),
    dict(
        title='Health Benefits of Eating Apples',
        content='Apples are rich in fiber, vitamin C, and antioxidants. They can be purchased at grocery stores, farmers markets, or online. Organic apples are available for those preferring pesticide-free options.',
    ),
    dict(
        title='Where to Buy Apples',
        content='You can buy apples at local farmers markets, grocery stores, or online retailers like Instacart and Amazon Fresh. Choose fresh, crisp apples in season for best quality.',
    ),
]
def get_openai_embedding(text, model="text-embedding-3-small"):
    """Embed ``text`` with an OpenAI embedding model.

    Args:
        text: Input forwarded unchanged as the request's ``input``.
        model: Name of the OpenAI embedding model to use.

    Returns:
        The ``embedding`` attribute of the first item in the response.
    """
    # Uses the module-level client of the ``openai`` package; credentials
    # are presumably picked up from the environment (OPENAI_API_KEY).
    result = openai.embeddings.create(input=text, model=model)
    first_item = result.data[0]
    return first_item.embedding
def get_qwen_embedding(text, model="Qwen/Qwen3-Embedding-8B"):
    """Embed ``text`` with a Qwen model through DeepInfra's OpenAI-compatible API.

    Args:
        text: Input forwarded unchanged as the request's ``input``.
        model: DeepInfra model identifier.

    Returns:
        The ``embedding`` attribute of the first item in the response.

    Raises:
        RuntimeError: If the ``DEEPINFRA_API_KEY`` environment variable is
            unset or empty.
    """
    api_key = os.getenv("DEEPINFRA_API_KEY")
    if not api_key:
        # Fail early: when ``api_key`` is None the OpenAI client falls back
        # to OPENAI_API_KEY, which would send the OpenAI secret to the
        # DeepInfra endpoint instead of reporting the missing key.
        raise RuntimeError(
            "DEEPINFRA_API_KEY is not set; cannot request Qwen embeddings from DeepInfra"
        )

    deepinfra_openai = openai.OpenAI(
        api_key=api_key,
        base_url="https://api.deepinfra.com/v1/openai",
    )
    embeddings = deepinfra_openai.embeddings.create(
        model=model,
        input=text,
        encoding_format="float",
    )
    return embeddings.data[0].embedding
def compare_embeddings(query, instruction=None):
    """Rank the sample documents against ``query`` with both embedding models.

    For each model (OpenAI first, then Qwen) the query and every entry of
    the module-level ``documents`` list are embedded, cosine similarities
    are computed, and the three best-scoring titles plus the mean of their
    scores are printed. A failure with one model is printed and the next
    model is still evaluated.

    Args:
        query: The search query text.
        instruction: Optional task description. When truthy, the text that
            gets embedded is an "Instruction: ..." line followed by a
            "Query: ..." line; otherwise the bare query is embedded.

    Returns:
        None. All results are written to stdout.
    """
    print(f"\nπ Query: '{query}'")
    print("-" * 50)

    # Build the text that is actually embedded for the query side.
    if instruction:
        # NOTE(review): the Qwen3-Embedding model card formats prompts as
        # "Instruct: <task>" / "Query:<query>"; confirm that the wording
        # used here is intentional.
        query_text = f"Instruction: {instruction}\nQuery: {query}"
        print(f"π― Using instruction: {instruction}")
    else:
        query_text = query
        print("π Using plain query (no instruction)")

    # (display name, embedding function) pairs, evaluated in this order.
    models = [
        ("OpenAI", get_openai_embedding),
        ("Qwen", get_qwen_embedding),
    ]

    for model_name, embedding_func in models:
        print(f"\nπ€ {model_name} Model Results:")
        print("-" * 30)
        try:
            # Query first, then one request per document (title and
            # content joined into a single string).
            query_embedding = embedding_func(query_text)
            doc_embeddings = [
                embedding_func(f"{doc['title']} - {doc['content']}")
                for doc in documents
            ]

            # One row of query-vs-document similarities.
            similarities = cosine_similarity([query_embedding], doc_embeddings)[0]

            # Indices of the highest scores, best first (at most three).
            top_indices = np.argsort(similarities)[::-1][:3]

            print("π Top 3 Results:")
            for rank, idx in enumerate(top_indices, start=1):
                doc = documents[idx]
                score = similarities[idx]
                print(f"{rank}. {doc['title']} (Score: {score:.3f})")
            print(f"π Avg Score: {np.mean(similarities[top_indices]):.3f}")
        except Exception as e:
            # Demo boundary: report the problem and keep going so that one
            # provider's failure does not hide the other provider's results.
            print(f"β Error with {model_name}: {e}")
            continue

    return None
def demonstrate():
    """Run every query/instruction pairing through ``compare_embeddings``.

    Prints a title banner, then for each query a header followed by one
    comparison per instruction. An instruction of ``None`` means the query
    is embedded without any instruction.
    """
    rule = "=" * 60

    print("π INSTRUCTION-AWARE EMBEDDINGS DEMO")
    print(rule)

    # (query, instructions) pairs; the leading None is the plain baseline.
    scenarios = (
        (
            'I want to buy apple',
            (
                None,
                'Given a financial question, retrieve investment information',
                'Given a technology question, retrieve product information',
                'Given a grocery shopping question, retrieve fruit purchase information',
            ),
        ),
        (
            'how can i erase object from image',
            (
                None,
                'Given a photo editing question, retrieve tutorial information',
                'Given a research question, retrieve technical information',
            ),
        ),
    )

    for query, instructions in scenarios:
        print("\n" + rule)
        print("TESTING: " + query)
        print(rule)
        for instruction in instructions:
            compare_embeddings(query, instruction)
            # Blank line between consecutive comparisons.
            print()
if __name__ == "__main__":
    # Load environment variables from a .env file before any client is
    # used; DEEPINFRA_API_KEY is read from the environment by the Qwen
    # helper, and the OpenAI key is presumably supplied the same way.
    import dotenv

    dotenv.load_dotenv()
    demonstrate()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| π INSTRUCTION-AWARE EMBEDDINGS DEMO | |
| ============================================================ | |
| ============================================================ | |
| TESTING: I want to buy apple | |
| ============================================================ | |
| π Query: 'I want to buy apple' | |
| -------------------------------------------------- | |
| π Using plain query (no instruction) | |
| π€ OpenAI Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. How to Buy Apple Stock (Score: 0.536) | |
| 2. Where to Buy Apples (Score: 0.497) | |
| 3. iPhone 15 Pro Purchase Guide (Score: 0.455) | |
| π Avg Score: 0.496 | |
| π€ Qwen Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. Where to Buy Apples (Score: 0.604) | |
| 2. How to Buy Apple Stock (Score: 0.594) | |
| 3. Health Benefits of Eating Apples (Score: 0.501) | |
| π Avg Score: 0.566 | |
| π Query: 'I want to buy apple' | |
| -------------------------------------------------- | |
| π― Using instruction: Given a financial question, retrieve investment information | |
| π€ OpenAI Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. How to Buy Apple Stock (Score: 0.472) | |
| 2. Where to Buy Apples (Score: 0.334) | |
| 3. iPhone 15 Pro Purchase Guide (Score: 0.320) | |
| π Avg Score: 0.375 | |
| π€ Qwen Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. How to Buy Apple Stock (Score: 0.743) | |
| 2. Where to Buy Apples (Score: 0.521) | |
| 3. iPhone 15 Pro Purchase Guide (Score: 0.486) | |
| π Avg Score: 0.583 | |
| π Query: 'I want to buy apple' | |
| -------------------------------------------------- | |
| π― Using instruction: Given a technology question, retrieve product information | |
| π€ OpenAI Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. How to Buy Apple Stock (Score: 0.410) | |
| 2. iPhone 15 Pro Purchase Guide (Score: 0.393) | |
| 3. Where to Buy Apples (Score: 0.392) | |
| π Avg Score: 0.398 | |
| π€ Qwen Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. How to Buy Apple Stock (Score: 0.565) | |
| 2. iPhone 15 Pro Purchase Guide (Score: 0.512) | |
| 3. Where to Buy Apples (Score: 0.499) | |
| π Avg Score: 0.525 | |
| π Query: 'I want to buy apple' | |
| -------------------------------------------------- | |
| π― Using instruction: Given a grocery shopping question, retrieve fruit purchase information | |
| π€ OpenAI Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. Where to Buy Apples (Score: 0.502) | |
| 2. Health Benefits of Eating Apples (Score: 0.467) | |
| 3. iPhone 15 Pro Purchase Guide (Score: 0.309) | |
| π Avg Score: 0.426 | |
| π€ Qwen Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. Where to Buy Apples (Score: 0.680) | |
| 2. Health Benefits of Eating Apples (Score: 0.620) | |
| 3. How to Buy Apple Stock (Score: 0.556) | |
| π Avg Score: 0.619 | |
| ============================================================ | |
| TESTING: how can i erase object from image | |
| ============================================================ | |
| π Query: 'how can i erase object from image' | |
| -------------------------------------------------- | |
| π Using plain query (no instruction) | |
| π€ OpenAI Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. Deep Learning for Object Removal (Score: 0.466) | |
| 2. Google Gemini Photo Editing Tutorial (Score: 0.441) | |
| 3. Health Benefits of Eating Apples (Score: 0.051) | |
| π Avg Score: 0.319 | |
| π€ Qwen Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. Google Gemini Photo Editing Tutorial (Score: 0.520) | |
| 2. Deep Learning for Object Removal (Score: 0.469) | |
| 3. Where to Buy Apples (Score: 0.165) | |
| π Avg Score: 0.385 | |
| π Query: 'how can i erase object from image' | |
| -------------------------------------------------- | |
| π― Using instruction: Given a photo editing question, retrieve tutorial information | |
| π€ OpenAI Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. Google Gemini Photo Editing Tutorial (Score: 0.502) | |
| 2. Deep Learning for Object Removal (Score: 0.419) | |
| 3. iPhone 15 Pro Purchase Guide (Score: 0.069) | |
| π Avg Score: 0.330 | |
| π€ Qwen Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. Google Gemini Photo Editing Tutorial (Score: 0.523) | |
| 2. Deep Learning for Object Removal (Score: 0.496) | |
| 3. Where to Buy Apples (Score: 0.134) | |
| π Avg Score: 0.384 | |
| π Query: 'how can i erase object from image' | |
| -------------------------------------------------- | |
| π― Using instruction: Given a research question, retrieve technical information | |
| π€ OpenAI Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. Deep Learning for Object Removal (Score: 0.456) | |
| 2. Google Gemini Photo Editing Tutorial (Score: 0.428) | |
| 3. Health Benefits of Eating Apples (Score: 0.072) | |
| π Avg Score: 0.319 | |
| π€ Qwen Model Results: | |
| ------------------------------ | |
| π Top 3 Results: | |
| 1. Deep Learning for Object Removal (Score: 0.569) | |
| 2. Google Gemini Photo Editing Tutorial (Score: 0.521) | |
| 3. Where to Buy Apples (Score: 0.106) | |
| π Avg Score: 0.399 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment