Skip to content

Instantly share code, notes, and snippets.

@JitendraZaa
Created December 13, 2023 17:43
Show Gist options
  • Save JitendraZaa/f04ed4c5cfca6c97b97cf57f38eeeab1 to your computer and use it in GitHub Desktop.
Save JitendraZaa/f04ed4c5cfca6c97b97cf57f38eeeab1 to your computer and use it in GitHub Desktop.
Python code that connects to Salesforce and uses OpenAI embeddings to rank leads against a question
import os
import openai
import numpy as np
import pandas as pd
import requests
from ast import literal_eval
def lambda_handler(event, context):
    """Rank Salesforce leads by how closely they match a question.

    Reads the OpenAI and Salesforce credentials from environment variables,
    logs in to Salesforce with the OAuth password grant, queries the Lead
    object, embeds each lead's name and email, and scores every lead against
    the embedded question using cosine similarity.

    Args:
        event: Invocation payload; the optional 'question' key holds the text
            to match leads against (defaults to 'Default Question').
        context: Invocation context; unused.

    Returns:
        A dict with 'statusCode' 200 and a 'body' holding the question and up
        to 100 {'Lead', 'Similarity'} entries, best match first.

    Raises:
        requests.HTTPError: If Salesforce rejects the login or the query.
    """
    question = event.get('question', 'Default Question')

    # Set OpenAI credentials
    openai.api_key = os.getenv('OPENAI_API_KEY')

    # Salesforce authentication and data extraction
    leads = _fetch_salesforce_leads()

    # OpenAI Embedding
    # Each lead is embedded on its own so that vector i always belongs to
    # lead i; embedding one concatenated blob of all leads would lose that
    # correspondence. This costs one embedding request per lead.
    embedded_leads = []
    lead_vectors = []
    for lead in leads:
        text = ' '.join(
            part for part in (lead.get('Name'), lead.get('Email')) if part
        )
        if not text:
            continue  # nothing to embed for this record
        embedded_leads.append(lead)
        lead_vectors.append(get_embeddings_for_text(text))

    response_data = []
    if lead_vectors:
        question_vector = get_embeddings_for_text(question)
        scores = _cosine_scores(question_vector, lead_vectors)
        ranked = sorted(
            zip(embedded_leads, scores),
            key=lambda pair: pair[1],
            reverse=True,
        )
        response_data = [
            {'Lead': lead.get('Name'), 'Similarity': round(score, 4)}
            for lead, score in ranked[:100]
        ]

    return {
        'statusCode': 200,
        'body': {'question': question, 'results': response_data}
    }


def _fetch_salesforce_leads():
    """Log in to Salesforce and return the records of the Lead query."""
    timeout_seconds = 30  # never let a stalled connection hang the handler

    sf_username = os.getenv('SF_USERNAME')
    # Default to '' so an unset variable cannot raise TypeError when the
    # password and security token are concatenated below.
    sf_password = os.getenv('SF_PASSWORD', '')
    sf_security_token = os.getenv('SF_SECURITY_TOKEN', '')
    sf_instance = os.getenv('SF_INSTANCE')
    sf_instance_afterlogin = os.getenv('SF_INSTANCE_AFTERLOGIN')
    sf_client_id = os.getenv('SF_CLIENT_ID')
    sf_client_secret = os.getenv('SF_CLIENT_SECRET')

    auth_url = f'https://{sf_instance}/services/oauth2/token'
    query_url = f'https://{sf_instance_afterlogin}/services/data/v57.0/query?q=SELECT+Id,Name,Company,Title,LeadSource,Email,Status+FROM+Lead'
    auth_payload = {
        'grant_type': 'password',
        'client_id': sf_client_id,
        'client_secret': sf_client_secret,
        'username': sf_username,
        'password': sf_password + sf_security_token
    }

    auth_response = requests.post(
        auth_url, data=auth_payload, timeout=timeout_seconds
    )
    # Surface a failed login as an HTTP error rather than a KeyError on the
    # missing 'access_token' field.
    auth_response.raise_for_status()
    access_token = auth_response.json()['access_token']

    headers = {'Authorization': f'Bearer {access_token}'}
    query_response = requests.get(
        query_url, headers=headers, timeout=timeout_seconds
    )
    query_response.raise_for_status()
    # NOTE(review): only the records in this first response are used; if the
    # API pages large result sets, later pages are not fetched - confirm.
    return query_response.json().get('records', [])


def _cosine_scores(query_vector, candidate_vectors):
    """Return the cosine similarity of query_vector to each candidate row."""
    query = np.asarray(query_vector, dtype=float)
    candidates = np.asarray(candidate_vectors, dtype=float)
    denominators = np.linalg.norm(candidates, axis=1) * np.linalg.norm(query)
    # A zero-magnitude vector has no direction: score it 0 instead of
    # dividing by zero.
    is_zero = denominators == 0
    safe_denominators = np.where(is_zero, 1.0, denominators)
    scores = np.where(is_zero, 0.0, candidates @ query / safe_denominators)
    return scores.tolist()
def cosine_similarity(A, B):
    """Return the cosine similarity between vectors A and B.

    Args:
        A: First vector (any sequence of numbers or NumPy array).
        B: Second vector, the same length as A.

    Returns:
        The dot product of A and B divided by the product of their
        Euclidean norms, or 0.0 when either vector has zero magnitude.
    """
    a = np.asarray(A, dtype=float)
    b = np.asarray(B, dtype=float)
    # `norm` was never imported in this file; use the fully qualified NumPy
    # function so the call cannot raise NameError.
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # A zero vector has no direction; avoid dividing by zero.
        return 0.0
    return np.dot(a, b) / denominator
def get_embeddings_for_text(input_term):
    """Request an embedding for input_term and return the first result.

    Calls openai.Embedding.create with the "text-embedding-ada-002" model and
    returns the 'embedding' field of the first item in the response's 'data'.
    """
    response = openai.Embedding.create(
        model="text-embedding-ada-002",
        input=input_term,
    )
    first_item = response['data'][0]
    return first_item['embedding']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment