Skip to content

Instantly share code, notes, and snippets.

@kirshiyin89
Last active February 9, 2023 20:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save kirshiyin89/a3a7dfd529a944ec3bcc53069ffaf652 to your computer and use it in GitHub Desktop.
Save kirshiyin89/a3a7dfd529a944ec3bcc53069ffaf652 to your computer and use it in GitHub Desktop.
Question Answering app with Python and Pinecone
import itertools
import os
from typing import Iterable, Iterator

import pandas as pd
import pinecone
from sentence_transformers import SentenceTransformer
# --- Pinecone connection ---------------------------------------------------
# Prefer the PINECONE_API_KEY environment variable so the key does not have to
# be pasted into the source; the placeholder is used only when it is unset.
api_key = os.environ.get("PINECONE_API_KEY", "YOUR API KEY FROM PINECONE CONSOLE")
pinecone.init(api_key=api_key, environment='us-west1-gcp')
index_name = "question-answering"
# Handle to the index named above; this script does not create the index.
index = pinecone.Index(index_name=index_name)

# --- Load the complaints ---------------------------------------------------
DATA_FILE = "comcast.csv"
# Widen the column display so long complaint texts are not cut off when printed.
pd.set_option("display.max_colwidth", 500)
df = pd.read_csv(
    DATA_FILE, usecols=["Ticket", "CustomerComplaint"], index_col=False
)
# Shuffle all rows, renumber them, then drop rows that are exact duplicates
# across both loaded columns.
df = df.sample(frac=1).reset_index(drop=True).drop_duplicates()
# Tickets are used as vector ids when upserting and are compared against the
# ids of the returned matches, so keep them as strings.
df['Ticket'] = df['Ticket'].apply(str)

# --- Embed every complaint -------------------------------------------------
model = SentenceTransformer("average_word_embeddings_glove.6B.300d")
# One encode() call per row; each embedding is stored as a plain Python list.
df["question_vector"] = df.CustomerComplaint.apply(lambda x: model.encode(str(x)).tolist())
def chunks(iterable: Iterable, batch_size: int = 100) -> Iterator[tuple]:
    """Yield successive tuples of at most ``batch_size`` items from *iterable*.

    The input is consumed lazily, so it may be a generator. Every yielded
    tuple holds exactly ``batch_size`` items except possibly the last one,
    which holds whatever remains. An empty input yields nothing.

    Args:
        iterable: Any iterable of items to batch.
        batch_size: Maximum number of items per yielded tuple; must be >= 1.

    Yields:
        Non-empty tuples of consecutive items, in input order.

    Raises:
        ValueError: If ``batch_size`` is less than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A size of 0 would make the first slice empty and silently drop every
    # item, so reject it explicitly instead of returning no batches.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")
    it = iter(iterable)
    chunk = tuple(itertools.islice(it, batch_size))
    # An empty tuple means the iterator is exhausted.
    while chunk:
        yield chunk
        chunk = tuple(itertools.islice(it, batch_size))
# Send the (ticket id, embedding) pairs to the index in batches (chunks()
# defaults to 100 per call) instead of one request carrying every vector.
for batch in chunks(zip(df.Ticket, df.question_vector)):
    index.upsert(vectors=batch)
# Free-text questions to look up; each one is embedded with the same model
# that produced the complaint vectors.
query_questions = [
    "Wifi Not working",
]
query_vectors = [model.encode(str(question)).tolist() for question in query_questions]
query_results = index.query(queries=query_vectors, top_k=5)

# Ticket -> complaint lookup built once, so each match is resolved without
# rescanning the DataFrame. Keeping the first row per ticket mirrors the
# earlier "first matching row" behaviour.
complaint_by_ticket = df.drop_duplicates(subset="Ticket").set_index("Ticket")[
    "CustomerComplaint"
]

# query_results.results is paired with the questions by position.
for question, res in zip(query_questions, query_results.results):
    print("\n\n\n Original question : " + str(question))
    print("\n Most similar questions based on pinecone vector search: \n")
    ids = [match.id for match in res.matches]
    scores = [match.score for match in res.matches]
    df_result = pd.DataFrame(
        {
            "Ticket#": ids,
            # .get() yields None for an id that is not in the loaded CSV
            # rather than aborting the whole report with an IndexError.
            "Question": [complaint_by_ticket.get(_id) for _id in ids],
            "Score": scores,
        }
    )
    print(df_result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment