Skip to content

Instantly share code, notes, and snippets.

@guinslym
Created December 14, 2023 00:00
Show Gist options
  • Save guinslym/9733f849a52bf104d17fb45c33b69fde to your computer and use it in GitHub Desktop.
Save guinslym/9733f849a52bf104d17fb45c33b69fde to your computer and use it in GitHub Desktop.
import lh3.api
client = lh3.api.Client()
from pprint import pprint as print
from datetime import datetime, date, timedelta
import re
import time
from dateutil.parser import parse
from bs4 import BeautifulSoup
import pandas as pd
from dashboard.utils.utils import (Chats, retrieve_transcript, search_chats,
soft_anonimyzation)
class TranscriptMessage:
    """One line of a chat transcript, tagged with its sender role and time.

    Built from a parsed transcript element, i.e. any object exposing a
    ``.text`` string (the caller passes BeautifulSoup ``div`` tags).

    Attributes:
        role: ``"guest"``, ``"operator"``, ``"system"`` or ``"unknown"``.
        time: datetime combining the date part of ``accepted`` with the
            first five characters of the line.
        position: index of the line, as supplied by the caller.
        chat_id: identifier of the chat, as supplied by the caller.
        message: the line's text with surrounding whitespace stripped.
    """

    def __init__(self, message, position, chat_id, accepted):
        self.role = self.extract_role(message)
        self.time = self.extract_time(message, accepted)
        self.position = position
        self.chat_id = chat_id
        self.message = message.text.strip()

    def extract_role(self, line):
        """Classify the sender of ``line`` from the address found in its text.

        Checks are ordered from most to least specific; the first match wins.
        """
        text = line.text
        if "web.ca.libraryh3lp.com" in text:
            return "guest"
        if "chat.ca.libraryh3lp.com System message:" in text:
            return "system"
        # This must be a substring test: a bare string literal is always
        # truthy and would label every remaining line as "operator".
        if "@ca.libraryh3lp.com" in text:
            return "operator"
        if "chat.ca.libraryh3lp.com" in text:
            return "system"
        return "unknown"

    def extract_time(self, line, accepted):
        """Return the line's timestamp as a datetime.

        The date is whatever precedes ``"T"`` in ``accepted``; the time of day
        is the first five characters of the line (presumably ``HH:MM`` --
        confirm against the transcript format). The date always comes from
        ``accepted``, so it is not advanced for lines written after midnight.
        """
        accepted_date = accepted.split("T")[0]
        return parse(accepted_date + " " + line.text[0:5])

    def __repr__(self):
        return "TranscriptMessage: \n role: {0}\n time: {1}\n position: {2}\n message: {3}\n chat_id: {4}\n".format(
            self.role,
            self.time,
            self.position,
            self.message,
            self.chat_id,
        )
# Scan every chat in the reporting window and record how long each operator
# took to answer the guest line that immediately preceded their reply.
today = date.today()
chats = client.chats().list_day(2023, 10, 1, to="2023-11-30")
print(len(chats))
time.sleep(1)

# Replies slower than _MAX_REPLY_DELAY only.
operator_waited_too_long = []
# Every guest -> operator reply pair, whatever the delay.
save_to_excel = []

_MAX_REPLY_DELAY = timedelta(minutes=5)
_CHAT_LINK_PREFIX = "https://ca.libraryh3lp.com/dashboard/queues/redacted/calls/redacted/"


def _format_time_diff(time_diff):
    """Render a timedelta as ``HH:MM:SS``, prefixed by ``N days, `` when N > 0."""
    hours, remainder = divmod(time_diff.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    formatted = "{:02}:{:02}:{:02}".format(hours, minutes, seconds)
    if time_diff.days > 0:
        formatted = f"{time_diff.days} days, {formatted}"
    return formatted


for this_chat in chats:
    chat = client.one('chats', this_chat.get("id")).get()

    # Message timestamps are anchored on the accepted date, so a chat that was
    # never accepted cannot be analysed.
    accepted = chat.get("accepted")
    if not accepted:
        continue

    # The record fetched above already carries the transcript; a chat without
    # one has nothing to measure.
    chat_transcript = chat.get("transcript")
    if not chat_transcript:
        continue

    chat_id = chat.get("id")
    guestID = chat.get("guest").get("jid")
    operator = chat.get("operator")
    operator_username = operator.get("name")
    operator_id = operator.get("id")
    queue = chat.get("queue").get("name")
    # astimezone(None) converts to the system's local timezone.
    started = parse(chat.get("started")).astimezone(None)
    hour = started.hour

    soup = BeautifulSoup(chat_transcript, 'html.parser')
    # The first and last <div> are dropped -- presumably wrapper/header
    # elements rather than messages; confirm against the transcript markup.
    divs = soup.find_all('div')[1:-1]
    messages = [
        TranscriptMessage(div, position, chat_id, accepted)
        for position, div in enumerate(divs)
    ]

    # NOTE(review): every guest/operator line is kept, so each delay is
    # measured from the guest's LAST line before the operator replied.
    # Collapsing runs of same-role lines would instead measure from the first
    # line of the run -- confirm which definition the report should use.
    this_transcript = [m for m in messages if m.role in ("operator", "guest")]
    print(this_transcript)
    print("*" * 200)

    # Walk adjacent pairs and keep those where an operator answers a guest.
    for guest_msg, operator_msg in zip(this_transcript, this_transcript[1:]):
        if guest_msg.role != "guest" or operator_msg.role != "operator":
            continue

        operator_time = operator_msg.time
        time_diff = operator_time - guest_msg.time
        row = {
            "chat_id": chat_id,
            "guest_id": guestID,
            "queue": queue,
            "operator_time": operator_time,
            "time_diff": _format_time_diff(time_diff),
            "hour": hour,
            "operator_id": operator_id,
            "operator_username": operator_username,
            "link": _CHAT_LINK_PREFIX + str(chat_id),
        }
        save_to_excel.append(row)

        if time_diff > _MAX_REPLY_DELAY:
            print("check this line : {0}".format(guest_msg.message))
            print("check this line : {0}".format(operator_msg.message))
            # Separate copy so the two reports never share a dict.
            operator_waited_too_long.append(dict(row))
def overlapping_chats(chat_id, operator_time):
    """Count the operator's chats whose time span overlaps chat ``chat_id``.

    The chat is fetched to learn its operator and its started/ended
    timestamps. The operator's chats are then searched over the calendar
    date(s) covered by ``operator_time`` +/- 7 minutes, and every result whose
    [started, ended] interval intersects the reference chat's interval is
    counted.

    Args:
        chat_id: identifier of the reference chat.
        operator_time: datetime of the operator's delayed reply; it only
            determines which date(s) are searched.

    Returns:
        The number of overlapping chats among the search results.
        NOTE(review): the reference chat presumably appears in its own search
        results and would then be counted too -- confirm whether callers
        expect it to be excluded.
    """
    transcript_metadata = client.one('chats', chat_id).get()
    operator_username = transcript_metadata.get("operator").get("name")

    # The query takes dates only, so the +/- 7 minute window just widens the
    # search to the neighbouring day when the reply is close to midnight.
    window_start = operator_time - timedelta(minutes=7)
    window_end = operator_time + timedelta(minutes=7)
    query = {
        "query": {
            "operator": [operator_username],
            "from": window_start.strftime("%Y-%m-%d"),
            "to": window_end.strftime("%Y-%m-%d"),
        },
        "sort": [{"started": "descending"}],
    }
    # chat_range=(0, 25) is passed through to search_chats -- presumably it
    # caps the result set at 25 chats; confirm.
    chats, _content_range = search_chats(client, query, chat_range=(0, 25))
    if not chats:
        return 0

    # Parse the reference interval once rather than for every candidate.
    reference_started = parse(transcript_metadata.get("started"))
    reference_ended = parse(transcript_metadata.get("ended"))

    # Two intervals overlap when each one starts before the other ends.
    simultaneous = [
        chat
        for chat in chats
        if parse(chat.get("started")) <= reference_ended
        and parse(chat.get("ended")) >= reference_started
    ]
    # NOTE(review): only the count is used; the Chats wrapping is kept in case
    # its constructor validates the record -- confirm before removing.
    simultaneous = [Chats(chat) for chat in simultaneous]
    return len(simultaneous)
# Disabled: export of every guest -> operator delay, not only the slow ones.
#   df = pd.DataFrame(save_to_excel)
#   with pd.ExcelWriter('all_silence_time.xlsx') as writer:
#       df.to_excel(writer, index=False)
#   print(save_to_excel)
#   #find_delayed_responses(chat_transcript)

# Report of slow replies, with the operator's simultaneous chat count added.
df = pd.DataFrame(operator_waited_too_long)
# Build the column from the records rather than df.apply(axis=1): on an empty
# frame apply() returns a DataFrame, which cannot be assigned to one column.
df['simultaneous_chats'] = [
    overlapping_chats(row['chat_id'], row['operator_time'])
    for row in operator_waited_too_long
]
print(df.head())
with pd.ExcelWriter('operator_waited_too_long.xlsx') as writer:
    df.to_excel(writer, index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment