Created
December 14, 2023 00:00
-
-
Save guinslym/9733f849a52bf104d17fb45c33b69fde to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import lh3.api | |
client = lh3.api.Client() | |
from pprint import pprint as print | |
from datetime import datetime, date, timedelta | |
import re | |
import time | |
from dateutil.parser import parse | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
from dashboard.utils.utils import (Chats, retrieve_transcript, search_chats, | |
soft_anonimyzation) | |
class TranscriptMessage:
    """One line of a chat transcript, parsed from a transcript element.

    Attributes:
        role: "guest", "operator", "system" or "unknown" (see extract_role).
        time: datetime built from the chat's accepted date and the time
            prefix of the line (see extract_time).
        position: index of the line within the transcript, as given by
            the caller.
        chat_id: identifier of the chat the line belongs to.
        message: the element's text with surrounding whitespace removed.
    """

    def __init__(self, message, position, chat_id, accepted):
        """Build a message from an element exposing ``.text``.

        Args:
            message: object with a ``text`` attribute (the caller passes
                BeautifulSoup ``div`` tags).
            position: ordinal of the line in the transcript.
            chat_id: id of the owning chat.
            accepted: the chat's "accepted" timestamp string; only the part
                before the first "T" is used.
        """
        self.role = self.extract_role(message)
        self.time = self.extract_time(message, accepted)
        self.position = position
        self.chat_id = chat_id
        self.message = message.text.strip()

    def extract_role(self, line):
        """Classify the sender of a line from the domain found in its text.

        The checks are ordered from most to least specific; the first match
        wins. Returns "guest", "system", "operator" or "unknown".
        """
        text = line.text
        if "web.ca.libraryh3lp.com" in text:
            return "guest"
        if "chat.ca.libraryh3lp.com System message:" in text:
            return "system"
        # The original tested the bare string literal, which is always
        # truthy, so every remaining line was reported as "operator" and the
        # branches below were unreachable.
        if "@ca.libraryh3lp.com" in text:
            return "operator"
        if "chat.ca.libraryh3lp.com" in text:
            return "system"
        return "unknown"

    def extract_time(self, line, accepted):
        """Return a datetime combining the accepted date with the line time.

        Takes the first five characters of the line text as the time of day
        (assumes the line starts with an "HH:MM" stamp - TODO confirm against
        real transcripts). Every line gets the accepted date, so a chat that
        runs past midnight yields times on the wrong day.
        """
        accepted_date = accepted.split("T")[0]
        return parse(accepted_date + " " + line.text[0:5])

    def __repr__(self):
        return "TranscriptMessage: \n role: {0}\n time: {1}\n position: {2}\n message: {3}\n chat_id: {4}\n".format(
            self.role,
            self.time,
            self.position,
            self.message,
            self.chat_id,
        )
# Collect, for every accepted chat in the date range, the delay between a
# guest message and the operator reply that follows it. Every such pair is
# recorded in `save_to_excel`; pairs whose delay exceeds
# SLOW_REPLY_THRESHOLD are also recorded in `operator_waited_too_long`.

# A reply slower than this gets the chat flagged for review.
SLOW_REPLY_THRESHOLD = timedelta(minutes=5)

chats = client.chats().list_day(2023, 10, 1, to="2023-11-30")
print(len(chats))
time.sleep(1)

operator_waited_too_long = []
save_to_excel = []

for this_chat in chats:
    # The listing entry is only used for its id; the full record is fetched
    # once and reused for both metadata and transcript.
    chat = client.one('chats', this_chat.get("id")).get()
    accepted = chat.get("accepted")
    if not accepted:
        # Chats that were never accepted have no operator reply to measure.
        continue

    chat_transcript = chat.get("transcript")
    if not chat_transcript:
        # Nothing to parse; BeautifulSoup cannot be given None.
        continue

    chat_id = chat.get("id")
    guestID = chat.get("guest").get("jid")
    operator_username = chat.get("operator").get("name")
    operator_id = chat.get("operator").get("id")
    queue = chat.get("queue").get("name")
    # Hour of day the chat started, converted to the local timezone.
    started = parse(chat.get("started")).astimezone(None)
    hour = started.hour

    divs = BeautifulSoup(chat_transcript, 'html.parser').find_all('div')
    # The first and last <div> are dropped (presumably wrapper/header
    # elements rather than messages - TODO confirm against a transcript).
    divs = divs[1:-1]

    # Keep only the first message of each run of consecutive guest or
    # operator messages, so the list alternates between the two roles.
    # System/unknown lines are ignored and do not break a run.
    this_transcript = []
    previous_role = None
    for position, div in enumerate(divs):
        msg = TranscriptMessage(div, position, chat_id, accepted)
        if msg.role not in ("operator", "guest"):
            continue
        if msg.role != previous_role:
            this_transcript.append(msg)
        # The original never stored a guest/operator role here, so the
        # comparison above could not detect a repeated role.
        previous_role = msg.role

    print(this_transcript)
    print("*"*200)

    # Walk adjacent pairs; only guest -> operator transitions are replies.
    for guest_msg, operator_msg in zip(this_transcript, this_transcript[1:]):
        if not (operator_msg.role == "operator" and guest_msg.role == "guest"):
            continue

        operator_time = operator_msg.time
        guest_time = guest_msg.time
        time_diff = operator_time - guest_time

        # Format the timedelta as HH:MM:SS, prefixed with days if any.
        days = time_diff.days
        hours, remainder = divmod(time_diff.seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        formatted_time_diff = "{:02}:{:02}:{:02}".format(hours, minutes, seconds)
        if days > 0:
            formatted_time_diff = f"{days} days, {formatted_time_diff}"

        row = {
            "chat_id": chat_id,
            "guest_id": guestID,
            "queue": queue,
            "operator_time": operator_time,
            "time_diff": formatted_time_diff,
            "hour": hour,
            "operator_id": operator_id,
            "operator_username": operator_username,
            "link": "https://ca.libraryh3lp.com/dashboard/queues/redacted/calls/redacted/"+str(chat_id),
        }
        save_to_excel.append(row)

        if time_diff > SLOW_REPLY_THRESHOLD:
            print("check this line : {0}".format(guest_msg.message))
            print("check this line : {0}".format(operator_msg.message))
            # Separate copy so the two result lists never share a dict.
            operator_waited_too_long.append(dict(row))
def overlapping_chats(chat_id, operator_time):
    """Count the operator's chats that overlap in time with a given chat.

    Fetches the chat `chat_id`, searches for chats handled by the same
    operator around `operator_time`, and counts those whose
    [started, ended] interval intersects the reference chat's interval.
    The reference chat is counted too if the search returns it.

    Args:
        chat_id: id of the reference chat.
        operator_time: datetime of the operator's reply; a 7-minute margin
            is applied on each side before the search bounds are formatted.
            Because the bounds are sent as dates ("%Y-%m-%d"), the margin
            only matters when it crosses midnight.

    Returns:
        int: number of overlapping chats among the first 25 search results.
    """
    transcript_metadata = client.one('chats', chat_id).get()
    operator_username = transcript_metadata.get("operator").get("name")

    window_start = operator_time - timedelta(minutes=7)
    window_end = operator_time + timedelta(minutes=7)
    query = {
        "query": {
            "operator": [operator_username],
            "from": window_start.strftime("%Y-%m-%d"),
            "to": window_end.strftime("%Y-%m-%d"),
        },
        "sort": [{"started": "descending"}],
    }
    chats, _content_range = search_chats(client, query, chat_range=(0, 25))
    if not chats:
        return 0

    # Parse the reference interval once instead of once per candidate.
    reference_started = parse(transcript_metadata.get("started"))
    reference_ended = parse(transcript_metadata.get("ended"))

    simultaneous = 0
    for chat in chats:
        started = chat.get("started")
        ended = chat.get("ended")
        if not started or not ended:
            # A candidate without both timestamps cannot be compared.
            continue
        # Two intervals overlap when each one starts before the other ends.
        # (The original repeated this same pair of comparisons on both
        # sides of an `or`.)
        if parse(started) <= reference_ended and parse(ended) >= reference_started:
            simultaneous += 1
    return simultaneous
""" | |
df = pd.DataFrame(save_to_excel) | |
with pd.ExcelWriter('all_silence_time.xlsx') as writer: | |
df.to_excel(writer, index=False) | |
print(save_to_excel) | |
#find_delayed_responses(chat_transcript) | |
""" | |
# Export the slow replies, annotated with how many chats the operator was
# handling at the same time, to an Excel workbook.
df = pd.DataFrame(operator_waited_too_long)
if df.empty:
    # With no rows, a row-wise apply returns an empty DataFrame that cannot
    # be assigned to a column; add an empty column instead.
    df['simultaneous_chats'] = pd.Series(dtype="int64")
else:
    # One lookup per flagged reply (each call queries the chat service).
    df['simultaneous_chats'] = df.apply(
        lambda row: overlapping_chats(row['chat_id'], row['operator_time']),
        axis=1,
    )
print(df.head())
with pd.ExcelWriter('operator_waited_too_long.xlsx') as writer:
    df.to_excel(writer, index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment