# guinslym/delays_too_long.py

## delays_too_long.py

import lh3.api
client = lh3.api.Client()
from pprint import pprint as print
from datetime import datetime, date, timedelta
import re
import time
from dateutil.parser import parse
from bs4 import BeautifulSoup
import pandas as pd
from dashboard.utils.utils import (Chats, retrieve_transcript, search_chats,
                                   soft_anonimyzation)

class TranscriptMessage:
    """One transcript line tagged with its speaker role and a timestamp.

    Instances are built from a parsed transcript element, i.e. any object
    exposing a ``.text`` string (this script passes BeautifulSoup ``<div>``
    tags).
    """

    def __init__(self, message, position, chat_id, accepted):
        """Build a message record.

        Args:
            message: Transcript element whose ``.text`` holds the raw line.
            position: Index of the line within the transcript.
            chat_id: Identifier of the chat the line belongs to.
            accepted: Timestamp string of the chat; only the part before
                ``"T"`` is used, as the date of the message.
        """
        self.role = self.extract_role(message)
        self.time = self.extract_time(message, accepted)
        self.position = position
        self.chat_id = chat_id
        self.message = message.text.strip()

    def extract_role(self, line):
        """Classify who wrote ``line`` from the address found in its text.

        Args:
            line: Object with a ``.text`` string.

        Returns:
            ``"guest"``, ``"operator"``, ``"system"`` or ``"unknown"``.
        """
        text = line.text
        if "web.ca.libraryh3lp.com" in text:
            return "guest"
        if "chat.ca.libraryh3lp.com System message:" in text:
            return "system"
        # This has to be a membership test: a bare string literal is always
        # truthy and would label every remaining line as "operator".
        if "@ca.libraryh3lp.com" in text:
            return "operator"
        if "chat.ca.libraryh3lp.com" in text:
            return "system"
        return "unknown"

    def extract_time(self, line, accepted):
        """Return the message time parsed by ``dateutil``.

        The value parsed is the text of ``accepted`` before ``"T"``, a space,
        and the first five characters of the line.
        """
        # NOTE(review): assumes every line starts with a five-character
        # HH:MM stamp; anything after it (e.g. an AM/PM marker) is dropped.
        # Confirm against the real transcript format.
        date_part = accepted.split("T")[0]
        return parse(date_part + " " + line.text[0:5])

    def __repr__(self):
        return "TranscriptMessage: \n role: {0}\n time: {1}\n position: {2}\n message: {3}\n chat_id: {4}\n".format(
            self.role,
            self.time,
            self.position,
            self.message,
            self.chat_id,
        )


today = date.today()

# Chats requested from the API for the hard-coded reporting window.
chats = client.chats().list_day(2023, 10, 1, to="2023-11-30")
print(len(chats))
time.sleep(1)

# A guest -> operator reply slower than this is reported as "too long".
MAX_REPLY_DELAY = timedelta(minutes=5)

# Rows for replies slower than MAX_REPLY_DELAY.
operator_waited_too_long = list()

# Rows for every guest -> operator reply, whatever the delay.
save_to_excel = list()

for this_chat in chats:
    # The listing entry is only used for its id; the full record (including
    # the transcript) comes from this single request.
    chat = client.one('chats', this_chat.get("id")).get()
    accepted = chat.get("accepted")
    if not accepted:
        continue

    # Skip chats without a transcript instead of handing None to the parser.
    chat_transcript = chat.get("transcript")
    if not chat_transcript:
        continue

    chat_id = chat.get("id")
    guest_id = chat.get("guest").get("jid")
    operator_username = chat.get("operator").get("name")
    operator_id = chat.get("operator").get("id")
    queue = chat.get("queue").get("name")
    # Hour of the chat start, converted to the local timezone.
    hour = parse(chat.get("started")).astimezone(None).hour

    soup = BeautifulSoup(chat_transcript, 'html.parser')
    # The first and last <div> are dropped.
    # NOTE(review): presumably wrapper/header and footer elements - confirm.
    divs = soup.find_all('div')[1:-1]

    # Keep only the first line of each run of consecutive guest or operator
    # lines, so adjacent entries always alternate between the two speakers.
    this_transcript = []
    previous_role = None
    for position, div in enumerate(divs):
        msg = TranscriptMessage(div, position, chat_id, accepted)
        if msg.role not in ("operator", "guest"):
            continue
        if msg.role != previous_role:
            this_transcript.append(msg)
        # Always remember the last speaker; otherwise the comparison above
        # would stay true forever and nothing would be collapsed.
        previous_role = msg.role
    print(this_transcript)

    print("*"*200)

    # Walk adjacent pairs and measure every guest -> operator hand-over.
    for guest_msg, operator_msg in zip(this_transcript, this_transcript[1:]):
        if guest_msg.role != "guest" or operator_msg.role != "operator":
            continue

        operator_time = operator_msg.time
        guest_time = guest_msg.time

        # NOTE(review): both times share the date taken from `accepted`, so
        # a chat running past midnight would yield a negative difference -
        # confirm whether that can happen.
        time_diff = operator_time - guest_time
        days = time_diff.days
        hours, remainder = divmod(time_diff.seconds, 3600)
        minutes, seconds = divmod(remainder, 60)

        # Human-readable HH:MM:SS, prefixed with the day count when needed.
        formatted_time_diff = "{:02}:{:02}:{:02}".format(hours, minutes, seconds)
        if days > 0:
            formatted_time_diff = f"{days} days, {formatted_time_diff}"

        record = {
            "chat_id": chat_id,
            "guest_id": guest_id,
            "queue": queue,
            "operator_time": operator_time,
            "time_diff": formatted_time_diff,
            "hour": hour,
            "operator_id": operator_id,
            "operator_username": operator_username,
            "link": "https://ca.libraryh3lp.com/dashboard/queues/redacted/calls/redacted/"+str(chat_id),
        }
        save_to_excel.append(record)

        if time_diff > MAX_REPLY_DELAY:
            print("check this line : {0}".format(guest_msg.message))
            print("check this line : {0}".format(operator_msg.message))
            # Separate copy so the two result lists never share a dict.
            operator_waited_too_long.append(dict(record))


def overlapping_chats(chat_id, operator_time):
    """Count chats by the same operator that overlap the given chat.

    The chat ``chat_id`` is fetched to learn its operator and its
    ``started``/``ended`` timestamps. ``search_chats`` is then queried for
    that operator's chats between the calendar dates of ``operator_time``
    minus and plus seven minutes. A result counts when its own
    ``started``/``ended`` interval intersects that of the reference chat.

    Args:
        chat_id: Identifier of the reference chat.
        operator_time: Datetime-like value supporting ``timedelta``
            arithmetic and ``strftime``.

    Returns:
        Number of overlapping chats among the results requested with
        ``chat_range=(0, 25)``.
        NOTE(review): the reference chat presumably appears in its own
        search results and is therefore counted too - confirm.
    """
    # Retrieve the reference chat.
    transcript_metadata = client.one('chats', chat_id).get()
    operator_username = transcript_metadata.get("operator").get("name")

    # The +/- 7 minute window only selects which dates are searched; the
    # overlap test below compares against the whole reference chat.
    started_date = operator_time - timedelta(minutes=7)
    ended_date = operator_time + timedelta(minutes=7)

    query = {
        "query": {
            "operator": [operator_username],
            "from": started_date.strftime("%Y-%m-%d"),
            "to": ended_date.strftime("%Y-%m-%d"),
        },
        "sort": [{"started": "descending"}],
    }

    chats, content_range = search_chats(client, query, chat_range=(0,25))

    # Parse the reference bounds once rather than on every iteration.
    reference_started = parse(transcript_metadata.get("started"))
    reference_ended = parse(transcript_metadata.get("ended"))

    # Two intervals intersect when each one starts before the other ends.
    concurrent = [
        chat
        for chat in chats
        if parse(chat.get("started")) <= reference_ended
        and parse(chat.get("ended")) >= reference_started
    ]

    # Wrapping in Chats is kept although only the count is returned.
    # NOTE(review): drop it if the constructor has no validating effect.
    simultaneous = [Chats(chat) for chat in concurrent]
    return len(simultaneous)


"""
df = pd.DataFrame(save_to_excel)
with pd.ExcelWriter('all_silence_time.xlsx') as writer:
    df.to_excel(writer, index=False)
print(save_to_excel)
#find_delayed_responses(chat_transcript)
"""

# One row per reply that exceeded the delay threshold.
df = pd.DataFrame(operator_waited_too_long)

# Count the operator's concurrent chats for each flagged reply (one API
# lookup per row). A plain list is assigned instead of the result of
# df.apply(axis=1) so that an empty result set simply yields an empty column.
df['simultaneous_chats'] = [
    overlapping_chats(row['chat_id'], row['operator_time'])
    for row in operator_waited_too_long
]

print(df.head())
with pd.ExcelWriter('operator_waited_too_long.xlsx') as writer:
    df.to_excel(writer, index=False)

	import lh3.api
	client = lh3.api.Client()
	from pprint import pprint as print
	from datetime import datetime, date, timedelta
	import re
	import time
	from dateutil.parser import parse
	from bs4 import BeautifulSoup
	import pandas as pd
	from dashboard.utils.utils import (Chats, retrieve_transcript, search_chats,
	soft_anonimyzation)

	class TranscriptMessage:
	    """One transcript line tagged with its speaker role and a timestamp.

	    Instances are built from a parsed transcript element, i.e. any object
	    exposing a ``.text`` string (this script passes BeautifulSoup ``<div>``
	    tags).
	    """

	    def __init__(self, message, position, chat_id, accepted):
	        """Build a message record.

	        Args:
	            message: Transcript element whose ``.text`` holds the raw line.
	            position: Index of the line within the transcript.
	            chat_id: Identifier of the chat the line belongs to.
	            accepted: Timestamp string of the chat; only the part before
	                ``"T"`` is used, as the date of the message.
	        """
	        self.role = self.extract_role(message)
	        self.time = self.extract_time(message, accepted)
	        self.position = position
	        self.chat_id = chat_id
	        self.message = message.text.strip()

	    def extract_role(self, line):
	        """Classify who wrote ``line`` from the address found in its text.

	        Args:
	            line: Object with a ``.text`` string.

	        Returns:
	            ``"guest"``, ``"operator"``, ``"system"`` or ``"unknown"``.
	        """
	        text = line.text
	        if "web.ca.libraryh3lp.com" in text:
	            return "guest"
	        if "chat.ca.libraryh3lp.com System message:" in text:
	            return "system"
	        # This has to be a membership test: a bare string literal is always
	        # truthy and would label every remaining line as "operator".
	        if "@ca.libraryh3lp.com" in text:
	            return "operator"
	        if "chat.ca.libraryh3lp.com" in text:
	            return "system"
	        return "unknown"

	    def extract_time(self, line, accepted):
	        """Return the message time parsed by ``dateutil``.

	        The value parsed is the text of ``accepted`` before ``"T"``, a
	        space, and the first five characters of the line.
	        """
	        # NOTE(review): assumes every line starts with a five-character
	        # HH:MM stamp; anything after it (e.g. an AM/PM marker) is dropped.
	        # Confirm against the real transcript format.
	        date_part = accepted.split("T")[0]
	        return parse(date_part + " " + line.text[0:5])

	    def __repr__(self):
	        return "TranscriptMessage: \n role: {0}\n time: {1}\n position: {2}\n message: {3}\n chat_id: {4}\n".format(
	            self.role,
	            self.time,
	            self.position,
	            self.message,
	            self.chat_id,
	        )




	today = date.today()

	# Chats requested from the API for the hard-coded reporting window.
	chats = client.chats().list_day(2023, 10, 1, to="2023-11-30")
	print(len(chats))
	time.sleep(1)

	# A guest -> operator reply slower than this is reported as "too long".
	MAX_REPLY_DELAY = timedelta(minutes=5)

	# Rows for replies slower than MAX_REPLY_DELAY.
	operator_waited_too_long = list()

	# Rows for every guest -> operator reply, whatever the delay.
	save_to_excel = list()

	for this_chat in chats:
	    # The listing entry is only used for its id; the full record
	    # (including the transcript) comes from this single request.
	    chat = client.one('chats', this_chat.get("id")).get()
	    accepted = chat.get("accepted")
	    if not accepted:
	        continue

	    # Skip chats without a transcript instead of handing None to the parser.
	    chat_transcript = chat.get("transcript")
	    if not chat_transcript:
	        continue

	    chat_id = chat.get("id")
	    guest_id = chat.get("guest").get("jid")
	    operator_username = chat.get("operator").get("name")
	    operator_id = chat.get("operator").get("id")
	    queue = chat.get("queue").get("name")
	    # Hour of the chat start, converted to the local timezone.
	    hour = parse(chat.get("started")).astimezone(None).hour

	    soup = BeautifulSoup(chat_transcript, 'html.parser')
	    # The first and last <div> are dropped.
	    # NOTE(review): presumably wrapper/header and footer elements - confirm.
	    divs = soup.find_all('div')[1:-1]

	    # Keep only the first line of each run of consecutive guest or operator
	    # lines, so adjacent entries always alternate between the two speakers.
	    this_transcript = []
	    previous_role = None
	    for position, div in enumerate(divs):
	        msg = TranscriptMessage(div, position, chat_id, accepted)
	        if msg.role not in ("operator", "guest"):
	            continue
	        if msg.role != previous_role:
	            this_transcript.append(msg)
	        # Always remember the last speaker; otherwise the comparison above
	        # would stay true forever and nothing would be collapsed.
	        previous_role = msg.role
	    print(this_transcript)

	    # Separator line between chats.
	    print("*"*200)

	    # Walk adjacent pairs and measure every guest -> operator hand-over.
	    for guest_msg, operator_msg in zip(this_transcript, this_transcript[1:]):
	        if guest_msg.role != "guest" or operator_msg.role != "operator":
	            continue

	        operator_time = operator_msg.time
	        guest_time = guest_msg.time

	        # NOTE(review): both times share the date taken from `accepted`, so
	        # a chat running past midnight would yield a negative difference -
	        # confirm whether that can happen.
	        time_diff = operator_time - guest_time
	        days = time_diff.days
	        hours, remainder = divmod(time_diff.seconds, 3600)
	        minutes, seconds = divmod(remainder, 60)

	        # Human-readable HH:MM:SS, prefixed with the day count when needed.
	        formatted_time_diff = "{:02}:{:02}:{:02}".format(hours, minutes, seconds)
	        if days > 0:
	            formatted_time_diff = f"{days} days, {formatted_time_diff}"

	        record = {
	            "chat_id": chat_id,
	            "guest_id": guest_id,
	            "queue": queue,
	            "operator_time": operator_time,
	            "time_diff": formatted_time_diff,
	            "hour": hour,
	            "operator_id": operator_id,
	            "operator_username": operator_username,
	            "link": "https://ca.libraryh3lp.com/dashboard/queues/redacted/calls/redacted/"+str(chat_id),
	        }
	        save_to_excel.append(record)

	        if time_diff > MAX_REPLY_DELAY:
	            print("check this line : {0}".format(guest_msg.message))
	            print("check this line : {0}".format(operator_msg.message))
	            # Separate copy so the two result lists never share a dict.
	            operator_waited_too_long.append(dict(record))


	def overlapping_chats(chat_id, operator_time):
	    """Count chats by the same operator that overlap the given chat.

	    The chat ``chat_id`` is fetched to learn its operator and its
	    ``started``/``ended`` timestamps. ``search_chats`` is then queried for
	    that operator's chats between the calendar dates of ``operator_time``
	    minus and plus seven minutes. A result counts when its own
	    ``started``/``ended`` interval intersects that of the reference chat.

	    Args:
	        chat_id: Identifier of the reference chat.
	        operator_time: Datetime-like value supporting ``timedelta``
	            arithmetic and ``strftime``.

	    Returns:
	        Number of overlapping chats among the results requested with
	        ``chat_range=(0, 25)``.
	        NOTE(review): the reference chat presumably appears in its own
	        search results and is therefore counted too - confirm.
	    """
	    # Retrieve the reference chat.
	    transcript_metadata = client.one('chats', chat_id).get()
	    operator_username = transcript_metadata.get("operator").get("name")

	    # The +/- 7 minute window only selects which dates are searched; the
	    # overlap test below compares against the whole reference chat.
	    started_date = operator_time - timedelta(minutes=7)
	    ended_date = operator_time + timedelta(minutes=7)

	    query = {
	        "query": {
	            "operator": [operator_username],
	            "from": started_date.strftime("%Y-%m-%d"),
	            "to": ended_date.strftime("%Y-%m-%d"),
	        },
	        "sort": [{"started": "descending"}],
	    }

	    chats, content_range = search_chats(client, query, chat_range=(0,25))

	    # Parse the reference bounds once rather than on every iteration.
	    reference_started = parse(transcript_metadata.get("started"))
	    reference_ended = parse(transcript_metadata.get("ended"))

	    # Two intervals intersect when each one starts before the other ends.
	    concurrent = [
	        chat
	        for chat in chats
	        if parse(chat.get("started")) <= reference_ended
	        and parse(chat.get("ended")) >= reference_started
	    ]

	    # Wrapping in Chats is kept although only the count is returned.
	    # NOTE(review): drop it if the constructor has no validating effect.
	    simultaneous = [Chats(chat) for chat in concurrent]
	    return len(simultaneous)


	"""
	df = pd.DataFrame(save_to_excel)
	with pd.ExcelWriter('all_silence_time.xlsx') as writer:
	df.to_excel(writer, index=False)
	print(save_to_excel)
	#find_delayed_responses(chat_transcript)
	"""

	# One row per reply that exceeded the delay threshold.
	df = pd.DataFrame(operator_waited_too_long)

	# Count the operator's concurrent chats for each flagged reply (one API
	# lookup per row). A plain list is assigned instead of the result of
	# df.apply(axis=1) so that an empty result set simply yields an empty column.
	df['simultaneous_chats'] = [
	    overlapping_chats(row['chat_id'], row['operator_time'])
	    for row in operator_waited_too_long
	]

	print(df.head())
	with pd.ExcelWriter('operator_waited_too_long.xlsx') as writer:
	    df.to_excel(writer, index=False)