Skip to content

Instantly share code, notes, and snippets.

@AndrewTrefilov
Last active October 29, 2020 14:17
Show Gist options
  • Save AndrewTrefilov/8ac5db362f2fb8825820b100cb8bfecd to your computer and use it in GitHub Desktop.
import pandas as pd
import requests
import pickle
import numpy as np
import time
from datetime import timedelta, date
import re
import pymysql.cursors
import pymysql
from graphviz import Digraph
import json
from utils import *
class OracleDBHandler():
    """Thin HTTP client for the internal ``oracle_db_handler`` service.

    The service executes Oracle SQL on a named connection and returns a
    pickled dict that is rebuilt into a pandas DataFrame.
    """

    def __init__(self):
        # Service location inside the internal network (docker service name).
        self.host = 'oracle_db_handler'
        self.port = 8080

    def select(self, connection_string, query_text):
        """Run *query_text* through the handler service with up to 3 retries.

        :param connection_string: name of the Oracle connection on the service side.
        :param query_text: SQL text to execute.
        :rtype: object
        :return: DataFrame with the result, or None if every attempt failed.
        """
        params = {'connection_string': connection_string,
                  'query_text': query_text}
        for _ in range(3):
            try:
                response = requests.get(f'http://{self.host}:{self.port}/select', params=params)
                # Fail fast on HTTP errors instead of trying to unpickle an error page.
                response.raise_for_status()
                # SECURITY NOTE: pickle.loads on network data is only acceptable
                # because the handler service is trusted and internal.
                df = pd.DataFrame.from_dict(pickle.loads(response.content))
                print('Success')
                return df
            except Exception as e:
                print(str(e))
                time.sleep(10)
        # Best-effort contract preserved: callers historically received None on failure.
        print('All 3 attempts failed')
        return None
def df_from_mysql_db(statement):
    """Execute *statement* against the local MySQL log DB and return a DataFrame.

    :param statement: SQL text to run.
    :return: pandas.DataFrame with the query result (DictCursor rows).
    """
    # SECURITY NOTE: credentials are hard-coded; they should come from config/env.
    connection = pymysql.connect(host='log_db', user='pyuser', passwd='12345', db='local_logs',
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        # read_sql may raise; the finally block guarantees the connection is
        # released instead of leaking on error (the original closed it only on success).
        return pd.read_sql(statement, connection)
    finally:
        connection.close()
def get_all_from_mysql():
    """Fetch the full contents of the log table from the local MySQL DB."""
    return df_from_mysql_db('select * from XXX')
def get_b_from_mysql():
    """Fetch the variant-B rows from the local MySQL DB."""
    return df_from_mysql_db('select * from XXX')
class DataLoader():
    """Loads every source frame (predictions, damages, incidents, notifications,
    processed damages, MySQL logs) needed to build the weekly/daily report.

    The Oracle query templates below are ``str.format`` templates: each
    ``'{:%Y-%m-%d}'`` placeholder is filled with a date/datetime object.
    """

    __query_for_predicted_users = """
    SELECT *
    FROM XXX
    WHERE ins_date between to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    """
    __query_for_damage_data = """
    SELECT d.id as damage_id,
    d.reg_date,
    d.CLIENT_SWITCH_DATE,
    dous.user_id
    FROM XXX d
    JOIN XXX du ON du.damage_id = d.id AND du.usluga_id = 1
    JOIN XXX do ON d.id = do.damage_id
    JOIN XXX dous ON do.id = dous.object_id AND DOUS.USLUGA_ID=1
    WHERE d.damage_location = 'network'
    AND (d.category = 0)
    AND d.REG_DATE between to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    """
    __query_for_incidents = """
    SELECT /*+ INDEX(d XXX) INDEX(i XXX)*/
    d.id as damage_id,
    i.id as inc_id,
    i.reg_date as inc_reg_date,
    i.user_id,
    i.channel_int_id,
    cd.phone as inc_phone,
    i.reg_user_id,
    i.reg_department_id,
    i.reg_comments
    FROM XXX i
    JOIN XXX io ON io.incident_id = i.id AND io.name = 'damages'
    JOIN XXX d ON d.id = io.rec_id
    JOIN XXX du ON du.damage_id = d.id AND du.usluga_id = 1
    LEFT JOIN XXX cd ON cd.id = i.call_detail_id
    WHERE d.damage_location = 'network'
    AND (d.category = 0)
    AND (i.service_int_id = 5226 OR i.service_int_id = 5922)
    AND d.REG_DATE between to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    and to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND (i.channel_int_id in (4085, 4270, 4125, 5215, 4661, 6362)
    OR i.REG_DEPARTMENT_ID=78
    OR i.REG_DEPARTMENT_ID=134
    OR i.REG_DEPARTMENT_ID=105
    OR i.REG_DEPARTMENT_ID=81)
    """
    # NOTE: this template takes FOUR dates (start/end for the SMS half of the
    # UNION and start/end for the push half).
    __query_for_notify_data = """
    SELECT h.user_id,
    h.exec_date AS last_send_date,
    lower (task_class) AS mes_type,
    decode (
    state,
    1, 'send',
    2, 'not send',
    3, 'not send',
    4, 'not send',
    'unknown'
    ) AS state,
    NULL AS review_date
    FROM XXX h
    WHERE task_class IN ('SMS')
    AND h.nas_ip like '%237%'
    AND h.assign_date >= to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND h.assign_date < to_date('{:%Y-%m-%d}', 'yyyy-mm-dd') + 1
    AND state <> 4
    UNION ALL
    SELECT t.user_id,
    ins_date AS last_send_date,
    'push' AS mes_type,
    decode (
    t.status,
    0, 'new',
    1, 'handling',
    2, 'send',
    3, 'not send',
    'unknown'
    ) AS state,
    t.review_date
    FROM XXX t
    WHERE pattern_ids like '%237%'
    AND t.ins_date >= to_date ('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND t.ins_date < to_date ('{:%Y-%m-%d}', 'yyyy-mm-dd') + 1
    """
    __query_for_processed_damages = """
    SELECT *
    FROM XXX
    WHERE ins_date between to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    """

    def __init__(self):
        self.predicted_users = None
        self.damage_df = None
        self.incidents = None
        self.notify_data = None
        # FIX: previously only assigned inside load(), so an unloaded instance
        # raised AttributeError instead of exposing None like the other frames.
        self.processed_damages = None
        self.log_df = None
        self.df_variant_B = None

    def load(self, start_date, end_date):
        """Populate every frame attribute for the [start_date, end_date] period.

        :param start_date: period start; any object formattable with %Y-%m-%d.
        :param end_date: period end (inclusive on the Oracle side).
        """
        # One handler is enough — it is stateless between calls.
        handler = OracleDBHandler()
        self.predicted_users = handler.select('main_ml_connection',
                                              self.__query_for_predicted_users.format(start_date, end_date))
        self.damage_df = handler.select('main_ml_connection',
                                        self.__query_for_damage_data.format(start_date, end_date))
        self.incidents = handler.select('main_ml_connection',
                                        self.__query_for_incidents.format(start_date, end_date))
        self.notify_data = handler.select('main_ml_connection',
                                          self.__query_for_notify_data.format(start_date, end_date,
                                                                              start_date, end_date))
        self.processed_damages = handler.select('main_ml_connection',
                                                self.__query_for_processed_damages.format(start_date, end_date))
        self.log_df = get_all_from_mysql()
        self.df_variant_B = get_b_from_mysql()
class MetricCounter():
    """Joins damages, predictions, notifications and incidents into a single
    per-(DAMAGE_ID, USER_ID) table (``stat_df``) and exports the
    "incidents after notifying" drill-down report."""

    def __init__(self):
        # Combined statistics table; built by unite_parts().
        self.stat_df = None

    def unite_parts(self, predicted_users, damage_df, incidents, notify_data):
        """Assemble ``self.stat_df`` from the four source frames.

        NOTE: the argument frames are modified in place (columns added/dropped)
        as part of the preparation — callers should pass throwaway copies.
        """
        self.stat_df = damage_df[['DAMAGE_ID', 'REG_DATE', 'USER_ID']]
        # Collapse to one row per damage with the affected-user count.
        damage_df = damage_df.groupby(['DAMAGE_ID', 'REG_DATE',
                                       'CLIENT_SWITCH_DATE']).USER_ID.count() \
            .reset_index() \
            .rename(columns={'USER_ID': 'DAM_USERS_COUNT'})
        # Outage duration in minutes.
        damage_df['DOWNTIME_MIN'] = (damage_df.CLIENT_SWITCH_DATE - damage_df.REG_DATE).dt.total_seconds() / 60
        # primetime - 09:00 - 22:00
        damage_df['is_primetime'] = damage_df.REG_DATE.apply(lambda x: 1 if x.hour in range(9, 22) else 0)
        damage_df.drop('CLIENT_SWITCH_DATE', axis=1, inplace=True)
        predicted_users['is_predicted'] = 1
        predicted_users = predicted_users.rename(columns={'IS_NOTIFIED_ABOUT_START': 'is_notified'})
        predicted_users.drop(['INS_DATE', 'IS_NOTIFIED_ABOUT_CLOSE'], axis=1, inplace=True)
        # 6294308 - chatbot, 286738 - job, 661641 - user_voip (service accounts,
        # i.e. incidents NOT registered by a human operator)
        incidents['is_inc_to_person'] = incidents.REG_USER_ID.apply(
            lambda x: 0 if x in [6294308, 286738, 661641] else 1)
        incidents.drop('REG_USER_ID', axis=1, inplace=True)
        incidents['had_incident'] = 1
        # The caller's phone is embedded in the registration comment as "[digits]".
        incidents['INC_PHONE'] = incidents.REG_COMMENTS.str.extract(r'\[(\d+)\]')
        incidents.drop('REG_COMMENTS', axis=1, inplace=True)
        self.stat_df = self.stat_df.merge(damage_df, on=['DAMAGE_ID', 'REG_DATE'], how='left')
        self.stat_df = self.stat_df.merge(predicted_users, on=['DAMAGE_ID', 'USER_ID'], how='left')
        # Notifications are matched by user and calendar day of sending.
        self.stat_df['date'] = self.stat_df.REG_DATE.dt.date
        notify_data.LAST_SEND_DATE = pd.to_datetime(notify_data.LAST_SEND_DATE)
        notify_data['date'] = notify_data.LAST_SEND_DATE.dt.date
        self.stat_df = self.stat_df.merge(notify_data, on=['USER_ID', 'date'], how='left')
        self.stat_df = self.stat_df.merge(incidents, on=['DAMAGE_ID', 'USER_ID'], how='left')
        # Missing values after the left joins mean "no incident / not notified /
        # not predicted" for that (damage, user) pair.
        self.stat_df.had_incident = self.stat_df.had_incident.fillna(0).astype(int)
        self.stat_df.is_notified = self.stat_df.is_notified.fillna(0).astype(int)
        self.stat_df.is_predicted = self.stat_df.is_predicted.fillna(0).astype(int)
        self.stat_df.drop('date', axis=1, inplace=True)

    def get_incidents_after_notifying(self, start_date, end_date):
        """Export users that were predicted AND notified but still opened an
        incident, for damages registered in [start_date, end_date], to
        ./artefacts/incidents_after_notifying.csv. Returns None."""
        df = self.stat_df[(self.stat_df.REG_DATE.dt.date >= start_date) & (self.stat_df.REG_DATE.dt.date <= end_date)].copy()
        df = df[(df.is_predicted == 1) & \
                (df.is_notified == 1) & \
                (df.had_incident == 1)]
        df = df[['DAMAGE_ID', 'REG_DATE', 'USER_ID', 'MES_TYPE', 'PHONE', 'STATE',
                 'LAST_SEND_DATE', 'REVIEW_DATE', 'INC_ID', 'INC_REG_DATE', 'CHANNEL_INT_ID',
                 'INC_PHONE', 'is_inc_to_person']]
        df.STATE = df.STATE.fillna('not send')
        df.INC_ID = df.INC_ID.astype(int)
        df.CHANNEL_INT_ID = df.CHANNEL_INT_ID.astype(int)
        df.is_inc_to_person = df.is_inc_to_person.astype(int)
        # Map channel ids to human-readable channel names (report is in Russian).
        df.CHANNEL_INT_ID = df.CHANNEL_INT_ID.map({4661: 'МП', 5215: 'Чат МП',
                                                   4085: 'IVR', 4125: 'Телефон',
                                                   6362: 'Чат/Соцсети'})
        df.columns = ['DAMAGE_ID', 'REG_DATE', 'USER_ID', 'Тип уведомления', 'Номер для отправки', 'Состояние',
                      'Дата отправки', 'Дата просмотра (для пушей)', 'INC_ID', 'INC_REG_DATE',
                      'Канал обращения', 'Номер с которого обратились', 'Обращение до оператора']
        df.sort_values(['DAMAGE_ID',
                        'USER_ID',
                        'INC_REG_DATE']).to_csv('./artefacts/incidents_after_notifying.csv',
                                                index=False)
        return
class MetricsCollection():
    """Computes all report metrics.

    ``d_m`` maps metric keys ('<group>_<metric>') to values; ``h_v`` holds
    intermediate DataFrames shared between the block_* methods. Blocks must be
    run in the order used by GraphAndReportMaker.run_all() because later blocks
    read ``h_v``/``d_m`` entries produced by earlier ones.
    """

    def __init__(self, damage_df, incidents, log_df, predicted_users, notify_df, df_variant_B):
        # FIX: d_m / h_v used to be *class* attributes, so every instance
        # shared (and silently overwrote) the same dicts. They are now
        # per-instance state.
        self.d_m = {}
        self.h_v = {}
        # Keep only damages that were actually closed (have a switch-back date).
        self.d_df = damage_df.dropna(subset=['CLIENT_SWITCH_DATE'])
        self.i_df = incidents[incidents.DAMAGE_ID.isin(self.d_df.DAMAGE_ID.values)]
        self.l_df = log_df[log_df.DAMAGE_ID.isin(self.d_df.DAMAGE_ID.values)].drop_duplicates(subset=['DAMAGE_ID', 'USER_ID']).reset_index(drop=True)
        self.p_df = predicted_users
        self.n_df = notify_df
        self.df_B = df_variant_B

    def get_helper_variables(self):
        """Populate ``h_v`` with the intermediate frames the blocks share."""
        # Logged (predicted) rows belonging to damages with at least one incident.
        new_df_inc = self.l_df[self.l_df.DAMAGE_ID.isin(self.i_df.DAMAGE_ID.values)]
        self.h_v['notified_dam_df'] = self.p_df[(self.p_df.IS_NOTIFIED_ABOUT_START != 0)]
        self.h_v['not_notified_dam_df'] = self.p_df[(self.p_df.IS_NOTIFIED_ABOUT_START == 0)]
        self.h_v['notified_dam_filter_pass_df'] = new_df_inc[new_df_inc.DAMAGE_ID.isin(self.h_v['notified_dam_df'].DAMAGE_ID.values)]
        self.h_v['dam_filter_not_pass_df'] = new_df_inc[~new_df_inc.DAMAGE_ID.isin(self.h_v['notified_dam_df'].DAMAGE_ID.values)]
        # Class 1: affected users we predicted; class 0: affected but not predicted.
        self.h_v['class_1'] = self.d_df[(self.d_df.DAMAGE_ID.isin(self.h_v['notified_dam_filter_pass_df'].DAMAGE_ID.values)) &
                                        (self.d_df.USER_ID.isin(self.p_df.USER_ID.values))]
        class_0 = self.d_df[(self.d_df.DAMAGE_ID.isin(self.h_v['notified_dam_filter_pass_df'].DAMAGE_ID.values)) &
                            (~self.d_df.USER_ID.isin(self.p_df.USER_ID.values))]
        self.h_v['class0_with_B'] = class_0.merge(self.df_B, how='left', on=['DAMAGE_ID', 'USER_ID'])
        # Variant-B users: class-0 users present in the B-experiment table.
        self.h_v['var_B_df'] = self.h_v['class0_with_B'].dropna(subset=['INS_DATE']).drop(columns=['INS_DATE'])
        # FIX: DataFrame.append was removed in pandas 2.x; pd.concat is equivalent here.
        self.h_v['class1_with_B'] = pd.concat([self.h_v['class_1'], self.h_v['var_B_df']])
        self.h_v['notify_1'] = self.n_df.merge(self.h_v['class_1'], how='left', on=['USER_ID'])
        # Proper notifications: sent after damage registration and at least
        # 2 minutes before the client was switched back.
        notify_class1 = self.h_v['notify_1'][(self.h_v['notify_1'].REG_DATE < self.h_v['notify_1'].LAST_SEND_DATE) &
                                             (self.h_v['notify_1'].LAST_SEND_DATE < self.h_v['notify_1'].CLIENT_SWITCH_DATE - timedelta(seconds=120))]
        self.h_v['notify_class1'] = notify_class1.drop_duplicates(subset=['USER_ID'])
        self.h_v['notify_class1_inc'] = notify_class1.merge(self.i_df, how='left', on=['DAMAGE_ID', 'USER_ID'])
        self.h_v['sms_df'] = self.h_v['notify_class1_inc'][(self.h_v['notify_class1_inc'].MES_TYPE == 'sms') & (self.h_v['notify_class1_inc'].STATE == 'send')]
        self.h_v['sms_inc'] = self.h_v['sms_df'].dropna(subset=['INC_ID'])
        self.h_v['push_df'] = self.h_v['notify_class1_inc'][(self.h_v['notify_class1_inc'].MES_TYPE == 'push') & (self.h_v['notify_class1_inc'].STATE == 'send')]
        self.h_v['push_inc'] = self.h_v['push_df'].dropna(subset=['INC_ID'])
        dam_filter_not_pass_df = new_df_inc[~new_df_inc.DAMAGE_ID.isin(self.h_v['notified_dam_df'].DAMAGE_ID.values)]
        filter_not_pass_df_orig = self.d_df[self.d_df.DAMAGE_ID.isin(dam_filter_not_pass_df.DAMAGE_ID.values)]
        filter_not_pass_df2 = dam_filter_not_pass_df[['DAMAGE_ID', 'DAM_USERS_COUNT']] \
            .drop_duplicates(subset=['DAMAGE_ID'])
        self.h_v['filter_not_pass_df'] = filter_not_pass_df_orig.merge(filter_not_pass_df2, how='right', on=['DAMAGE_ID']).dropna()
        # Outage duration (minutes) for the damages that did not pass the filters.
        self.h_v['filter_not_pass_df']['Diff'] = (self.h_v['filter_not_pass_df'].CLIENT_SWITCH_DATE -
                                                  self.h_v['filter_not_pass_df'].REG_DATE).dt.total_seconds() / 60

    def get_inc_for_hour(self):
        """Build ``i_df_1``: incidents registered within 2 hours of damage start."""
        self.i_df_1 = self.i_df.merge(self.d_df[['DAMAGE_ID', 'REG_DATE']], how='left', on=['DAMAGE_ID']).drop_duplicates()
        self.i_df_1['DIFF'] = self.i_df_1.INC_REG_DATE - self.i_df_1.REG_DATE
        self.i_df_1 = self.i_df_1[self.i_df_1.DIFF.dt.total_seconds().astype(int) < 3600 * 2]
        # Drop the helper REG_DATE/DIFF columns again.
        self.i_df_1 = self.i_df_1[self.i_df.columns]

    def adding_var_b(self, list_=('i', 'i_u', 'i_o', 'i_o_1')):
        """Move the remainder of variant-A metrics into variant-B and recompute
        the operator percentages for both variants.

        NOTE: default was a mutable list; a tuple is safer and equivalent here.
        """
        for el in list_:
            self.d_m[f'vb_{el}'] += (self.d_m[f'va_{el}'] - (self.d_m[f'sa_{el}'] + self.d_m[f'sb_{el}'] + self.d_m[f'pa_{el}'] + self.d_m[f'pb_{el}']))
            self.d_m[f'va_{el}'] = self.d_m[f'sa_{el}'] + self.d_m[f'sb_{el}'] + self.d_m[f'pa_{el}'] + self.d_m[f'pb_{el}']
        try:
            self.d_m['vb_i_op'] = self.d_m['vb_i_o'] / self.d_m['vb_a_u']
            self.d_m['vb_i_op_1'] = self.d_m['vb_i_o_1'] / self.d_m['vb_a_u']
        except ZeroDivisionError:
            self.d_m['vb_i_op'] = self.d_m['vb_i_op_1'] = 0
        try:
            self.d_m['va_i_op'] = self.d_m['va_i_o'] / self.d_m['va_a_u']
            self.d_m['va_i_op_1'] = self.d_m['va_i_o_1'] / self.d_m['va_a_u']
        except ZeroDivisionError:
            self.d_m['va_i_op'] = self.d_m['va_i_op_1'] = 0

    def get_stat(self, df, mode, name, flag=True):
        """Compute the metrics listed in *mode* for frame *df*, storing them
        under '<name>_*' keys in ``d_m``.

        :param df: source frame (per damage/user rows).
        :param mode: list of metric selectors ('dam', 'aff', 'inc', ...).
        :param name: metric-key prefix (report group).
        :param flag: True when *df* must first be joined with incidents;
            False when *df* already IS an incidents frame.
        """
        if 'dam' in mode:
            self.d_m[f'{name}_d'] = df.DAMAGE_ID.nunique()
        if 'aff' in mode:
            self.d_m[f'{name}_a'] = df.shape[0]
        if 'aff_u' in mode:
            self.d_m[f'{name}_a_u'] = df.USER_ID.nunique()
        if flag and 'inc' in mode:
            df_inc = self.i_df.merge(df, how='left', on=['DAMAGE_ID', 'USER_ID']).dropna(subset=['REG_DATE'])
        else:
            df_inc = df
        self.d_m[f'{name}_i'] = df_inc.shape[0] if 'inc' in mode else None
        self.d_m[f'{name}_i_u'] = df_inc.USER_ID.nunique() if 'inc_u' in mode else None
        # 105 / 134 are the operator departments.
        self.d_m[f'{name}_i_o'] = df_inc[df_inc.REG_DEPARTMENT_ID.isin([105, 134])].shape[0] if 'inc_o' in mode else None
        try:
            self.d_m[f'{name}_i_op'] = self.d_m[f'{name}_i_o'] / self.d_m[f'{name}_a_u'] if 'inc_op' in mode else None
        except ZeroDivisionError:
            self.d_m[f'{name}_i_op'] = 0
        if 'inc_o_1' in mode:
            if flag:
                df_inc_1 = self.i_df_1.merge(df, how='left', on=['DAMAGE_ID', 'USER_ID']).dropna(subset=['REG_DATE'])
            else:
                df_inc_1 = self.i_df_1[self.i_df_1.INC_ID.isin(df_inc.INC_ID.values)]
            self.d_m[f'{name}_i_o_1'] = df_inc_1[df_inc_1.REG_DEPARTMENT_ID.isin([105, 134])].shape[0]
            try:
                self.d_m[f'{name}_i_op_1'] = self.d_m[f'{name}_i_o_1'] / self.d_m[f'{name}_a_u'] if 'inc_op_1' in mode else None
            except ZeroDivisionError:
                self.d_m[f'{name}_i_op_1'] = 0

    def block_a(self):
        # All damages / all incidents.
        self.get_stat(self.d_df, ['dam', 'aff', 'aff_u'], 'all')
        self.get_stat(self.i_df, ['inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'all', flag=False)

    def block_b(self):
        # Damages that produced at least one incident.
        self.get_stat(self.i_df, ['dam'], 'inc')
        only_inc_dam = self.d_df[self.d_df.DAMAGE_ID.isin(self.i_df.DAMAGE_ID.values)]
        self.get_stat(only_inc_dam, ['aff', 'aff_u'], 'inc')
        # Incident counts equal the 'all' group by construction.
        self.d_m['inc_i'] = self.d_m['all_i']
        self.d_m['inc_i_u'] = self.d_m['all_i_u']
        self.d_m['inc_i_o'] = self.d_m['all_i_o']
        self.d_m['inc_i_op'] = self.d_m['inc_i_o'] / self.d_m['inc_a_u']
        self.d_m['inc_i_o_1'] = self.d_m['all_i_o_1']
        self.d_m['inc_i_op_1'] = self.d_m['inc_i_o_1'] / self.d_m['inc_a_u']

    def block_c(self):
        # Damages without any incident.
        dam_wo_inc = self.d_df[~self.d_df.DAMAGE_ID.isin(self.i_df.DAMAGE_ID.values)]
        self.get_stat(dam_wo_inc, ['dam', 'aff', 'aff_u'], 'wo')

    def block_d(self):
        # Incident damages missing from the prediction logs.
        dam_not_log_df = self.i_df[~self.i_df.DAMAGE_ID.isin(self.l_df.DAMAGE_ID.values)]
        self.get_stat(dam_not_log_df, ['dam'], 'nl')
        not_log_df = self.d_df[self.d_df.DAMAGE_ID.isin(dam_not_log_df.DAMAGE_ID.values)]
        self.get_stat(not_log_df, ['aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'nl')

    def block_e(self):
        # Incident damages that WERE predicted (present in the logs).
        new_df_inc = self.l_df[self.l_df.DAMAGE_ID.isin(self.i_df.DAMAGE_ID.values)]
        self.get_stat(new_df_inc, ['dam'], 'li')
        log_inc_dam_df = self.d_df[self.d_df.DAMAGE_ID.isin(new_df_inc.DAMAGE_ID.values)]
        self.get_stat(log_inc_dam_df, ['aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'li')

    def block_f(self):
        # Incidents whose (damage, user) pair is not among the affected users.
        not_act_inc_df = self.i_df.merge(self.d_df, how='left', on=['DAMAGE_ID', 'USER_ID'])
        not_act_inc_df = not_act_inc_df[not_act_inc_df.REG_DATE.isna()]
        self.get_stat(not_act_inc_df, ['dam', 'inc', 'inc_u', 'inc_o', 'inc_o_1'], 'na', flag=False)

    def block_g(self):
        # Predicted damages that did not pass the notification filters.
        self.get_stat(self.h_v['dam_filter_not_pass_df'], ['dam'], 'fnp')
        filter_not_pass_df = self.d_df[self.d_df.DAMAGE_ID.isin(self.h_v['dam_filter_not_pass_df'].DAMAGE_ID.values)]
        self.get_stat(filter_not_pass_df, ['aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'fnp')

    def block_h(self):
        # Predicted damages for which notifications were sent.
        self.get_stat(self.h_v['notified_dam_filter_pass_df'], ['dam'], 'fp')
        filter_pass_df = self.d_df[self.d_df.DAMAGE_ID.isin(self.h_v['notified_dam_filter_pass_df'].DAMAGE_ID.values)]
        self.get_stat(filter_pass_df, ['aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'fp')

    def block_i(self):
        # Damages stopped by the billing/dispatcher logic (returned, stats added
        # later in blocks_ijkl after subtracting the other filter reasons).
        billing_dam = self.h_v['not_notified_dam_df'][self.h_v['not_notified_dam_df'].DAMAGE_ID.
                                                      isin(self.h_v['filter_not_pass_df'].DAMAGE_ID.values)]
        billing_dam = self.h_v['filter_not_pass_df'][self.h_v['filter_not_pass_df'].DAMAGE_ID.isin(billing_dam.DAMAGE_ID.values)]
        return billing_dam

    def block_j(self):
        # Night-time damages: after 22:00, or before 08:00 on weekdays,
        # or before 10:00 on weekends.
        night_dam = self.h_v['filter_not_pass_df'].copy()
        night_dam['weekday'] = night_dam.REG_DATE.dt.weekday
        night_dam = night_dam[(night_dam.REG_DATE.dt.hour >= 22) | ((night_dam.REG_DATE.dt.hour < 8) & (night_dam.weekday.isin([0, 1, 2, 3, 4]))) |
                              ((night_dam.REG_DATE.dt.hour < 10) & (night_dam.weekday.isin([5, 6])))]
        night_dam.drop(columns=['weekday'], inplace=True)
        self.get_stat(night_dam, ['dam', 'aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'nt')
        return night_dam

    def block_l(self, night_dam):
        # Short outages (<= 10 minutes) that are not night damages.
        duration_dam = self.h_v['filter_not_pass_df'][(self.h_v['filter_not_pass_df']['Diff'] <= 10) &
                                                      (~self.h_v['filter_not_pass_df'].DAMAGE_ID.isin(night_dam.DAMAGE_ID.values))]
        self.get_stat(duration_dam, ['dam', 'aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'dur')
        return duration_dam

    def block_k(self, night_dam, duration_dam):
        # Damages with an implausible affected-user count (<= 10 or >= 500).
        count_dam = self.h_v['filter_not_pass_df'][(~self.h_v['filter_not_pass_df'].DAMAGE_ID.isin(night_dam.DAMAGE_ID.values)) &
                                                   (~self.h_v['filter_not_pass_df'].DAMAGE_ID.isin(duration_dam.DAMAGE_ID.values)) &
                                                   ((self.h_v['filter_not_pass_df'].DAM_USERS_COUNT <= 10) |
                                                    (self.h_v['filter_not_pass_df'].DAM_USERS_COUNT >= 500))]
        self.get_stat(count_dam, ['dam', 'aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'ct')
        return count_dam

    def blocks_ijkl(self):
        """Split the filter-rejected damages into mutually exclusive reasons:
        billing logic, night-time, short duration, bad user count, unknown."""
        billing_dam = self.block_i()
        night_dam = self.block_j()
        duration_dam = self.block_l(night_dam)
        count_dam = self.block_k(night_dam, duration_dam)

        def diff_dam(df):
            # Remove damages already attributed to another rejection reason.
            return df[~(df.DAMAGE_ID.isin(duration_dam.DAMAGE_ID.values)) &
                      ~(df.DAMAGE_ID.isin(night_dam.DAMAGE_ID.values)) &
                      ~(df.DAMAGE_ID.isin(count_dam.DAMAGE_ID.values))]
        billing_dam = diff_dam(billing_dam)
        x_dam = self.h_v['filter_not_pass_df'][~self.h_v['filter_not_pass_df'].DAMAGE_ID.isin(billing_dam.DAMAGE_ID.values)]
        x_dam = diff_dam(x_dam)
        self.get_stat(billing_dam, ['dam', 'aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'b')
        self.get_stat(x_dam, ['dam', 'aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'x')

    def block_m(self):
        # Class 1 (including variant-B users).
        self.get_stat(self.h_v['class1_with_B'], ['aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'c1')

    def block_n(self):
        # Variant-B users and the damages they belong to.
        self.get_stat(self.h_v['var_B_df'], ['aff_u', 'inc', 'inc_u', 'inc_o', 'inc_o_1'], 'vb')
        self.d_m['vb_d'] = self.h_v['notified_dam_filter_pass_df'][self.h_v['notified_dam_filter_pass_df'].DAMAGE_ID.isin(self.df_B.DAMAGE_ID.values)].DAMAGE_ID.nunique()

    def block_o(self):
        # Class 0 excluding variant-B users.
        class0_df = self.h_v['class0_with_B'][self.h_v['class0_with_B'].INS_DATE.isna()]
        self.get_stat(class0_df, ['aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'c0')

    def block_p(self):
        self.d_m['push_a_u'] = self.h_v['push_df'].USER_ID.nunique()

    def block_q(self):
        self.d_m['sms_a_u'] = self.h_v['sms_df'].USER_ID.nunique()

    def block_r(self):
        # Notifications that failed to send.
        notify_class1_inc = self.h_v['notify_class1'].merge(self.i_df, how='left', on=['DAMAGE_ID', 'USER_ID'])
        self.h_v['not_send_df'] = notify_class1_inc[notify_class1_inc.STATE == 'not send']
        self.d_m['ns_a_u'] = self.h_v['not_send_df'].shape[0]

    def block_s(self):
        # Notifications stuck in the 'handling' state.
        notify_class1_inc = self.h_v['notify_class1'].merge(self.i_df, how='left', on=['DAMAGE_ID', 'USER_ID'])
        self.h_v['handling_df'] = notify_class1_inc[notify_class1_inc.STATE == 'handling']
        self.d_m['h_a_u'] = self.h_v['handling_df'].shape[0]

    def block_t(self):
        """Split improperly-timed notifications into 'sent after night' (08:00)
        and 'sent at the wrong time' groups."""
        notify_not_prop = self.n_df[self.n_df.USER_ID.isin(self.h_v['class_1'].USER_ID.values)]
        notify_not_prop = notify_not_prop[~notify_not_prop.USER_ID.isin(self.h_v['notify_class1'].USER_ID.values)]
        if notify_not_prop.empty:
            self.h_v['notify_not_prop'] = self.h_v['notify_after_night'] = notify_not_prop.copy()
            self.d_m['nant_a_u'] = self.d_m['nnp_a_u'] = 0
        else:
            self.h_v['notify_after_night'] = notify_not_prop[notify_not_prop.LAST_SEND_DATE.dt.hour.isin([8])]  # morning sends happen only at 8
            self.d_m['nant_a_u'] = self.h_v['notify_after_night'].shape[0]
            self.h_v['notify_not_prop'] = notify_not_prop[~notify_not_prop.LAST_SEND_DATE.dt.hour.isin([8])]
            self.d_m['nnp_a_u'] = self.h_v['notify_not_prop'].shape[0]

    def block_u(self):
        # Class-1 users that received no notification at all.
        not_notify_usrs = set(self.h_v['class_1'].USER_ID.values) - set(self.h_v['notify_class1'].USER_ID.values) \
            - set(self.h_v['notify_after_night'].USER_ID.values) \
            - set(self.h_v['notify_not_prop'].USER_ID.values)
        self.h_v['not_notify_df'] = self.h_v['class_1'][self.h_v['class_1'].USER_ID.isin(not_notify_usrs)]
        self.get_stat(self.h_v['not_notify_df'], ['aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'nn')

    def block_v(self):
        # Incidents opened BEFORE the push was delivered.
        push_before_df = self.h_v['push_inc'][self.h_v['push_inc'].INC_REG_DATE <= self.h_v['push_inc'].LAST_SEND_DATE]
        self.get_stat(push_before_df, ['inc', 'inc_u', 'inc_o', 'inc_o_1'], 'pb', flag=False)
        try:
            self.d_m['pb_i_op'] = self.d_m['pb_i_o'] / self.d_m['push_a_u']
            self.d_m['pb_i_op_1'] = self.d_m['pb_i_o_1'] / self.d_m['push_a_u']
        except ZeroDivisionError:
            self.d_m['pb_i_op'] = self.d_m['pb_i_op_1'] = 0

    def block_w(self):
        # Incidents opened AFTER the push; also finalises variant A/B totals.
        push_after_df = self.h_v['push_inc'][self.h_v['push_inc'].INC_REG_DATE > self.h_v['push_inc'].LAST_SEND_DATE]
        push_after_df = self.h_v['var_A_df'][self.h_v['var_A_df'].USER_ID.isin(push_after_df.USER_ID.values)]
        self.get_stat(push_after_df, ['inc', 'inc_u', 'inc_o', 'inc_o_1'], 'pa')
        try:
            self.d_m['pa_i_op'] = self.d_m['pa_i_o'] / self.d_m['push_a_u']
            self.d_m['pa_i_op_1'] = self.d_m['pa_i_o_1'] / self.d_m['push_a_u']
        except ZeroDivisionError:
            self.d_m['pa_i_op'] = self.d_m['pa_i_op_1'] = 0
        self.adding_var_b()

    def block_x(self):
        # Incidents opened BEFORE the SMS was delivered.
        sms_before_df = self.h_v['sms_inc'][self.h_v['sms_inc'].INC_REG_DATE <= self.h_v['sms_inc'].LAST_SEND_DATE]
        self.get_stat(sms_before_df, ['inc', 'inc_u', 'inc_o', 'inc_o_1'], 'sb', flag=False)
        try:
            self.d_m['sb_i_op'] = self.d_m['sb_i_o'] / self.d_m['sms_a_u']
            self.d_m['sb_i_op_1'] = self.d_m['sb_i_o_1'] / self.d_m['sms_a_u']
        except ZeroDivisionError:
            self.d_m['sb_i_op'] = self.d_m['sb_i_op_1'] = 0

    def block_y(self):
        # Incidents opened AFTER the SMS was delivered.
        sms_after_df = self.h_v['sms_inc'][self.h_v['sms_inc'].INC_REG_DATE > self.h_v['sms_inc'].LAST_SEND_DATE]
        sms_after_df = self.h_v['var_A_df'][self.h_v['var_A_df'].USER_ID.isin(sms_after_df.USER_ID.values)]
        self.get_stat(sms_after_df, ['inc', 'inc_u', 'inc_o', 'inc_o_1'], 'sa')
        try:
            self.d_m['sa_i_op'] = self.d_m['sa_i_o'] / self.d_m['sms_a_u']
            self.d_m['sa_i_op_1'] = self.d_m['sa_i_o_1'] / self.d_m['sms_a_u']
        except ZeroDivisionError:
            self.d_m['sa_i_op'] = self.d_m['sa_i_op_1'] = 0

    def new_block_b(self):
        # Variant A: class-1 users minus mistakes and never-notified users.
        self.h_v['var_A_df'] = self.h_v['class_1'][~self.h_v['class_1'].USER_ID.isin(np.union1d(self.h_v['mistakes_df'].USER_ID.values, self.h_v['not_notify_df'].USER_ID.values))]
        self.get_stat(self.h_v['var_A_df'], ['aff_u', 'inc', 'inc_u', 'inc_o', 'inc_o_1'], 'va')

    def new_block_a(self):
        """Aggregate every notification-mistake category and its incidents."""
        self.h_v['mistakes_df'] = pd.concat([self.h_v['notify_after_night'], self.h_v['notify_not_prop'],
                                             self.h_v['handling_df'], self.h_v['not_send_df']], sort=True)
        self.d_m['m_a_u'] = self.h_v['mistakes_df'].shape[0]
        class1_inc_df = self.i_df.merge(self.h_v['class1_with_B'], how='right', on=['USER_ID', 'DAMAGE_ID']).dropna(subset=['INC_ID'])
        mistakes_inc_df = class1_inc_df[class1_inc_df.USER_ID.isin(self.h_v['mistakes_df'].USER_ID.values)]
        self.d_m['m_i'] = mistakes_inc_df.shape[0]
        self.d_m['m_i_u'] = mistakes_inc_df.USER_ID.nunique()
        self.d_m['m_i_o'] = mistakes_inc_df[mistakes_inc_df.REG_DEPARTMENT_ID.isin([105, 134])].shape[0]
        self.d_m['m_i_o_1'] = mistakes_inc_df[(mistakes_inc_df.REG_DEPARTMENT_ID.isin([105, 134])) &
                                              (mistakes_inc_df.INC_ID.isin(self.i_df_1.INC_ID.values))].shape[0]
        try:
            self.d_m['m_i_op'] = self.d_m['m_i_o'] / self.d_m['m_a_u']
            self.d_m['m_i_op_1'] = self.d_m['m_i_o_1'] / self.d_m['m_a_u']
        except ZeroDivisionError:
            self.d_m['m_i_op'] = self.d_m['m_i_op_1'] = 0
class GraphAndReportMaker(MetricsCollection):
    """Runs every metric block, renders the damage-funnel graphviz diagram and
    persists/returns the metric report table."""

    # Node-label templates for the graph.
    affected = 'Affected: {}, users: {}'
    incidents = 'Incidents: {}, users: {}'
    operator = 'Operator: {} / {}'
    operator2 = 'Operator: {} ({:.2%}) / {} ({:.2%})'

    def run_all(self):
        """Compute every metric block in dependency order (later blocks read
        d_m / h_v entries produced by earlier ones)."""
        self.get_inc_for_hour()
        self.get_helper_variables()
        self.block_a()
        self.block_b()
        self.block_c()
        self.block_d()
        self.block_e()
        self.block_f()
        self.block_g()
        self.block_h()
        self.blocks_ijkl()
        self.block_m()
        self.block_n()
        self.block_o()
        self.block_p()
        self.block_q()
        self.block_r()
        self.block_s()
        self.block_t()
        self.block_u()
        self.block_v()
        self.block_x()
        self.new_block_a()
        self.new_block_b()
        self.block_y()
        self.block_w()

    def make_block(self, title, mode, name):
        """Render up to four node sub-labels for metric group *name*.

        :param title: first-line template (formatted with a count when 'x_0').
        :param mode: list of 'x_N' selectors choosing which sub-labels to emit.
        :param name: d_m key prefix.
        :return: tuple of four sub-label strings (or None where not selected).

        FIX: the original conditions such as ``'x_1' and 'x_2' in mode`` only
        tested the LAST operand ('x_1' is always truthy); each membership test
        is now written out explicitly. All existing call sites pass the
        selectors in pairs, so the rendered output is unchanged.
        """
        if 'x_0' in mode and 'affected' in title:
            subblock1 = title.format(self.d_m[f'{name}_a_u'])
        elif 'x_0' in mode and 'affected' not in title:
            subblock1 = title.format(self.d_m[f'{name}_d'])
        else:
            subblock1 = title
        subblock2 = self.affected.format(self.d_m[f'{name}_a'], self.d_m[f'{name}_a_u']) if 'x_1' in mode and 'x_2' in mode else None
        subblock3 = self.incidents.format(self.d_m[f'{name}_i'], self.d_m[f'{name}_i_u']) if 'x_3' in mode and 'x_4' in mode else None
        if 'x_7' not in mode and 'x_8' not in mode and 'x_5' in mode and 'x_6' in mode:
            subblock4 = self.operator.format(self.d_m[f'{name}_i_o'], self.d_m[f'{name}_i_o_1'])
        elif 'x_5' in mode and 'x_6' in mode and 'x_7' in mode and 'x_8' in mode:
            subblock4 = self.operator2.format(self.d_m[f'{name}_i_o'], self.d_m[f'{name}_i_op'], self.d_m[f'{name}_i_o_1'], self.d_m[f'{name}_i_op_1'])
        else:
            subblock4 = None
        return subblock1, subblock2, subblock3, subblock4

    def x4_block(self, title, name):
        # Full node: title + affected + incidents + operator percentages.
        return self.make_block(title, ['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8'], name)

    def x3_block_aff(self, title, name):
        # Node titled with the affected-user count, without the 'Affected' row.
        return self.make_block(title, ['x_0', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8'], name)

    def x3_block_inc(self, title, name):
        # Incident-only node (plain title).
        return self.make_block(title, ['x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8'], name)

    def get_graph(self):
        """Render the funnel diagram to ./artefacts/report via graphviz."""
        A1, A2, A3, A4 = self.x4_block('All damages: {}', 'all')
        B1, B2, B3, B4 = self.x4_block('Dam with inc: {}', 'inc')
        C1, C2, _, _ = self.make_block('Dam w/o inc: {}', ['x_0', 'x_1', 'x_2'], 'wo')
        D1, D2, D3, D4 = self.x4_block('Not in logs: {}', 'nl')
        E1, E2, E3, E4 = self.x4_block('Predicted: {}', 'li')
        F1, _, F2, F3 = self.make_block('Incidents not act: {}', ['x_0', 'x_3', 'x_4', 'x_5', 'x_6'], 'na')
        G1, G2, G3, G4 = self.x4_block('Filters not passed: {}', 'fnp')
        H1, H2, H3, H4 = self.x4_block('Notifications sent: {}', 'fp')
        I1, I2, I3, I4 = self.x4_block('Night dam: {}', 'nt')
        J1, J2, J3, J4 = self.x4_block('Count dam: {}', 'ct')
        K1, K2, K3, K4 = self.x4_block('Duration dam: {}', 'dur')
        Y1, Y2, Y3, Y4 = self.x4_block('Billing dam: {}', 'b')
        L1, L2, L3, L4 = self.x4_block('X dam: {}', 'x')
        M1, _, M2, M3 = self.x3_block_aff('Class 1, affected: {}', 'c1')
        # Z1, _, Z2, Z3 = self.x3_block_aff('Var B, affected: {}', 'vb') ## Var B
        N1, _, N2, N3 = self.x3_block_aff('Class 0, affected: {}', 'c0')
        O1 = 'Push sent: {}'.format(self.d_m['push_a_u'])
        P1 = 'SMS sent: {}'.format(self.d_m['sms_a_u'])
        Q1 = 'Not sent: {}'.format(self.d_m['ns_a_u'])
        R1 = 'Handling: {}'.format(self.d_m['h_a_u'])
        S1, _, S2, S3 = self.x3_block_aff('Not notify, affected: {}', 'nn')
        # FIX: typo "niqht" -> "night" in the rendered label.
        T1 = 'Notify after night: {}'.format(self.d_m['nant_a_u'])
        U1, _, U2, U3 = self.x3_block_inc('Incidents before push', 'pb')
        V1, _, V2, V3 = self.x3_block_inc('Incidents after push', 'pa')
        W1, _, W2, W3 = self.x3_block_inc('Incidents before sms', 'sb')
        X1, _, X2, X3 = self.x3_block_inc('Incidents after sms', 'sa')
        new_A1, _, new_A2, new_A3 = self.x3_block_aff('Var A, affected: {}', 'va')
        new_B1, _, new_B2, new_B3 = self.x3_block_aff('Mistakes, affected: {}', 'm')
        new_C = 'Change usluga: {}'.format(self.d_m['nnp_a_u'])
        dot = Digraph('structs', node_attr={'shape': 'record'})
        dot.node('A', f'{{ <f0> {A1}|<f1> {A2}|<f2> {A3}|<f3> {A4}}}')
        dot.node('B', f'{{ <f0> {B1}|<f1> {B2}|<f2> {B3}|<f3> {B4}}}')
        dot.node('C', f'{{ <f0> {C1}|<f1> {C2}}}')
        dot.node('D', f'{{ <f0> {D1}|<f1> {D2}|<f2> {D3}|<f3> {D4}}}')
        dot.node('E', f'{{ <f0> {E1}|<f1> {E2}|<f2> {E3}|<f3> {E4}}}')
        dot.node('F', f'{{ <f0> {F1}|<f1> {F2}|<f2> {F3}}}')
        dot.node('G', f'{{ <f0> {G1}|<f1> {G2}|<f2> {G3}|<f3> {G4}}}')
        dot.node('H', f'{{ <f0> {H1}|<f1> {H2}|<f2> {H3}|<f3> {H4}}}')
        dot.node('I', f'{{ <f0> {I1}|<f1> {I2}|<f2> {I3}|<f3> {I4}}}')
        dot.node('K', f'{{ <f0> {K1}|<f1> {K2}|<f2> {K3}|<f3> {K4}}}')
        dot.node('J', f'{{ <f0> {J1}|<f1> {J2}|<f2> {J3}|<f3> {J4}}}')
        dot.node('Y', f'{{ <f0> {Y1}|<f1> {Y2}|<f2> {Y3}|<f3> {Y4}}}')
        dot.node('L', f'{{ <f0> {L1}|<f1> {L2}|<f2> {L3}|<f3> {L4}}}')
        dot.node('M', f'{{ <f0> {M1}|<f1> {M2}|<f2> {M3}}}')
        # dot.node('Z', f'{{ <f0> {Z1}|<f1> {Z2}|<f2> {Z3}}}') ## Var B
        dot.node('N', f'{{ <f0> {N1}|<f1> {N2}|<f2> {N3}}}')
        dot.node('O', O1)
        dot.node('P', P1)
        dot.node('Q', Q1)
        dot.node('R', R1)
        dot.node('T', T1)
        dot.node('U', f'{{ <f0> {U1}|<f1> {U2}|<f2> {U3}}}')
        dot.node('V', f'{{ <f0> {V1}|<f1> {V2}|<f2> {V3}}}')
        dot.node('W', f'{{ <f0> {W1}|<f1> {W2}|<f2> {W3}}}')
        dot.node('X', f'{{ <f0> {X1}|<f1> {X2}|<f2> {X3}}}')
        dot.node('new_A', f'{{ <f0> {new_A1}|<f1> {new_A2}|<f2> {new_A3}}}')
        dot.node('new_B', f'{{ <f0> {new_B1}|<f1> {new_B2}|<f2> {new_B3}}}')
        dot.node('new_C', new_C)
        dot.node('S', f'{{ <f0> {S1}|<f1> {S2}|<f2> {S3}}}')
        dot.edges(['AB', 'AC', 'BD', 'BE', 'BF', 'EG', 'EH',
                   'GI', 'GJ', 'GK', 'GY', 'GL', 'HM', 'HN',  ## 'MZ',
                   ('M', 'new_A'), ('M', 'new_B'), 'MS', ('new_A', 'O'),
                   ('new_A', 'P'), ('new_B', 'Q'), ('new_B', 'R'),
                   ('new_B', 'new_C'), ('new_B', 'T'),
                   'OU', 'OV', 'PW', 'PX'])
        dot.render('report', './artefacts/', view=False)

    def insert_report(self, weekly=True):
        """Persist ``d_m`` to the predict-logs DB and return it as a DataFrame.

        :param weekly: choose the weekly log table; daily otherwise.
        :return: report frame with Russian column names (Значение/Группа/Метрика).
        """
        with open('./configs/params.json', 'r') as f:
            params = json.load(f)
        predict_logs = Logs(user=params['predict_logs']['user'],
                            password=params['predict_logs']['password'],
                            host=params['predict_logs']['host'],
                            db_name=params['predict_logs']['db_name'])
        df = pd.DataFrame(list(self.d_m.items()), columns=['METRIC', 'Значение'])
        df['Значение'] = df['Значение'].round(decimals=4)
        # Metric keys look like '<group>_<metric>'; split on the first underscore.
        df['Группа'] = df['METRIC'].apply(lambda x: re.split(r'_', x, maxsplit=1)[0])
        df['Метрика'] = df['METRIC'].apply(lambda x: re.split(r'_', x, maxsplit=1)[1])
        df.drop(columns=['METRIC'], inplace=True)
        df.dropna(inplace=True)
        # Operator ratios are reported as percentages.
        df.loc[df['Метрика'].isin(['i_op', 'i_op_1']), 'Значение'] = df['Значение'] * 100
        df_old = df.copy()
        df_old['INS_DATE'] = str(date.today())
        if weekly:
            predict_logs.insert_report_logs(df_old)
        else:
            predict_logs.insert_report_logs_daily(df_old)
        return df

    def get_report(self):
        """Return the report frame with human-readable (Russian) group and
        metric names, indexed by (Группа, Метрика)."""
        df = self.insert_report()
        dict_ = {'all': 'Все аварии', 'inc': 'Аварии с инцидентами', 'wo': 'Аварии без инцидентов',
                 'nl': 'Нет в логах', 'li': 'Были предсказания', 'na': 'Нет в актах', 'fnp': 'Не прошли фильтры',
                 'fp': 'Отправлены оповещения', 'nt': 'Ночные аварии', 'ct': 'Аварии с неверным кол-вом пострадавших',
                 'dur': 'Аварии с неверной длительностью', 'b': 'Аварии не прошедшие логику биллинга/Диспетчера',
                 'x': 'Неизвестная категория аварий', 'c1': 'Класс 1',  ## 'vb': 'Категория B',
                 'c0': 'Класс 0', 'push': 'Пуши отправлены', 'sms': 'Смс отправлены', 'ns': 'Не отправлено оповещение',
                 'h': 'Зависло', 'nant': 'Ошибочные отправки после ночи', 'nnp': 'Изменилась услуга', 'nn': 'Не оповещены',
                 'pb': 'Инциденты до пушей', 'sb': 'Инциденты до смс', 'm': 'Ошибки', 'va': 'Предсказаны, категория A',
                 'sa': 'Инциденты после смс', 'pa': 'Инциденты после пушей'}
        dict_2 = {'d': 'Количество аварий', 'a': 'Все пострадавшие', 'a_u': 'Уникальные пострадавшие',
                  'i': 'Все инциденты', 'i_u': 'Инциденты от уникальных пострадавших', 'i_o': 'Все обращения до оператора',
                  'i_op': 'Процент обращений до оператора от уникального кол-ва пострадавших',
                  'i_op_1': 'Процент обращений до оператора в первые 2 часа от уникального кол-ва пострадавших',
                  'i_o_1': 'Обращения до оператора в первые 2 часа'}
        df['Группа'] = df['Группа'].map(dict_)
        # Drop metric groups that have no display name.
        df = df[df['Группа'].notna()]
        df['Метрика'] = df['Метрика'].map(dict_2)
        df = df.set_index(['Группа', 'Метрика'])
        return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment