Last active
October 29, 2020 14:17
-
-
Save AndrewTrefilov/8ac5db362f2fb8825820b100cb8bfecd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import requests | |
import pickle | |
import numpy as np | |
import time | |
from datetime import timedelta, date | |
import re | |
import pymysql.cursors | |
import pymysql | |
from graphviz import Digraph | |
import json | |
from utils import * | |
class OracleDBHandler():
    """Thin HTTP client for the ``oracle_db_handler`` service.

    Sends SQL text to the service's ``/select`` endpoint and turns the
    pickled payload it returns into a pandas DataFrame.
    """

    def __init__(self):
        # Service location is fixed by the deployment (container hostname).
        self.host = 'oracle_db_handler'
        self.port = 8080

    def select(self, connection_string, query_text):
        """Run ``query_text`` against the Oracle connection named by
        ``connection_string`` and return the result as a DataFrame.

        Retries up to 3 times, sleeping 10 s between attempts.

        :raises RuntimeError: when every attempt fails.  (The original
            silently returned ``None`` here, which only surfaced later as
            an obscure AttributeError in the caller.)
        """
        params = {'connection_string': connection_string,
                  'query_text': query_text}
        last_error = None
        for _ in range(3):
            try:
                response = requests.get(f'http://{self.host}:{self.port}/select', params=params)
                # SECURITY NOTE(review): pickle.loads on bytes received over
                # the network is only safe if the service is fully trusted.
                df = pd.DataFrame.from_dict(pickle.loads(response.content))
                print('Success')
                return df
            except Exception as e:
                last_error = e
                print(str(e))
                time.sleep(10)
        raise RuntimeError(f'select failed after 3 attempts: {last_error}')
def df_from_mysql_db(statement):
    """Execute ``statement`` against the local MySQL log database and
    return the result set as a DataFrame.

    The connection is now closed even when the query raises (the original
    leaked the connection on error).
    """
    # NOTE(review): credentials are hard-coded; they belong in config.
    connection = pymysql.connect(host='log_db', user='pyuser', passwd='12345', db='local_logs',
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        return pd.read_sql(statement, connection)
    finally:
        connection.close()
def get_all_from_mysql():
    """Fetch the full contents of the log table as a DataFrame."""
    return df_from_mysql_db('select * from XXX')
def get_b_from_mysql():
    """Fetch the variant-B table as a DataFrame."""
    return df_from_mysql_db('select * from XXX')
class DataLoader():
    """Loads every source DataFrame needed for metric computation.

    The ``__query_*`` class attributes are Oracle SQL templates with
    ``str.format`` placeholders for the reporting date range.
    """

    __query_for_predicted_users = """
    SELECT *
    FROM XXX
    WHERE ins_date between to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    """
    __query_for_damage_data = """
    SELECT d.id as damage_id,
    d.reg_date,
    d.CLIENT_SWITCH_DATE,
    dous.user_id
    FROM XXX d
    JOIN XXX du ON du.damage_id = d.id AND du.usluga_id = 1
    JOIN XXX do ON d.id = do.damage_id
    JOIN XXX dous ON do.id = dous.object_id AND DOUS.USLUGA_ID=1
    WHERE d.damage_location = 'network'
    AND (d.category = 0)
    AND d.REG_DATE between to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    """
    __query_for_incidents = """
    SELECT /*+ INDEX(d XXX) INDEX(i XXX)*/
    d.id as damage_id,
    i.id as inc_id,
    i.reg_date as inc_reg_date,
    i.user_id,
    i.channel_int_id,
    cd.phone as inc_phone,
    i.reg_user_id,
    i.reg_department_id,
    i.reg_comments
    FROM XXX i
    JOIN XXX io ON io.incident_id = i.id AND io.name = 'damages'
    JOIN XXX d ON d.id = io.rec_id
    JOIN XXX du ON du.damage_id = d.id AND du.usluga_id = 1
    LEFT JOIN XXX cd ON cd.id = i.call_detail_id
    WHERE d.damage_location = 'network'
    AND (d.category = 0)
    AND (i.service_int_id = 5226 OR i.service_int_id = 5922)
    AND d.REG_DATE between to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    and to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND (i.channel_int_id in (4085, 4270, 4125, 5215, 4661, 6362)
    OR i.REG_DEPARTMENT_ID=78
    OR i.REG_DEPARTMENT_ID=134
    OR i.REG_DEPARTMENT_ID=105
    OR i.REG_DEPARTMENT_ID=81)
    """
    __query_for_notify_data = """
    SELECT h.user_id,
    h.exec_date AS last_send_date,
    lower (task_class) AS mes_type,
    decode (
    state,
    1, 'send',
    2, 'not send',
    3, 'not send',
    4, 'not send',
    'unknown'
    ) AS state,
    NULL AS review_date
    FROM XXX h
    WHERE task_class IN ('SMS')
    AND h.nas_ip like '%237%'
    AND h.assign_date >= to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND h.assign_date < to_date('{:%Y-%m-%d}', 'yyyy-mm-dd') + 1
    AND state <> 4
    UNION ALL
    SELECT t.user_id,
    ins_date AS last_send_date,
    'push' AS mes_type,
    decode (
    t.status,
    0, 'new',
    1, 'handling',
    2, 'send',
    3, 'not send',
    'unknown'
    ) AS state,
    t.review_date
    FROM XXX t
    WHERE pattern_ids like '%237%'
    AND t.ins_date >= to_date ('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND t.ins_date < to_date ('{:%Y-%m-%d}', 'yyyy-mm-dd') + 1
    """
    __query_for_processed_damages = """
    SELECT *
    FROM XXX
    WHERE ins_date between to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    AND to_date('{:%Y-%m-%d}', 'yyyy-mm-dd')
    """

    def __init__(self):
        # Every frame below is populated by load().
        self.predicted_users = None
        self.damage_df = None
        self.incidents = None
        self.notify_data = None
        # Fix: load() assigns this attribute but __init__ never declared
        # it, unlike the other six frames.
        self.processed_damages = None
        self.log_df = None
        self.df_variant_B = None

    def load(self, start_date, end_date):
        """Populate all DataFrames for the [start_date, end_date] window.

        ``start_date`` / ``end_date`` must support ``'{:%Y-%m-%d}'``
        formatting (``datetime.date`` or ``datetime.datetime``).
        """
        # One handler instance is enough for all five queries.
        oracle = OracleDBHandler()
        self.predicted_users = oracle.select(
            'main_ml_connection',
            self.__query_for_predicted_users.format(start_date, end_date))
        self.damage_df = oracle.select(
            'main_ml_connection',
            self.__query_for_damage_data.format(start_date, end_date))
        self.incidents = oracle.select(
            'main_ml_connection',
            self.__query_for_incidents.format(start_date, end_date))
        # The notify query is a UNION ALL, so the date pair appears twice.
        self.notify_data = oracle.select(
            'main_ml_connection',
            self.__query_for_notify_data.format(start_date, end_date,
                                                start_date, end_date))
        self.processed_damages = oracle.select(
            'main_ml_connection',
            self.__query_for_processed_damages.format(start_date, end_date))
        self.log_df = get_all_from_mysql()
        self.df_variant_B = get_b_from_mysql()
class MetricCounter():
    """Builds a per-(damage, user) statistics frame joining damages,
    predictions, notifications and incidents, and exports a CSV of
    incidents that happened after a notification was sent."""

    def __init__(self):
        # Combined statistics frame; built by unite_parts().
        self.stat_df = None

    def unite_parts(self, predicted_users, damage_df, incidents, notify_data):
        """Assemble ``self.stat_df`` from the four source frames.

        NOTE(review): mutates the ``predicted_users``, ``incidents`` and
        ``notify_data`` arguments in place (column adds/drops) — callers
        should pass copies if they reuse these frames.
        """
        # Base: one row per (damage, registration date, affected user).
        self.stat_df = damage_df[['DAMAGE_ID', 'REG_DATE', 'USER_ID']]
        # Per-damage aggregates: affected-user count and downtime minutes.
        damage_df = damage_df.groupby(['DAMAGE_ID', 'REG_DATE',
                                       'CLIENT_SWITCH_DATE']).USER_ID.count() \
            .reset_index() \
            .rename(columns={'USER_ID': 'DAM_USERS_COUNT'})
        damage_df['DOWNTIME_MIN'] = (damage_df.CLIENT_SWITCH_DATE - damage_df.REG_DATE).dt.total_seconds() / 60
        # primetime: 09:00 - 22:00
        damage_df['is_primetime'] = damage_df.REG_DATE.apply(lambda x: 1 if x.hour in range(9, 22) else 0)
        damage_df.drop('CLIENT_SWITCH_DATE', axis=1, inplace=True)
        predicted_users['is_predicted'] = 1
        predicted_users = predicted_users.rename(columns={'IS_NOTIFIED_ABOUT_START': 'is_notified'})
        predicted_users.drop(['INS_DATE', 'IS_NOTIFIED_ABOUT_CLOSE'], axis=1, inplace=True)
        # 6294308 - chatbot, 286738 - job, 661641 - user_voip (non-person registrars)
        incidents['is_inc_to_person'] = incidents.REG_USER_ID.apply(
            lambda x: 0 if x in [6294308, 286738, 661641] else 1)
        incidents.drop('REG_USER_ID', axis=1, inplace=True)
        incidents['had_incident'] = 1
        # The caller's phone is embedded in REG_COMMENTS as "[digits]".
        incidents['INC_PHONE'] = incidents.REG_COMMENTS.str.extract(r'\[(\d+)\]')
        incidents.drop('REG_COMMENTS', axis=1, inplace=True)
        self.stat_df = self.stat_df.merge(damage_df, on=['DAMAGE_ID', 'REG_DATE'], how='left')
        self.stat_df = self.stat_df.merge(predicted_users, on=['DAMAGE_ID', 'USER_ID'], how='left')
        # Notifications are joined by calendar day of sending.
        self.stat_df['date'] = self.stat_df.REG_DATE.dt.date
        notify_data.LAST_SEND_DATE = pd.to_datetime(notify_data.LAST_SEND_DATE)
        notify_data['date'] = notify_data.LAST_SEND_DATE.dt.date
        self.stat_df = self.stat_df.merge(notify_data, on=['USER_ID', 'date'], how='left')
        self.stat_df = self.stat_df.merge(incidents, on=['DAMAGE_ID', 'USER_ID'], how='left')
        # Missing flags after the left joins mean "no" -> 0.
        self.stat_df.had_incident = self.stat_df.had_incident.fillna(0).astype(int)
        self.stat_df.is_notified = self.stat_df.is_notified.fillna(0).astype(int)
        self.stat_df.is_predicted = self.stat_df.is_predicted.fillna(0).astype(int)
        self.stat_df.drop('date', axis=1, inplace=True)

    def get_incidents_after_notifying(self, start_date, end_date):
        """Write a CSV of users who were predicted, notified, and still
        opened an incident within the date window.

        Side effect only: writes ./artefacts/incidents_after_notifying.csv
        and returns None.
        """
        df = self.stat_df[(self.stat_df.REG_DATE.dt.date >= start_date) & (self.stat_df.REG_DATE.dt.date <= end_date)].copy()
        df = df[(df.is_predicted == 1) & \
                (df.is_notified == 1) & \
                (df.had_incident == 1)]
        df = df[['DAMAGE_ID', 'REG_DATE', 'USER_ID', 'MES_TYPE', 'PHONE', 'STATE',
                 'LAST_SEND_DATE', 'REVIEW_DATE', 'INC_ID', 'INC_REG_DATE', 'CHANNEL_INT_ID',
                 'INC_PHONE', 'is_inc_to_person']]
        df.STATE = df.STATE.fillna('not send')
        df.INC_ID = df.INC_ID.astype(int)
        df.CHANNEL_INT_ID = df.CHANNEL_INT_ID.astype(int)
        df.is_inc_to_person = df.is_inc_to_person.astype(int)
        # Map channel ids to human-readable (Russian) channel names.
        df.CHANNEL_INT_ID = df.CHANNEL_INT_ID.map({4661: 'МП', 5215: 'Чат МП',
                                                   4085: 'IVR', 4125: 'Телефон',
                                                   6362: 'Чат/Соцсети'})
        # Rename to the (Russian) report column headers.
        df.columns = ['DAMAGE_ID', 'REG_DATE', 'USER_ID', 'Тип уведомления', 'Номер для отправки', 'Состояние',
                      'Дата отправки', 'Дата просмотра (для пушей)', 'INC_ID', 'INC_REG_DATE',
                      'Канал обращения', 'Номер с которого обратились', 'Обращение до оператора']
        df.sort_values(['DAMAGE_ID',
                        'USER_ID',
                        'INC_REG_DATE']).to_csv('./artefacts/incidents_after_notifying.csv',
                                                index=False)
        return
class MetricsCollection():
    """Computes the metric dictionary ``d_m`` consumed by the report and
    the funnel graph.

    ``get_helper_variables`` fills ``h_v`` with intermediate DataFrames;
    the ``block_*`` methods each compute one node of the funnel, storing
    results under short-prefixed keys in ``d_m`` (e.g. 'all_d', 'fp_i_op').

    NOTE(review): ``d_m`` and ``h_v`` are *class-level* mutable dicts, so
    they are shared across all instances (including the
    GraphAndReportMaker subclass). Fine for single-instance use, but
    moving them into __init__ would be safer — TODO confirm nothing
    relies on the shared state.
    """
    # metric name -> value accumulator (shared across instances, see note)
    d_m = {}
    # helper DataFrames keyed by short names (shared across instances)
    h_v = {}

    def __init__(self, damage_df, incidents, log_df, predicted_users, notify_df, df_variant_B):
        # Damages that actually finished (have a client switch-back date).
        self.d_df = damage_df.dropna(subset=['CLIENT_SWITCH_DATE'])
        # Incidents restricted to those finished damages.
        self.i_df = incidents[incidents.DAMAGE_ID.isin(self.d_df.DAMAGE_ID.values)]
        # Prediction-log rows for those damages, one per (damage, user).
        self.l_df = log_df[log_df.DAMAGE_ID.isin(self.d_df.DAMAGE_ID.values)].drop_duplicates(subset=['DAMAGE_ID', 'USER_ID']).reset_index(drop=True)
        self.p_df = predicted_users
        self.n_df = notify_df
        self.df_B = df_variant_B

    def get_helper_variables(self):
        """Build the shared helper DataFrames in ``h_v`` used by the blocks."""
        # Log rows whose damage also produced at least one incident.
        new_df_inc = self.l_df[self.l_df.DAMAGE_ID.isin(self.i_df.DAMAGE_ID.values)]
        # Split prediction rows by whether a start notification went out.
        self.h_v['notified_dam_df'] = self.p_df[(self.p_df.IS_NOTIFIED_ABOUT_START != 0)]
        self.h_v['not_notified_dam_df'] = self.p_df[(self.p_df.IS_NOTIFIED_ABOUT_START == 0)]
        self.h_v['notified_dam_filter_pass_df'] = new_df_inc[new_df_inc.DAMAGE_ID.isin(self.h_v['notified_dam_df'].DAMAGE_ID.values)]
        self.h_v['dam_filter_not_pass_df'] = new_df_inc[~new_df_inc.DAMAGE_ID.isin(self.h_v['notified_dam_df'].DAMAGE_ID.values)]
        # Class 1: affected users that were predicted; class 0: affected but not predicted.
        self.h_v['class_1'] = self.d_df[(self.d_df.DAMAGE_ID.isin(self.h_v['notified_dam_filter_pass_df'].DAMAGE_ID.values)) &\
                                        (self.d_df.USER_ID.isin(self.p_df.USER_ID.values))]
        class_0 = self.d_df[(self.d_df.DAMAGE_ID.isin(self.h_v['notified_dam_filter_pass_df'].DAMAGE_ID.values)) &\
                            (~self.d_df.USER_ID.isin(self.p_df.USER_ID.values))]
        # Class-0 users that fell into experiment variant B have INS_DATE set.
        self.h_v['class0_with_B'] = class_0.merge(self.df_B, how='left', on=['DAMAGE_ID', 'USER_ID'])
        self.h_v['var_B_df'] = self.h_v['class0_with_B'].dropna(subset=['INS_DATE']).drop(columns=['INS_DATE'])
        # NOTE(review): DataFrame.append is deprecated (removed in pandas 2.0);
        # pd.concat([...]) is the forward-compatible equivalent.
        self.h_v['class1_with_B'] = self.h_v['class_1'].append(self.h_v['var_B_df'])
        self.h_v['notify_1'] = self.n_df.merge(self.h_v['class_1'], how='left', on=['USER_ID'])
        # Keep notifications sent while the damage was open, with a 2-minute
        # guard before the switch-back date.
        notify_class1 = self.h_v['notify_1'][(self.h_v['notify_1'].REG_DATE < self.h_v['notify_1'].LAST_SEND_DATE) &\
                                             (self.h_v['notify_1'].LAST_SEND_DATE < self.h_v['notify_1'].CLIENT_SWITCH_DATE - timedelta(seconds=120))]
        self.h_v['notify_class1'] = notify_class1.drop_duplicates(subset=['USER_ID'])
        self.h_v['notify_class1_inc'] = notify_class1.merge(self.i_df, how='left', on=['DAMAGE_ID', 'USER_ID'])
        # Successfully delivered SMS / push, and the subsets that still had incidents.
        self.h_v['sms_df'] = self.h_v['notify_class1_inc'][(self.h_v['notify_class1_inc'].MES_TYPE == 'sms') & (self.h_v['notify_class1_inc'].STATE == 'send')]
        self.h_v['sms_inc'] = self.h_v['sms_df'].dropna(subset=['INC_ID'])
        self.h_v['push_df'] = self.h_v['notify_class1_inc'][(self.h_v['notify_class1_inc'].MES_TYPE == 'push') & (self.h_v['notify_class1_inc'].STATE == 'send')]
        self.h_v['push_inc'] = self.h_v['push_df'].dropna(subset=['INC_ID'])
        # Damages that never passed the notification filters, enriched with
        # the per-damage user count and downtime minutes ('Diff').
        dam_filter_not_pass_df = new_df_inc[~new_df_inc.DAMAGE_ID.isin(self.h_v['notified_dam_df'].DAMAGE_ID.values)]
        filter_not_pass_df_orig = self.d_df[self.d_df.DAMAGE_ID.isin(dam_filter_not_pass_df.DAMAGE_ID.values)]
        filter_not_pass_df2 = dam_filter_not_pass_df[['DAMAGE_ID', 'DAM_USERS_COUNT']]\
            .drop_duplicates(subset=['DAMAGE_ID'])
        self.h_v['filter_not_pass_df'] = filter_not_pass_df_orig.merge(filter_not_pass_df2,how='right', on=['DAMAGE_ID']).dropna()
        self.h_v['filter_not_pass_df']['Diff'] = (self.h_v['filter_not_pass_df'].CLIENT_SWITCH_DATE -
                                                  self.h_v['filter_not_pass_df'].REG_DATE).dt.total_seconds()/60

    def get_inc_for_hour(self):
        """Build ``i_df_1``: incidents registered within 2 hours of damage start."""
        self.i_df_1 = self.i_df.merge(self.d_df[['DAMAGE_ID','REG_DATE']], how='left', on=['DAMAGE_ID']).drop_duplicates()
        self.i_df_1['DIFF'] = self.i_df_1.INC_REG_DATE - self.i_df_1.REG_DATE
        self.i_df_1 = self.i_df_1[self.i_df_1.DIFF.dt.total_seconds().astype(int) < 3600 * 2]
        # Restore the original incident column set (drop REG_DATE / DIFF).
        self.i_df_1 = self.i_df_1[self.i_df.columns]

    def adding_var_b(self, list_=['i', 'i_u', 'i_o', 'i_o_1']):
        """Shift the remainder of variant-A incident metrics into variant B
        so that va_* only counts sms/push before+after components, and
        recompute the operator percentages for both variants.

        NOTE(review): mutable default argument — harmless here because the
        list is never mutated, but a tuple would be cleaner.
        """
        for el in list_:
            self.d_m[f'vb_{el}'] += (self.d_m[f'va_{el}'] - (self.d_m[f'sa_{el}'] + self.d_m[f'sb_{el}'] + self.d_m[f'pa_{el}'] + self.d_m[f'pb_{el}']))
            self.d_m[f'va_{el}'] = self.d_m[f'sa_{el}'] + self.d_m[f'sb_{el}'] + self.d_m[f'pa_{el}'] + self.d_m[f'pb_{el}']
        try:
            self.d_m['vb_i_op'] = self.d_m['vb_i_o'] / self.d_m['vb_a_u']
            self.d_m['vb_i_op_1'] = self.d_m['vb_i_o_1'] / self.d_m['vb_a_u']
        except ZeroDivisionError:
            self.d_m['vb_i_op'] = self.d_m['vb_i_op_1'] = 0
        try:
            self.d_m['va_i_op'] = self.d_m['va_i_o'] / self.d_m['va_a_u']
            self.d_m['va_i_op_1'] = self.d_m['va_i_o_1'] / self.d_m['va_a_u']
        except ZeroDivisionError:
            self.d_m['va_i_op'] = self.d_m['va_i_op_1'] = 0

    def get_stat(self, df, mode, name, flag=True):
        """Compute the metrics selected by *mode* for *df*, storing them in
        ``d_m`` under ``'{name}_*'`` keys.

        Mode flags: 'dam' unique damages, 'aff' affected rows, 'aff_u'
        unique affected users, 'inc'/'inc_u' incident counts (when *flag*
        is True, *df* is joined against i_df first; otherwise *df* is
        already an incident frame), 'inc_o'/'inc_op' operator contacts
        (REG_DEPARTMENT_ID 105/134) and their share of unique users,
        'inc_o_1'/'inc_op_1' the same restricted to the first 2 hours.
        Unselected incident metrics are stored as None.
        """
        if 'dam' in mode:
            self.d_m[f'{name}_d'] = df.DAMAGE_ID.nunique()
        if 'aff' in mode:
            self.d_m[f'{name}_a'] = df.shape[0]
        if 'aff_u' in mode:
            self.d_m[f'{name}_a_u'] = df.USER_ID.nunique()
        if flag and 'inc' in mode:
            df_inc = self.i_df.merge(df, how='left', on=['DAMAGE_ID', 'USER_ID']).dropna(subset=['REG_DATE'])
        else: df_inc = df
        self.d_m[f'{name}_i'] = df_inc.shape[0] if 'inc' in mode else None
        self.d_m[f'{name}_i_u'] = df_inc.USER_ID.nunique() if 'inc_u' in mode else None
        self.d_m[f'{name}_i_o'] = df_inc[df_inc.REG_DEPARTMENT_ID.isin([105, 134])].shape[0] if 'inc_o' in mode else None
        try:
            self.d_m[f'{name}_i_op'] = self.d_m[f'{name}_i_o'] / self.d_m[f'{name}_a_u'] if 'inc_op' in mode else None
        except ZeroDivisionError:
            self.d_m[f'{name}_i_op'] = 0
        if 'inc_o_1' in mode:
            if flag:
                df_inc_1 = self.i_df_1.merge(df, how='left', on=['DAMAGE_ID', 'USER_ID']).dropna(subset=['REG_DATE'])
            elif not flag:
                df_inc_1 = self.i_df_1[self.i_df_1.INC_ID.isin(df_inc.INC_ID.values)]
            self.d_m[f'{name}_i_o_1'] = df_inc_1[df_inc_1.REG_DEPARTMENT_ID.isin([105, 134])].shape[0]
        try:
            self.d_m[f'{name}_i_op_1'] = self.d_m[f'{name}_i_o_1'] / self.d_m[f'{name}_a_u'] if 'inc_op_1' in mode else None
        except ZeroDivisionError:
            self.d_m[f'{name}_i_op_1'] = 0

    def block_a(self):
        # Overall totals across all finished damages ('all_*').
        self.get_stat(self.d_df, ['dam', 'aff', 'aff_u'], 'all')
        self.get_stat(self.i_df, ['inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'all', flag=False)

    def block_b(self):
        # Damages that produced incidents ('inc_*'); incident totals equal
        # the global ones, so they are copied from 'all_*'.
        self.get_stat(self.i_df, ['dam'], 'inc')
        only_inc_dam = self.d_df[self.d_df.DAMAGE_ID.isin(self.i_df.DAMAGE_ID.values)]
        self.get_stat(only_inc_dam, ['aff', 'aff_u'], 'inc')
        self.d_m['inc_i'] = self.d_m['all_i']
        self.d_m['inc_i_u'] = self.d_m['all_i_u']
        self.d_m['inc_i_o'] = self.d_m['all_i_o']
        self.d_m['inc_i_op'] = self.d_m['inc_i_o'] / self.d_m['inc_a_u']
        self.d_m['inc_i_o_1'] = self.d_m['all_i_o_1']
        self.d_m['inc_i_op_1'] = self.d_m['inc_i_o_1'] / self.d_m['inc_a_u']

    def block_c(self):
        # Damages with no incidents at all ('wo_*').
        dam_wo_inc = self.d_df[~self.d_df.DAMAGE_ID.isin(self.i_df.DAMAGE_ID.values)]
        self.get_stat(dam_wo_inc, ['dam', 'aff', 'aff_u'], 'wo')

    def block_d(self):
        # Damages with incidents that never appeared in the prediction logs ('nl_*').
        dam_not_log_df = self.i_df[~self.i_df.DAMAGE_ID.isin(self.l_df.DAMAGE_ID.values)]
        self.get_stat(dam_not_log_df, ['dam'], 'nl')
        not_log_df = self.d_df[self.d_df.DAMAGE_ID.isin(dam_not_log_df.DAMAGE_ID.values)]
        self.get_stat(not_log_df, ['aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'nl')

    def block_e(self):
        # Damages that were present in the prediction logs ('li_*').
        new_df_inc = self.l_df[self.l_df.DAMAGE_ID.isin(self.i_df.DAMAGE_ID.values)]
        self.get_stat(new_df_inc, ['dam'], 'li')
        log_inc_dam_df = self.d_df[self.d_df.DAMAGE_ID.isin(new_df_inc.DAMAGE_ID.values)]
        self.get_stat(log_inc_dam_df, ['aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'li')

    def block_f(self):
        # Incidents whose (damage, user) pair is absent from the damage acts ('na_*').
        not_act_inc_df = self.i_df.merge(self.d_df, how='left', on=['DAMAGE_ID', 'USER_ID'])
        not_act_inc_df = not_act_inc_df[not_act_inc_df.REG_DATE.isna()]
        self.get_stat(not_act_inc_df, ['dam', 'inc', 'inc_u', 'inc_o', 'inc_o_1'], 'na', flag=False)

    def block_g(self):
        # Damages that did not pass the notification filters ('fnp_*').
        self.get_stat(self.h_v['dam_filter_not_pass_df'], ['dam'], 'fnp')
        filter_not_pass_df = self.d_df[self.d_df.DAMAGE_ID.isin(self.h_v['dam_filter_not_pass_df'].DAMAGE_ID.values)]
        self.get_stat(filter_not_pass_df, ['aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'fnp')

    def block_h(self):
        # Damages for which notifications were sent ('fp_*').
        self.get_stat(self.h_v['notified_dam_filter_pass_df'], ['dam'], 'fp')
        filter_pass_df = self.d_df[self.d_df.DAMAGE_ID.isin(self.h_v['notified_dam_filter_pass_df'].DAMAGE_ID.values)]
        self.get_stat(filter_pass_df, ['aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'fp')

    def block_i(self):
        # Filter-rejected damages that overlap the not-notified predictions
        # (billing/dispatcher logic); stats are computed later in blocks_ijkl.
        billing_dam = self.h_v['not_notified_dam_df'][self.h_v['not_notified_dam_df'].DAMAGE_ID.\
            isin(self.h_v['filter_not_pass_df'].DAMAGE_ID.values)]
        billing_dam = self.h_v['filter_not_pass_df'][self.h_v['filter_not_pass_df'].DAMAGE_ID.isin(billing_dam.DAMAGE_ID.values)]
        return billing_dam

    def block_j(self):
        # Night damages ('nt_*'): registered 22:00 or later, or before 8:00
        # on weekdays / before 10:00 on weekends.
        night_dam = self.h_v['filter_not_pass_df'].copy()
        night_dam['weekday'] = night_dam.REG_DATE.dt.weekday
        night_dam = night_dam[(night_dam.REG_DATE.dt.hour >= 22) | ((night_dam.REG_DATE.dt.hour < 8) & (night_dam.weekday.isin([0, 1, 2, 3, 4]))) |\
                              ((night_dam.REG_DATE.dt.hour < 10) & (night_dam.weekday.isin([5, 6])))]
        night_dam.drop(columns=['weekday'], inplace=True)
        self.get_stat(night_dam, ['dam', 'aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'nt')
        return night_dam

    def block_l(self, night_dam):
        # Short damages ('dur_*'): downtime <= 10 minutes, excluding night ones.
        duration_dam = self.h_v['filter_not_pass_df'][(self.h_v['filter_not_pass_df']['Diff'] <= 10) &
                                                      (~self.h_v['filter_not_pass_df'].DAMAGE_ID.isin(night_dam.DAMAGE_ID.values))]
        self.get_stat(duration_dam, ['dam', 'aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'dur')
        return duration_dam

    def block_k(self, night_dam, duration_dam):
        # Damages with implausible affected-user counts ('ct_*'): <= 10 or >= 500,
        # excluding night and short damages.
        count_dam = self.h_v['filter_not_pass_df'][(~self.h_v['filter_not_pass_df'].DAMAGE_ID.isin(night_dam.DAMAGE_ID.values)) & \
                                                   (~self.h_v['filter_not_pass_df'].DAMAGE_ID.isin(duration_dam.DAMAGE_ID.values)) & \
                                                   ((self.h_v['filter_not_pass_df'].DAM_USERS_COUNT <= 10) | \
                                                    (self.h_v['filter_not_pass_df'].DAM_USERS_COUNT >= 500))]
        self.get_stat(count_dam, ['dam', 'aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'ct')
        return count_dam

    def blocks_ijkl(self):
        # Partition the filter-rejected damages into night/duration/count/
        # billing ('b_*') and unexplained leftovers ('x_*').
        billing_dam = self.block_i()
        night_dam = self.block_j()
        duration_dam = self.block_l(night_dam)
        count_dam = self.block_k(night_dam, duration_dam)
        def diff_dam(df):
            # Remove damages already attributed to the other three categories.
            return df[~(df.DAMAGE_ID.isin(duration_dam.DAMAGE_ID.values)) &
                      ~(df.DAMAGE_ID.isin(night_dam.DAMAGE_ID.values)) &
                      ~(df.DAMAGE_ID.isin(count_dam.DAMAGE_ID.values))]
        billing_dam = diff_dam(billing_dam)
        x_dam = self.h_v['filter_not_pass_df'][~self.h_v['filter_not_pass_df'].DAMAGE_ID.isin(billing_dam.DAMAGE_ID.values)]
        x_dam = diff_dam(x_dam)
        self.get_stat(billing_dam, ['dam', 'aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'b')
        self.get_stat(x_dam, ['dam', 'aff', 'aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'x')

    def block_m(self):
        # Class 1 plus variant-B users ('c1_*').
        self.get_stat(self.h_v['class1_with_B'], ['aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'c1')

    def block_n(self):
        # Variant-B users ('vb_*') and their damage count.
        self.get_stat(self.h_v['var_B_df'], ['aff_u', 'inc', 'inc_u', 'inc_o', 'inc_o_1'], 'vb')
        self.d_m['vb_d'] = self.h_v['notified_dam_filter_pass_df'][self.h_v['notified_dam_filter_pass_df'].DAMAGE_ID.isin(self.df_B.DAMAGE_ID.values)].DAMAGE_ID.nunique()

    def block_o(self):
        # Class 0 without the variant-B users ('c0_*').
        class0_df = self.h_v['class0_with_B'][self.h_v['class0_with_B'].INS_DATE.isna()]
        self.get_stat(class0_df, ['aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'c0')

    def block_p(self):
        # Unique users that received a push.
        self.d_m['push_a_u'] = self.h_v['push_df'].USER_ID.nunique()

    def block_q(self):
        # Unique users that received an SMS.
        self.d_m['sms_a_u'] = self.h_v['sms_df'].USER_ID.nunique()

    def block_r(self):
        # Notifications that ended in the 'not send' state.
        notify_class1_inc = self.h_v['notify_class1'].merge(self.i_df, how='left', on=['DAMAGE_ID', 'USER_ID'])
        self.h_v['not_send_df'] = notify_class1_inc[notify_class1_inc.STATE == 'not send']
        self.d_m['ns_a_u'] = self.h_v['not_send_df'].shape[0]

    def block_s(self):
        # Notifications stuck in the 'handling' state.
        notify_class1_inc = self.h_v['notify_class1'].merge(self.i_df, how='left', on=['DAMAGE_ID', 'USER_ID'])
        self.h_v['handling_df'] = notify_class1_inc[notify_class1_inc.STATE == 'handling']
        self.d_m['h_a_u'] = self.h_v['handling_df'].shape[0]

    def block_t(self):
        # Notifications to class-1 users that fell outside the proper send
        # window: 8 a.m. sends count as "after night", the rest as improper.
        notify_not_prop = self.n_df[self.n_df.USER_ID.isin(self.h_v['class_1'].USER_ID.values)]
        notify_not_prop = notify_not_prop[~notify_not_prop.USER_ID.isin(self.h_v['notify_class1'].USER_ID.values)]
        if notify_not_prop.empty:
            self.h_v['notify_not_prop'] = self.h_v['notify_after_night'] = notify_not_prop.copy()
            self.d_m['nant_a_u'] = self.d_m['nnp_a_u'] = 0
        else:
            self.h_v['notify_after_night'] = notify_not_prop[notify_not_prop.LAST_SEND_DATE.dt.hour.isin([8])]  # morning sends happen only at 8:00
            self.d_m['nant_a_u'] = self.h_v['notify_after_night'].shape[0]
            self.h_v['notify_not_prop'] = notify_not_prop[~notify_not_prop.LAST_SEND_DATE.dt.hour.isin([8])]
            self.d_m['nnp_a_u'] = self.h_v['notify_not_prop'].shape[0]

    def block_u(self):
        # Class-1 users that received no notification at all ('nn_*').
        not_notify_usrs = set(self.h_v['class_1'].USER_ID.values) - set(self.h_v['notify_class1'].USER_ID.values) \
            - set(self.h_v['notify_after_night'].USER_ID.values) \
            - set(self.h_v['notify_not_prop'].USER_ID.values)
        self.h_v['not_notify_df'] = self.h_v['class_1'][self.h_v['class_1'].USER_ID.isin(not_notify_usrs)]
        self.get_stat(self.h_v['not_notify_df'], ['aff_u', 'inc', 'inc_u', 'inc_o', 'inc_op', 'inc_o_1', 'inc_op_1'], 'nn')

    def block_v(self):
        # Incidents registered before the push was sent ('pb_*').
        push_before_df = self.h_v['push_inc'][self.h_v['push_inc'].INC_REG_DATE <= self.h_v['push_inc'].LAST_SEND_DATE]
        self.get_stat(push_before_df, ['inc', 'inc_u', 'inc_o', 'inc_o_1'], 'pb', flag=False)
        try:
            self.d_m['pb_i_op'] = self.d_m['pb_i_o'] / self.d_m['push_a_u']
            self.d_m['pb_i_op_1'] = self.d_m['pb_i_o_1'] / self.d_m['push_a_u']
        except ZeroDivisionError:
            self.d_m['pb_i_op'] = self.d_m['pb_i_op_1'] = 0

    def block_w(self):
        # Incidents registered after the push ('pa_*'); requires
        # h_v['var_A_df'], so new_block_b must run first (see run_all).
        push_after_df = self.h_v['push_inc'][self.h_v['push_inc'].INC_REG_DATE > self.h_v['push_inc'].LAST_SEND_DATE]
        push_after_df = self.h_v['var_A_df'][self.h_v['var_A_df'].USER_ID.isin(push_after_df.USER_ID.values)]
        self.get_stat(push_after_df, ['inc', 'inc_u', 'inc_o', 'inc_o_1'], 'pa')
        try:
            self.d_m['pa_i_op'] = self.d_m['pa_i_o'] / self.d_m['push_a_u']
            self.d_m['pa_i_op_1'] = self.d_m['pa_i_o_1'] / self.d_m['push_a_u']
        except ZeroDivisionError:
            self.d_m['pa_i_op'] = self.d_m['pa_i_op_1'] = 0
        # Rebalance the variant-A/B incident split now that sa/sb/pa/pb exist.
        self.adding_var_b()

    def block_x(self):
        # Incidents registered before the SMS was sent ('sb_*').
        sms_before_df = self.h_v['sms_inc'][self.h_v['sms_inc'].INC_REG_DATE <= self.h_v['sms_inc'].LAST_SEND_DATE]
        self.get_stat(sms_before_df, ['inc', 'inc_u', 'inc_o', 'inc_o_1'], 'sb', flag=False)
        try:
            self.d_m['sb_i_op'] = self.d_m['sb_i_o'] / self.d_m['sms_a_u']
            self.d_m['sb_i_op_1'] = self.d_m['sb_i_o_1'] / self.d_m['sms_a_u']
        except ZeroDivisionError:
            self.d_m['sb_i_op'] = self.d_m['sb_i_op_1'] = 0

    def block_y(self):
        # Incidents registered after the SMS ('sa_*'); also needs var_A_df.
        sms_after_df = self.h_v['sms_inc'][self.h_v['sms_inc'].INC_REG_DATE > self.h_v['sms_inc'].LAST_SEND_DATE]
        sms_after_df = self.h_v['var_A_df'][self.h_v['var_A_df'].USER_ID.isin(sms_after_df.USER_ID.values)]
        self.get_stat(sms_after_df, ['inc', 'inc_u', 'inc_o', 'inc_o_1'], 'sa')
        try:
            self.d_m['sa_i_op'] = self.d_m['sa_i_o'] / self.d_m['sms_a_u']
            self.d_m['sa_i_op_1'] = self.d_m['sa_i_o_1'] / self.d_m['sms_a_u']
        except ZeroDivisionError:
            self.d_m['sa_i_op'] = self.d_m['sa_i_op_1'] = 0

    def new_block_b(self):
        # Variant A ('va_*'): class 1 minus mistakes and never-notified users.
        # Depends on h_v['mistakes_df'] (new_block_a) and h_v['not_notify_df'] (block_u).
        self.h_v['var_A_df'] = self.h_v['class_1'][~self.h_v['class_1'].USER_ID.isin(np.union1d(self.h_v['mistakes_df'].USER_ID.values, self.h_v['not_notify_df'].USER_ID.values))]
        self.get_stat(self.h_v['var_A_df'], ['aff_u', 'inc', 'inc_u', 'inc_o', 'inc_o_1'], 'va')

    def new_block_a(self):
        # Mistakes ('m_*'): after-night, improper, handling and not-send
        # notifications combined, with their incident counts.
        self.h_v['mistakes_df'] = pd.concat([self.h_v['notify_after_night'], self.h_v['notify_not_prop'],
                                             self.h_v['handling_df'], self.h_v['not_send_df']],sort=True)
        self.d_m['m_a_u'] = self.h_v['mistakes_df'].shape[0]
        class1_inc_df = self.i_df.merge(self.h_v['class1_with_B'], how='right', on=['USER_ID', 'DAMAGE_ID']).dropna(subset=['INC_ID'])
        mistakes_inc_df = class1_inc_df[class1_inc_df.USER_ID.isin(self.h_v['mistakes_df'].USER_ID.values)]
        self.d_m['m_i'] = mistakes_inc_df.shape[0]
        self.d_m['m_i_u'] = mistakes_inc_df.USER_ID.nunique()
        self.d_m['m_i_o'] = mistakes_inc_df[mistakes_inc_df.REG_DEPARTMENT_ID.isin([105, 134])].shape[0]
        self.d_m['m_i_o_1'] = mistakes_inc_df[(mistakes_inc_df.REG_DEPARTMENT_ID.isin([105, 134])) &
                                              (mistakes_inc_df.INC_ID.isin(self.i_df_1.INC_ID.values))].shape[0]
        try:
            self.d_m['m_i_op'] = self.d_m['m_i_o'] / self.d_m['m_a_u']
            self.d_m['m_i_op_1'] = self.d_m['m_i_o_1'] / self.d_m['m_a_u']
        except ZeroDivisionError:
            self.d_m['m_i_op'] = self.d_m['m_i_op_1'] = 0
class GraphAndReportMaker(MetricsCollection):
    """Runs every metric block, renders the funnel graph with graphviz and
    inserts/returns the report table."""

    # Templates for the graph-node sub-blocks.
    affected = 'Affected: {}, users: {}'
    incidents = 'Incidents: {}, users: {}'
    operator = 'Operator: {} / {}'
    operator2 = 'Operator: {} ({:.2%}) / {} ({:.2%})'

    def run_all(self):
        """Execute all metric blocks in dependency order.

        Order matters: get_helper_variables() fills h_v; block_r/s/t/u must
        run before new_block_a/new_block_b; new_block_b must run before
        block_y and block_w (they read h_v['var_A_df']).
        """
        self.get_inc_for_hour()
        self.get_helper_variables()
        self.block_a()
        self.block_b()
        self.block_c()
        self.block_d()
        self.block_e()
        self.block_f()
        self.block_g()
        self.block_h()
        self.blocks_ijkl()
        self.block_m()
        self.block_n()
        self.block_o()
        self.block_p()
        self.block_q()
        self.block_r()
        self.block_s()
        self.block_t()
        self.block_u()
        self.block_v()
        self.block_x()
        self.new_block_a()
        self.new_block_b()
        self.block_y()
        self.block_w()

    def make_block(self, title, mode, name):
        """Render up to four text sub-blocks for one graph node.

        *mode* is a list of ``x_N`` flags: x_0 puts a d_m count into the
        title; x_1/x_2 add the affected line; x_3/x_4 the incidents line;
        x_5/x_6 the operator counts; x_7/x_8 upgrade them to percentages.

        Fix: the original tested e.g. ``'x_1' and 'x_2' in mode``, which —
        because a non-empty string literal is truthy — only checked
        ``'x_2' in mode``. Callers always pass the flags in pairs, so the
        corrected checks produce identical output for every existing call
        site while removing the latent bug.
        """
        if 'x_0' in mode and 'affected' in title:
            subblock1 = title.format(self.d_m[f'{name}_a_u'])
        elif 'x_0' in mode and 'affected' not in title:
            subblock1 = title.format(self.d_m[f'{name}_d'])
        else:
            subblock1 = title
        subblock2 = (self.affected.format(self.d_m[f'{name}_a'], self.d_m[f'{name}_a_u'])
                     if 'x_1' in mode and 'x_2' in mode else None)
        subblock3 = (self.incidents.format(self.d_m[f'{name}_i'], self.d_m[f'{name}_i_u'])
                     if 'x_3' in mode and 'x_4' in mode else None)
        has_operator = 'x_5' in mode and 'x_6' in mode
        has_percent = 'x_7' in mode and 'x_8' in mode
        if has_operator and not has_percent:
            subblock4 = self.operator.format(self.d_m[f'{name}_i_o'], self.d_m[f'{name}_i_o_1'])
        elif has_operator and has_percent:
            subblock4 = self.operator2.format(self.d_m[f'{name}_i_o'], self.d_m[f'{name}_i_op'],
                                              self.d_m[f'{name}_i_o_1'], self.d_m[f'{name}_i_op_1'])
        else:
            subblock4 = None
        return subblock1, subblock2, subblock3, subblock4

    def x4_block(self, title, name):
        """Full node: title count, affected, incidents, operator percentages."""
        return self.make_block(title, ['x_0', 'x_1', 'x_2', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8'], name)

    def x3_block_aff(self, title, name):
        """Node without the affected line (title carries the user count)."""
        return self.make_block(title, ['x_0', 'x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8'], name)

    def x3_block_inc(self, title, name):
        """Incident-only node: plain title, incidents, operator percentages."""
        return self.make_block(title, ['x_3', 'x_4', 'x_5', 'x_6', 'x_7', 'x_8'], name)

    def get_graph(self):
        """Render the funnel as a graphviz record graph to ./artefacts/report."""
        A1, A2, A3, A4 = self.x4_block('All damages: {}', 'all')
        B1, B2, B3, B4 = self.x4_block('Dam with inc: {}', 'inc')
        C1, C2, _, _ = self.make_block('Dam w/o inc: {}', ['x_0', 'x_1', 'x_2'], 'wo')
        D1, D2, D3, D4 = self.x4_block('Not in logs: {}', 'nl')
        E1, E2, E3, E4 = self.x4_block('Predicted: {}', 'li')
        F1, _, F2, F3 = self.make_block('Incidents not act: {}', ['x_0', 'x_3', 'x_4', 'x_5', 'x_6'], 'na')
        G1, G2, G3, G4 = self.x4_block('Filters not passed: {}', 'fnp')
        H1, H2, H3, H4 = self.x4_block('Notifications sent: {}', 'fp')
        I1, I2, I3, I4 = self.x4_block('Night dam: {}', 'nt')
        J1, J2, J3, J4 = self.x4_block('Count dam: {}', 'ct')
        K1, K2, K3, K4 = self.x4_block('Duration dam: {}', 'dur')
        Y1, Y2, Y3, Y4 = self.x4_block('Billing dam: {}', 'b')
        L1, L2, L3, L4 = self.x4_block('X dam: {}', 'x')
        M1, _, M2, M3 = self.x3_block_aff('Class 1, affected: {}', 'c1')
        # Z1, _, Z2, Z3 = self.x3_block_aff('Var B, affected: {}', 'vb')  ## Var B
        N1, _, N2, N3 = self.x3_block_aff('Class 0, affected: {}', 'c0')
        O1 = 'Push sent: {}'.format(self.d_m['push_a_u'])
        P1 = 'SMS sent: {}'.format(self.d_m['sms_a_u'])
        Q1 = 'Not sent: {}'.format(self.d_m['ns_a_u'])
        R1 = 'Handling: {}'.format(self.d_m['h_a_u'])
        S1, _, S2, S3 = self.x3_block_aff('Not notify, affected: {}', 'nn')
        T1 = 'Notify after niqht: {}'.format(self.d_m['nant_a_u'])
        U1, _, U2, U3 = self.x3_block_inc('Incidents before push', 'pb')
        V1, _, V2, V3 = self.x3_block_inc('Incidents after push', 'pa')
        W1, _, W2, W3 = self.x3_block_inc('Incidents before sms', 'sb')
        X1, _, X2, X3 = self.x3_block_inc('Incidents after sms', 'sa')
        new_A1, _, new_A2, new_A3 = self.x3_block_aff('Var A, affected: {}', 'va')
        new_B1, _, new_B2, new_B3 = self.x3_block_aff('Mistakes, affected: {}', 'm')
        new_C = 'Change usluga: {}'.format(self.d_m['nnp_a_u'])
        # Record-shaped nodes; each field is one sub-block.
        dot = Digraph('structs', node_attr={'shape': 'record'})
        dot.node('A', f'{{ <f0> {A1}|<f1> {A2}|<f2> {A3}|<f3> {A4}}}')
        dot.node('B', f'{{ <f0> {B1}|<f1> {B2}|<f2> {B3}|<f3> {B4}}}')
        dot.node('C', f'{{ <f0> {C1}|<f1> {C2}}}')
        dot.node('D', f'{{ <f0> {D1}|<f1> {D2}|<f2> {D3}|<f3> {D4}}}')
        dot.node('E', f'{{ <f0> {E1}|<f1> {E2}|<f2> {E3}|<f3> {E4}}}')
        dot.node('F', f'{{ <f0> {F1}|<f1> {F2}|<f2> {F3}}}')
        dot.node('G', f'{{ <f0> {G1}|<f1> {G2}|<f2> {G3}|<f3> {G4}}}')
        dot.node('H', f'{{ <f0> {H1}|<f1> {H2}|<f2> {H3}|<f3> {H4}}}')
        dot.node('I', f'{{ <f0> {I1}|<f1> {I2}|<f2> {I3}|<f3> {I4}}}')
        dot.node('K', f'{{ <f0> {K1}|<f1> {K2}|<f2> {K3}|<f3> {K4}}}')
        dot.node('J', f'{{ <f0> {J1}|<f1> {J2}|<f2> {J3}|<f3> {J4}}}')
        dot.node('Y', f'{{ <f0> {Y1}|<f1> {Y2}|<f2> {Y3}|<f3> {Y4}}}')
        dot.node('L', f'{{ <f0> {L1}|<f1> {L2}|<f2> {L3}|<f3> {L4}}}')
        dot.node('M', f'{{ <f0> {M1}|<f1> {M2}|<f2> {M3}}}')
        # dot.node('Z', f'{{ <f0> {Z1}|<f1> {Z2}|<f2> {Z3}}}')  ## Var B
        dot.node('N', f'{{ <f0> {N1}|<f1> {N2}|<f2> {N3}}}')
        dot.node('O', O1)
        dot.node('P', P1)
        dot.node('Q', Q1)
        dot.node('R', R1)
        dot.node('T', T1)
        dot.node('U', f'{{ <f0> {U1}|<f1> {U2}|<f2> {U3}}}')
        dot.node('V', f'{{ <f0> {V1}|<f1> {V2}|<f2> {V3}}}')
        dot.node('W', f'{{ <f0> {W1}|<f1> {W2}|<f2> {W3}}}')
        dot.node('X', f'{{ <f0> {X1}|<f1> {X2}|<f2> {X3}}}')
        dot.node('new_A', f'{{ <f0> {new_A1}|<f1> {new_A2}|<f2> {new_A3}}}')
        dot.node('new_B', f'{{ <f0> {new_B1}|<f1> {new_B2}|<f2> {new_B3}}}')
        dot.node('new_C', new_C)
        dot.node('S', f'{{ <f0> {S1}|<f1> {S2}|<f2> {S3}}}')
        dot.edges(['AB', 'AC', 'BD','BE', 'BF', 'EG','EH',
                   'GI', 'GJ','GK','GY','GL', 'HM', 'HN',  ## 'MZ',
                   ('M', 'new_A'), ('M', 'new_B'), 'MS',('new_A', 'O'),
                   ('new_A', 'P'), ('new_B', 'Q'), ('new_B', 'R'),
                   ('new_B', 'new_C'), ('new_B', 'T'),
                   'OU', 'OV', 'PW', 'PX'])
        dot.render('report','./artefacts/', view=False)

    def insert_report(self, weekly=True):
        """Build the report table from d_m, store it in the predict-logs DB
        (weekly or daily table) and return it.

        Rows whose metric value is None (metrics not computed for a group)
        are dropped; 'i_op'/'i_op_1' shares are scaled to percent.
        """
        with open('./configs/params.json', 'r') as f:
            params = json.load(f)
        predict_logs = Logs(user=params['predict_logs']['user'],
                            password=params['predict_logs']['password'],
                            host=params['predict_logs']['host'],
                            db_name=params['predict_logs']['db_name'])
        df = pd.DataFrame(list(self.d_m.items()), columns=['METRIC', 'Значение'])
        df['Значение'] = df['Значение'].round(decimals=4)
        # Metric keys look like '<group>_<metric>'; split on the first '_'.
        df['Группа'] = df['METRIC'].apply(lambda x: re.split(r'_', x, maxsplit=1)[0])
        df['Метрика'] = df['METRIC'].apply(lambda x: re.split(r'_', x, maxsplit=1)[1])
        df.drop(columns=['METRIC'], inplace=True)
        df.dropna(inplace=True)
        df.loc[df['Метрика'].isin(['i_op', 'i_op_1']), 'Значение'] = df['Значение'] * 100
        df_old = df.copy()
        df_old['INS_DATE'] = str(date.today())
        if weekly:
            predict_logs.insert_report_logs(df_old)
        else:
            predict_logs.insert_report_logs_daily(df_old)
        return df

    def get_report(self):
        """Return the report with group/metric codes mapped to the
        human-readable (Russian) labels, indexed by (group, metric)."""
        df = self.insert_report()
        dict_ = {'all': 'Все аварии', 'inc': 'Аварии с инцидентами', 'wo': 'Аварии без инцидентов',
                 'nl': 'Нет в логах', 'li': 'Были предсказания', 'na': 'Нет в актах', 'fnp': 'Не прошли фильтры',
                 'fp': 'Отправлены оповещения', 'nt': 'Ночные аварии', 'ct': 'Аварии с неверным кол-вом пострадавших',
                 'dur': 'Аварии с неверной длительностью', 'b': 'Аварии не прошедшие логику биллинга/Диспетчера',
                 'x': 'Неизвестная категория аварий', 'c1': 'Класс 1',  ## 'vb': 'Категория B',
                 'c0': 'Класс 0', 'push': 'Пуши отправлены', 'sms': 'Смс отправлены', 'ns': 'Не отправлено оповещение',
                 'h': 'Зависло', 'nant': 'Ошибочные отправки после ночи','nnp': 'Изменилась услуга', 'nn': 'Не оповещены',
                 'pb': 'Инциденты до пушей', 'sb': 'Инциденты до смс', 'm': 'Ошибки', 'va': 'Предсказаны, категория A',
                 'sa': 'Инциденты после смс', 'pa': 'Инциденты после пушей'}
        dict_2 = {'d': 'Количество аварий', 'a': 'Все пострадавшие', 'a_u': 'Уникальные пострадавшие',
                  'i': 'Все инциденты', 'i_u': 'Инциденты от уникальных пострадавших', 'i_o': 'Все обращения до оператора',
                  'i_op': 'Процент обращений до оператора от уникального кол-ва пострадавших',
                  'i_op_1': 'Процент обращений до оператора в первые 2 часа от уникального кол-ва пострадавших',
                  'i_o_1': 'Обращения до оператора в первые 2 часа'}
        df['Группа'] = df['Группа'].map(dict_)
        # Groups without a label (unmapped codes) are dropped from the report.
        df = df[df['Группа'].notna()]
        df['Метрика'] = df['Метрика'].map(dict_2)
        df = df.set_index(['Группа', 'Метрика'])
        return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment