Skip to content

Instantly share code, notes, and snippets.

@elliottcordo
Last active February 9, 2023 21:06
Show Gist options
  • Save elliottcordo/f1aa33d4930c47797ba7fbaa2ffb2e78 to your computer and use it in GitHub Desktop.
Save elliottcordo/f1aa33d4930c47797ba7fbaa2ffb2e78 to your computer and use it in GitHub Desktop.
use database "CRM_POC_DB";
use schema "CUST_CLEANUP";
drop table match_results;
create temporary table match_results as
with cust as (
select
ACCT_CP,
COMPANY_CUST,
COMPANY_CODE,
NUM_CP,
FN_CP,
LN_CP,
EMAIL_CP,
REGEXP_REPLACE(CELL_CP, '[^0-9]','') CELL_CP,
REGEXP_REPLACE(TEL1_CP, '[^0-9]','') TEL1_CP,
REGEXP_REPLACE(TEL2_CP, '[^0-9]','') TEL2_CP,
REGEXP_REPLACE(TEL3_CP, '[^0-9]','') TEL3_CP,
REGEXP_REPLACE(TEL4_CP, '[^0-9]','') TEL4_CP,
coalesce(REGEXP_REPLACE(CELL_CP, '[^0-9]',''), '') || '|'
|| coalesce(REGEXP_REPLACE(TEL1_CP, '[^0-9]',''), '') || '|'
|| coalesce(REGEXP_REPLACE(TEL2_CP, '[^0-9]',''), '') || '|'
|| coalesce(REGEXP_REPLACE(TEL3_CP, '[^0-9]',''), '') || '|'
|| coalesce(REGEXP_REPLACE(TEL4_CP, '[^0-9]',''), '') as phone_list,
ADD_CP,
ADD1_CP,
CT_CP,
substring(ZIP_CP, 0, charindex('-', ZIP_CP) - 1) as ZIP_CP,
CN_CP,
OPT_CP,
BIRTH_CP,
ANV_CP
from "CRM_POC_DB"."CUST_CLEANUP"."KWI_CUST" A
where amd_cp in ('A', 'C')
// and NOT EXISTS (
// SELECT 'X'
// FROM "CRM_POC_DB"."CUST_CLEANUP"."VW_ACCOUNT_CLEANUP" C
// WHERE A.ACCT_CP = C.ID)
),
matches as (
select
a.ACCT_CP,
b.ACCT_CP as acct_cp_mc,
a.COMPANY_CUST,
b.COMPANY_CUST as company_cust_mc,
// a.NUM_CP,
a.FN_CP,
a.LN_CP,
b.FN_CP as fn_cp_mc,
b.LN_CP as ln_cp_mc,
a.EMAIL_CP,
b.EMAIL_CP as email_cp_mc,
a.CELL_CP,
a.TEL1_CP,
a.TEL2_CP,
a.TEL3_CP,
a.TEL4_CP,
b.phone_list as phone_list_mc,
a.phone_list,
a.ADD_CP,
a.ADD1_CP,
b.add1_cp as add1_cp_mc,
a.ZIP_CP,
b.zip_cp as zip_cd_mc
from cust a
join cust b on a.fn_cp = b.fn_cp
and a.ln_cp = b .ln_cp
and a.company_cust <> b.company_cust
and (
a.email_cp = b.email_cp
or a.TEL1_CP = b.TEL1_CP
or charindex(a.CELL_CP, b.phone_list) > 0
or charindex(a.TEL1_CP, b.phone_list) > 0
or charindex(a.TEL2_CP, b.phone_list) > 0
or charindex(a.TEL3_CP, b.phone_list) > 0
or charindex(a.TEL4_CP, b.phone_list) > 0
or (left(a.add_cp,5) = left(b.add_cp,5) and a.zip_cp = b.zip_cp))
)
select a.*
FROM matches a
INNER JOIN matches b
ON b.company_cust = a.company_cust_mc
AND b.company_cust_mc = a.company_cust;
--WHERE a.company_cust < a.company_cust_mc;
-- exportable results for python notebook
select COMPANY_CUST, COMPANY_CUST_MC
from match_results;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment