Skip to content

Instantly share code, notes, and snippets.

@mshakhomirov
Created May 6, 2024 19:27
Show Gist options
  • Save mshakhomirov/ed1a3f8c13de140b4ec3b3c5e8ff8904 to your computer and use it in GitHub Desktop.
Save mshakhomirov/ed1a3f8c13de140b4ec3b3c5e8ff8904 to your computer and use it in GitHub Desktop.
with opportunity as (
select distinct
'dbo_opportunity' datasource
,'Opportunity created' event_name
, toLowCardinality(id) id
, parseDateTimeBestEffortOrNull(createdon) timestamp_utc
, toDate(timestamp_utc) date_utc
,nullif(lower(contactid ),'none') missing_contactid --? contactid in this table is missing
,upper(nullif(lower(customerid ),'none')) customerid
,nullif(lower(parentaccountid ),'none') parentaccountid
,nullif(lower(customeridname ),'none') customeridname
,nullif(lower(parentaccountidname),'none') companyname
,nullif(lower(ncbh_contactemail ),'none') ncbh_contactemail
,upper(nullif(lower(originatingleadid ),'none')) originatingleadid
,if(toFloat64(actualvalue) = 0
, 999
,toFloat64(actualvalue)
) actualvalue
from
im_9598_90e.prod_dataverse_dbo_opportunity_9598_azure_sql
where
date_utc > toDate('2024-04-01')
order by
actualvalue desc
)
-- select count(*) from opportunity limit 10
-- select * from opportunity limit 10
, contact as (
select
'contact' data_source
,'Contact created' event_name
,coalesce(
nullif(emailaddress1,'')
,nullif(emailaddress2,'')
,nullif(emailaddress3,'')
) email
,lower(hex(SHA256(coalesce(
email
)))) user_id
,parseDateTimeBestEffortOrNull(createdon) timestamp_utc
, toDate(timestamp_utc) date_utc
,'web_form' traffic_source_name
,'gtm_form_submit' traffic_source_medium
,'thenationalcouncil.hrmdirect.com' traffic_source_source -- get source from ga4
,upper(nullif(lower(contactid ),'none')) contactid --E44685A7-7E62-E711-8103-5065F38B31A1 = id
,toLowCardinality(id) id -- same as contactid
,nullif(lower(company),'none') company -- ! Do not use
,upper(nullif(lower(accountid),'none')) accountid -- None
,upper(nullif(lower(accountidname),'none')) accountidname -- None
,altai_createnewaccountname -- None
,altai_councilidname -- None
,upper(nullif(lower(originatingleadid),'none')) originatingleadid -- we have approx. 571 out of 525000 matching leads. What is it?
,altai_webcompanyname -- None
from im_9598_90e.prod_dataverse_dbo_contact_9598_azure_sql
-- desc table im_9598_90e.prod_dataverse_dbo_contact_9598_azure_sql
where 1=1
and date_utc > toDate('2024-04-01')
and
(
length(emailaddress1) > 4 or
length(emailaddress2) > 4 or
length(emailaddress3) > 4
)
)
-- select * from contact limit 10
-- select count(*) from contact limit 10
, conversion as (
select distinct
con.datasource
,con.event_name
,con.timestamp_utc
,con.missing_contactid --? contactid in this table is missing
,con.originatingleadid
,con.customerid
,coalesce(c.contactid,c2.contactid) contactid -- ? We try to identify contact from opps using both customerid and originatingleadid from contact table
,con.parentaccountid
,con.customeridname
,con.companyname companyname --join c on contactid to add company name to ga4 case_id
,con.ncbh_contactemail
,con.actualvalue
from
opportunity con
left join contact c
on
con.customerid = c.contactid
left join contact c2
on
con.originatingleadid = c2.originatingleadid
)
-- select * from conversion limit 10
-- select count(*) from conversion limit 10
-- Identify CRM cases using company id, if not present then use user_id hash for attribution case
, crm_opportunity_contacts as (
select
c.data_source
,c.event_name
,co.companyname
,c.email
,c.user_id
,coalesce(co.companyname, c.user_id) case_id
,c.timestamp_utc
,c.traffic_source_name
,c.traffic_source_medium
,c.traffic_source_source
,con.actualvalue
from
contact c
-- inner join conversion co
left join conversion co
on
co.contactid = c.contactid
)
-- select * from crm_opportunity_contacts order by email limit 100
-- select count(*) from crm_opportunity_contacts limit 10
-- users who submitted meails via web form
, ga4_users as (
select
user_pseudo_id
FROM im_9598_90e.ga_4_events_a9598_9598_gbq_all_data
where 1=1
and user_id is not null
and user_id <> ''
and length(user_id) > 4
)
, ga4_user_events as (
SELECT
datasource
, event_name
, lower(user_id) user_id
, user_pseudo_id
,parseDateTimeBestEffortOrNull(timestamp_utc) timestamp_utc
, toDate(timestamp_utc) date_utc
, assumeNotNull(event_params_json_string) as event_params
, JSONExtract(event_params,
'Array(Tuple(
key String
,value Tuple(
string_value String
,int_value Int32
,float_value Float32
,double_value Float32
)
)
)'
) as event_params_extracted
, CAST(event_params_extracted, 'Map(String, Tuple(String,Int32,Float32,Float32))') as event_params_map
, event_params_map['page_refferer'].1 as page_refferer
, event_params_map['medium'].1 as medium
, event_params_map['campaign'].1 as campaign
, event_params_map['source'].1 as source
,traffic_source_name
,traffic_source_medium
,traffic_source_source
FROM im_9598_90e.ga_4_events_a9598_9598_gbq_all_data
where 1=1
and user_pseudo_id in (select user_pseudo_id from ga4_users)
)
-- select count(distinct user_id), count(*) from ga4_user_events limit 10
-- select * from ga4_user_events order by user_pseudo_id, timestamp_utc desc limit 10
-- select distinct
-- traffic_source_name
-- ,traffic_source_medium
-- ,traffic_source_source
-- from ga4_user_events order by user_pseudo_id, timestamp_utc desc limit 1000
-- activity events with channels
, events as (
select
datasource data_source
,g.event_name
,g.user_id -- email hash. join on contacts to get a company from crm
,g.timestamp_utc
,g.traffic_source_name
,g.traffic_source_medium
,g.traffic_source_source
,coalesce(c.case_id , g.user_id) case_id
,c.companyname
,c.email
from
ga4_user_events g
-- left join
inner join
crm_opportunity_contacts c on
g.user_id = c.user_id
)
-- select * from events limit 100
-- select count(*) from events limit 100
-- select * from contact where user_id in (select user_id from ga4_user_events)
---- ads
-- select count(*) from dev_attribution.attribution__ads limit 10
-- select distinct advertiser_name from dev_attribution.attribution__ads limit 100
desc table im_9598_90e.mrt_paid_ads_fields
select * FROM im_9598_90e.mrt_paid_ads_fields limit 10
with
ads_data as (
SELECT
date
, channel
, datasource
, ad_name
,campaign_name
, adset_name
, campaign_id
, spend
, creative_name
, advertiser_name
, advertiser_id
, adset_id
, campaign_type
, creative_id
FROM `improvado-9598-90e.improvado.paid_ads__basic_performance`
WHERE date >= "2024-04-01"
)
select distinct datasource, campaign_name, ad_name from ads_data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment