Skip to content

Instantly share code, notes, and snippets.

View lfy79001's full-sized avatar

Fangyu Lei lfy79001

View GitHub Profile
with prep as (
select
user_pseudo_id,
(select value.int_value from unnest(event_params) where key = 'ga_session_id') as session_id,
array_agg((select value.string_value from unnest(event_params) where key = 'source') ignore nulls
order by event_timestamp)[safe_offset(0)] as source,
array_agg((select value.string_value from unnest(event_params) where key = 'medium') ignore nulls
order by event_timestamp)[safe_offset(0)] as medium,
array_agg((select value.string_value from unnest(event_params) where key = 'campaign') ignore nulls
order by event_timestamp)[safe_offset(0)] as campaign
WITH BUILDING AS(
SELECT A.NAMES['local']::VARCHAR AS NAME
,B.Class
,B.subclass
,count(9) num_buidling
,SUM(B.surface_area_sq_m) sum_surface_area_sq_m
FROM "NETHERLANDS"."V_ADMINISTRATIVE" A
INNER
JOIN(SELECT Class
,subclass
SELECT
SEASON_ID-20000 AS season,
TEAM_ID_HOME AS team_id,
TEAM_NAME_HOME AS team_name,
FG3_PCT_HOME AS three_point_percentage,
COUNT(TEAM_ID_HOME) AS team_game_count,
"home" AS game_location
FROM Game
GROUP BY SEASON_ID, TEAM_ID_HOME
SELECT
namePlayer AS player_name,
nameTeam AS team_name,
2021 - strftime('%Y', Player_Attributes.BIRTHDATE) AS age,
Player_Attributes.DRAFT_YEAR AS draft_year,
2021 - Player_Attributes.DRAFT_YEAR AS years_in_NBA,
Player_Attributes.POSITION AS game_position,
ROUND(value/1000000) AS salary_in_millions
FROM Player_Salary
JOIN Player ON
WITH CustomerData AS (
SELECT
customer_unique_id,
customer_zip_code_prefix AS zip_code_prefix,
COUNT(DISTINCT orders.order_id) AS order_count,
SUM(payment_value) AS total_payment,
JULIANDAY(MIN(order_purchase_timestamp)) AS first_order_day,
JULIANDAY(MAX(order_purchase_timestamp)) AS last_order_day
FROM customers
JOIN orders USING (customer_id)
WITH RecencyScore AS (
SELECT customer_unique_id,
MAX(order_purchase_timestamp) AS last_purchase,
NTILE(5) OVER (ORDER BY MAX(order_purchase_timestamp) DESC) AS recency
FROM orders
JOIN customers USING (customer_id)
WHERE order_status = 'delivered'
GROUP BY customer_unique_id
),
FrequencyScore AS (
WITH DailySalesPerCategory AS (
SELECT
DATE(order_purchase_timestamp) AS date,
-- Days since 2017-01-01
CAST(JULIANDAY(order_purchase_timestamp) - JULIANDAY('2017-01-01') AS INTEGER) AS day,
product_category_name_english AS category,
SUM(price) AS sales
FROM
orders
JOIN order_items USING (order_id)
SELECT
DATE(order_purchase_timestamp) AS date,
-- Days since 2017-01-01
CAST(JULIANDAY(order_purchase_timestamp) - JULIANDAY('2017-01-01') AS INTEGER) AS day,
product_category_name_english AS category,
SUM(price) AS sales
FROM
orders
JOIN order_items USING (order_id)
JOIN products USING (product_id)
SELECT
bike_number,
AVG(dist_in_m) AS avg_dist_m,
SUM(dist_in_m) AS total_dist_m
FROM (
SELECT
ST_DISTANCE(
ST_GEOGPOINT(start_lon, start_lat),
ST_GEOGPOINT(end_lon, end_lat)
) AS dist_in_m,
-- pulling user page views from GA4 events
WITH base_table AS (
-- pulls relevant columns from relevant dates to decrease the size of data scanned
SELECT
event_name,
event_date,
event_timestamp,
user_pseudo_id,
user_id,
device,