Skip to content

Instantly share code, notes, and snippets.

@wesseljt
Created May 7, 2024 22:32
Show Gist options
  • Save wesseljt/5c8572dab41168b2c0b82cedbd2bbf3a to your computer and use it in GitHub Desktop.
Save wesseljt/5c8572dab41168b2c0b82cedbd2bbf3a to your computer and use it in GitHub Desktop.
-- Create or replace the view with complex logic to select unique main user IDs
CREATE OR REPLACE VIEW final_user_id_mapping AS
WITH detailed_user_mappings AS (
-- Extract detailed information including all timestamps and sources
SELECT
m.user_id,
u.merged_id_array,
MIN(u.earliest_mapping_timestamp) AS first_mapping,
MAX(u.last_activity_timestamp) AS last_activity,
json_agg(u.audit_details) AS audit_details,
COUNT(*) OVER (PARTITION BY m.user_id) AS mapping_count
FROM user_stitching m
JOIN merged_user_ids u ON m.user_id = ANY(u.merged_id_array)
GROUP BY m.user_id, u.merged_id_array
),
validated_mappings AS (
-- Validate and filter mappings to ensure only the most relevant are included
SELECT
user_id,
merged_id_array,
first_mapping,
last_activity,
audit_details,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY last_activity DESC, first_mapping ASC) AS rn
FROM detailed_user_mappings
WHERE mapping_count > 1 -- Focus on users with multiple mappings for clarity
),
final_selection AS (
-- Final selection of unique user IDs
SELECT
user_id AS user_main_id,
unnest(merged_id_array) AS other_id,
'stitched_id' AS other_id_type,
first_mapping AS valid_at,
rn
FROM validated_mappings
WHERE rn = 1
)
SELECT
user_main_id,
other_id,
other_id_type,
valid_at,
ROW_NUMBER() OVER (PARTITION BY user_main_id ORDER BY valid_at ASC) AS row_number
FROM final_selection
WHERE row_number = 1;
-- This view is designed to give a comprehensive look at the user ID mapping process,
-- focusing on the most relevant and valid entries per user_main_id based on the earliest validation date.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment