Skip to content

Instantly share code, notes, and snippets.

@DominikStyp
Last active August 21, 2021 23:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DominikStyp/031bf7a7b1e4fff58499f6ccb18b6fcd to your computer and use it in GitHub Desktop.
Save DominikStyp/031bf7a7b1e4fff58499f6ccb18b6fcd to your computer and use it in GitHub Desktop.
SQL: Window Functions advanced example of extracting average (Uber)
USE test;
DROP TABLE IF EXISTS uber_request_logs;
CREATE TABLE IF NOT EXISTS
uber_request_logs (
request_id INT(11),
request_date DATE,
request_status VARCHAR(255),
distance_to_travel FLOAT,
monetary_cost FLOAT,
driver_to_client_distance FLOAT
);
INSERT INTO uber_request_logs(
request_id,
request_date,
request_status,
distance_to_travel,
monetary_cost,
driver_to_client_distance
)
VALUES
(1 ,'2020-01-09', 'success', 70.59, 6.56, 14.36),
(2 ,'2020-01-24', 'success', 93.36, 22.68, 19.9),
(3 ,'2020-02-08', 'fail', 51.24, 11.39, 21.32),
(4 ,'2020-02-23', 'success', 61.58, 8.04, 44.26),
(5 ,'2020-03-09', 'success', 25.04, 7.19, 1.74),
(6 ,'2020-03-24', 'fail', 45.57, 4.68, 24.19),
(7 ,'2020-04-08', 'success', 24.45, 12.69, 15.91),
(8 ,'2020-04-23', 'success', 48.22, 11.2, 48.82),
(9 ,'2020-05-08', 'success', 56.63, 4.04, 16.08),
(10 ,'2020-05-23', 'fail', 19.03, 16.65, 11.22),
(11 ,'2020-06-07', 'fail', 81, 6.56, 26.6),
(12 ,'2020-06-22', 'fail', 21.32, 8.86, 28.57),
(13 ,'2020-07-07', 'fail', 14.74, 17.76, 19.33),
(14 ,'2020-07-22', 'success', 66.73, 13.68, 14.07),
(15 ,'2020-08-06', 'success', 32.98, 16.17, 25.34),
(16 ,'2020-08-21', 'success', 46.49, 1.84, 41.9),
(17 ,'2020-09-05', 'fail', 45.98, 12.2, 2.46),
(18 ,'2020-09-20', 'success', 3.14, 24.8, 36.6),
(19 ,'2020-10-05', 'success', 75.33, 23.04, 29.99),
(20 ,'2020-10-20', 'success', 53.76, 22.94, 18.74);
request_date dist_to_cost avg_dist_to_cost abs_diff
2020-01-09 10.76 7.44 3.32
2020-01-24 4.12 7.44 3.32
2020-02-08 4.5 6.08 1.58
2020-02-23 7.66 6.08 1.58
2020-03-09 3.48 6.61 3.13
2020-03-24 9.74 6.61 3.13
2020-04-08 1.93 3.12 1.19
2020-04-23 4.31 3.12 1.19
2020-05-08 14.02 7.58 6.44
2020-05-23 1.14 7.58 6.44
2020-06-07 12.35 7.38 4.97
2020-06-22 2.41 7.38 4.97
2020-07-07 0.83 2.855 2.02
2020-07-22 4.88 2.855 2.02
2020-08-06 2.04 13.655 11.61
2020-08-21 25.27 13.655 11.62
2020-09-05 3.77 1.95 1.82
2020-09-20 0.13 1.95 1.82
2020-10-05 3.27 2.805 0.47
2020-10-20 2.34 2.805 0.46
SELECT b.request_date,
b.dist_to_cost,
b.avg_dist_to_cost,
ROUND(ABS(b.dist_to_cost - b.avg_dist_to_cost), 2) as abs_diff
FROM (
SELECT a.*,
AVG(a.dist_to_cost) OVER (PARTITION BY a.y_m) AS avg_dist_to_cost
FROM (
SELECT *,
DATE_FORMAT(`request_date`,'%Y-%m') AS y_m,
ROUND((distance_to_travel/monetary_cost), 2) AS dist_to_cost
FROM uber_request_logs
) a
ORDER BY a.request_date
) b
GROUP BY b.request_date, b.dist_to_cost, b.avg_dist_to_cost
ORDER BY b.request_date
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment