Skip to content

Instantly share code, notes, and snippets.

@lfy79001
Created May 13, 2024 00:10
Show Gist options
  • Save lfy79001/0420c63bcc57f2c052e4f0b522a741e2 to your computer and use it in GitHub Desktop.
Save lfy79001/0420c63bcc57f2c052e4f0b522a741e2 to your computer and use it in GitHub Desktop.
WITH DailySalesPerCategory AS (
SELECT
DATE(order_purchase_timestamp) AS date,
-- Days since 2017-01-01
CAST(JULIANDAY(order_purchase_timestamp) - JULIANDAY('2017-01-01') AS INTEGER) AS day,
product_category_name_english AS category,
SUM(price) AS sales
FROM
orders
JOIN order_items USING (order_id)
JOIN products USING (product_id)
JOIN product_category_name_translation USING (product_category_name)
WHERE
order_purchase_timestamp BETWEEN '2017-01-01' AND '2018-08-29'
AND category IN ('health_beauty', 'auto', 'toys', 'electronics', 'fashion_shoes')
GROUP BY
day,
product_category_name_english
),
LmPerCategory AS (
WITH DailySalesPerCategory AS (
SELECT
DATE(order_purchase_timestamp) AS date,
-- Days since 2017-01-01
CAST(JULIANDAY(order_purchase_timestamp) - JULIANDAY('2017-01-01') AS INTEGER) AS day,
product_category_name_english AS category,
SUM(price) AS sales
FROM
orders
JOIN order_items USING (order_id)
JOIN products USING (product_id)
JOIN product_category_name_translation USING (product_category_name)
WHERE
order_purchase_timestamp BETWEEN '2017-01-01' AND '2018-08-29'
AND category IN ('health_beauty', 'auto', 'toys', 'electronics', 'fashion_shoes')
GROUP BY
day,
product_category_name_english
)
SELECT
category,
-- Slope
(COUNT(*) * SUM(day * sales) - SUM(day) * SUM(sales)) /
(COUNT(*) * SUM(day * day) - SUM(day) * SUM(day))
AS slope,
-- Intercept
(SUM(sales) -
((COUNT(*) * SUM(day * sales) - SUM(day) * SUM(sales)) /
(COUNT(*) * SUM(day * day) - SUM(day) * SUM(day))) *
SUM(day)) / COUNT(*)
AS intercept
FROM
DailySalesPerCategory
GROUP BY
category
),
ForecastedSales AS (
SELECT
DATE(date, '+1 year') AS date,
category,
-- Increase in predicted sales * sales 1 year ago
(intercept + slope * (day + CAST(JULIANDAY('2018-12-31') - JULIANDAY('2017-12-31') AS INTEGER)))
/ (intercept + slope * day) * sales
AS forecasted_sales
FROM DailySalesPerCategory
JOIN LmPerCategory USING (category)
-- Filter for days of December 2018
WHERE day + CAST(JULIANDAY('2018-12-31') - JULIANDAY('2017-12-31') AS INTEGER)
BETWEEN CAST(JULIANDAY('2018-12-01') - JULIANDAY('2017-01-01') AS INTEGER)
AND CAST(JULIANDAY('2018-12-31') - JULIANDAY('2017-01-01') AS INTEGER)
)
SELECT
CAST(strftime('%d', date) AS INTEGER) AS december_2018_day,
category,
-- 5-day moving average
AVG(forecasted_sales)
OVER (PARTITION BY category ORDER BY date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)
AS moving_avg_sales
FROM ForecastedSales
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment