nreijmer/trend_dashboard.sql

## trend_dashboard.sql
-- Create the long and short range (in days) for the trend calculation. See the explanation in the blog for the proper use of these values.

#Create dataset with all onsite searches based on unique visitors and keyword+date
#Only picking up words that do not contain other values then text or only numbers
# Excluding often misused listerpages when CPC is the channel

WITH preselect AS
(
SELECT DISTINCT(date) as date_derived
  , UPPER(productSKU) as SearchKeyword
  , COUNT (DISTINCT fullvisitorID) as nr_searches_word
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_2017*`
  , UNNEST(hits) AS T
  , UNNEST(product) AS P
WHERE productSKU IS NOT NULL
GROUP BY 1,2
ORDER BY 2,1 DESC
)

# Calculating the number of searches per day

, dates_volumes AS
(
SELECT DISTINCT(date) as date_derived
  , COUNT (DISTINCT fullvisitorid) as nr_searches_dag
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_2017*`
  , UNNEST(hits) AS T
  , UNNEST(product)
WHERE productSKU IS NOT NULL
GROUP BY 1
)

# Calculating the share of searches for each word on that specific day

, dataset1 AS
(
SELECT A.*
  , B.nr_searches_dag
  , nr_searches_word/nr_searches_dag As share
FROM preselect A
JOIN dates_volumes B
  ON A.date_derived = B.date_derived
WHERE nr_searches_word > 2
)

#Finding the previous values for number of words/share for each unique keyword

, dataset2 AS
(
SELECT *
  , LAG(nr_searches_word) OVER(PARTITION BY Searchkeyword ORDER BY date_derived) AS LAG_nr_word
  , LAG(share) OVER(PARTITION BY Searchkeyword ORDER BY date_derived) AS LAG_share
FROM dataset1
ORDER BY searchkeyword, date_derived
)

# Calculating indexnumbers for share and number of searches for each unique keyword

, dataset3 AS
(
SELECT *
  , nr_searches_word/LAG_nr_word AS index_searches_yesterday
  , share/LAG_share AS index_share_yesterday
FROM dataset2
)

# Calculating the power of the indexnumbers
# Calculating the moving averages

, dataset4 AS
(
SELECT *
  , (index_searches_yesterday - 1)* nr_searches_word AS Power_nr_searches
  , (index_share_yesterday - 1) * nr_searches_word AS Power_share
  , AVG(nr_searches_word) OVER(PARTITION BY SearchKeyword ORDER BY date_derived ASC ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS MA_short_searches
  , AVG(nr_searches_word) OVER(PARTITION BY SearchKeyword ORDER BY date_derived ASC ROWS BETWEEN 15 PRECEDING AND CURRENT ROW) AS MA_long_searches
FROM dataset3
)

# Creating the final dataset for DataStudio
# Calculating the power of the trend numbers based on the moving averages
SELECT *
  , (MA_short_searches - MA_long_searches) AS trend_power
  , (MA_short_searches - MA_long_searches) / MA_short_searches AS relative_trend_power
FROM dataset4
	-- Create the long and short range (in days) for the trend calculation. See the explanation in the blog for the proper use of these values.

	#Create dataset with all onsite searches based on unique visitors and keyword+date
	#Only picking up words that do not contain other values then text or only numbers
	# Excluding often misused listerpages when CPC is the channel

	WITH preselect AS
	(
	SELECT DISTINCT(date) as date_derived
	, UPPER(productSKU) as SearchKeyword
	, COUNT (DISTINCT fullvisitorID) as nr_searches_word
	FROM `bigquery-public-data.google_analytics_sample.ga_sessions_2017*`
	, UNNEST(hits) AS T
	, UNNEST(product) AS P
	WHERE productSKU IS NOT NULL
	GROUP BY 1,2
	ORDER BY 2,1 DESC
	)

	# Calculating the number of searches per day

	, dates_volumes AS
	(
	SELECT DISTINCT(date) as date_derived
	, COUNT (DISTINCT fullvisitorid) as nr_searches_dag
	FROM `bigquery-public-data.google_analytics_sample.ga_sessions_2017*`
	, UNNEST(hits) AS T
	, UNNEST(product)
	WHERE productSKU IS NOT NULL
	GROUP BY 1
	)

	# Calculating the share of searches for each word on that specific day

	, dataset1 AS
	(
	SELECT A.*
	, B.nr_searches_dag
	, nr_searches_word/nr_searches_dag As share
	FROM preselect A
	JOIN dates_volumes B
	ON A.date_derived = B.date_derived
	WHERE nr_searches_word > 2
	)

	#Finding the previous values for number of words/share for each unique keyword

	, dataset2 AS
	(
	SELECT *
	, LAG(nr_searches_word) OVER(PARTITION BY Searchkeyword ORDER BY date_derived) AS LAG_nr_word
	, LAG(share) OVER(PARTITION BY Searchkeyword ORDER BY date_derived) AS LAG_share
	FROM dataset1
	ORDER BY searchkeyword, date_derived
	)

	# Calculating indexnumbers for share and number of searches for each unique keyword

	, dataset3 AS
	(
	SELECT *
	, nr_searches_word/LAG_nr_word AS index_searches_yesterday
	, share/LAG_share AS index_share_yesterday
	FROM dataset2
	)

	# Calculating the power of the indexnumbers
	# Calculating the moving averages

	, dataset4 AS
	(
	SELECT *
	, (index_searches_yesterday - 1)* nr_searches_word AS Power_nr_searches
	, (index_share_yesterday - 1) * nr_searches_word AS Power_share
	, AVG(nr_searches_word) OVER(PARTITION BY SearchKeyword ORDER BY date_derived ASC ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS MA_short_searches
	, AVG(nr_searches_word) OVER(PARTITION BY SearchKeyword ORDER BY date_derived ASC ROWS BETWEEN 15 PRECEDING AND CURRENT ROW) AS MA_long_searches
	FROM dataset3
	)

	# Creating the final dataset for DataStudio
	# Calculating the power of the trend numbers based on the moving averages
	SELECT *
	, (MA_short_searches - MA_long_searches) AS trend_power
	, (MA_short_searches - MA_long_searches) / MA_short_searches AS relative_trend_power
	FROM dataset4