Skip to content

Instantly share code, notes, and snippets.

@nreijmer
Created March 11, 2020 19:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save nreijmer/dd1b20330eef2df3348d0b44a5962a1c to your computer and use it in GitHub Desktop.
Save nreijmer/dd1b20330eef2df3348d0b44a5962a1c to your computer and use it in GitHub Desktop.
-- Create the long and short range (in days) for the trend calculation. See the explanation in the blog for the proper use of these values.
#Create dataset with all onsite searches based on unique visitors and keyword+date
#Only picking up words that do not contain other values then text or only numbers
# Excluding often misused listerpages when CPC is the channel
WITH preselect AS
(
SELECT DISTINCT(date) as date_derived
, UPPER(productSKU) as SearchKeyword
, COUNT (DISTINCT fullvisitorID) as nr_searches_word
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_2017*`
, UNNEST(hits) AS T
, UNNEST(product) AS P
WHERE productSKU IS NOT NULL
GROUP BY 1,2
ORDER BY 2,1 DESC
)
# Calculating the number of searches per day
, dates_volumes AS
(
SELECT DISTINCT(date) as date_derived
, COUNT (DISTINCT fullvisitorid) as nr_searches_dag
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_2017*`
, UNNEST(hits) AS T
, UNNEST(product)
WHERE productSKU IS NOT NULL
GROUP BY 1
)
# Calculating the share of searches for each word on that specific day
, dataset1 AS
(
SELECT A.*
, B.nr_searches_dag
, nr_searches_word/nr_searches_dag As share
FROM preselect A
JOIN dates_volumes B
ON A.date_derived = B.date_derived
WHERE nr_searches_word > 2
)
#Finding the previous values for number of words/share for each unique keyword
, dataset2 AS
(
SELECT *
, LAG(nr_searches_word) OVER(PARTITION BY Searchkeyword ORDER BY date_derived) AS LAG_nr_word
, LAG(share) OVER(PARTITION BY Searchkeyword ORDER BY date_derived) AS LAG_share
FROM dataset1
ORDER BY searchkeyword, date_derived
)
# Calculating indexnumbers for share and number of searches for each unique keyword
, dataset3 AS
(
SELECT *
, nr_searches_word/LAG_nr_word AS index_searches_yesterday
, share/LAG_share AS index_share_yesterday
FROM dataset2
)
# Calculating the power of the indexnumbers
# Calculating the moving averages
, dataset4 AS
(
SELECT *
, (index_searches_yesterday - 1)* nr_searches_word AS Power_nr_searches
, (index_share_yesterday - 1) * nr_searches_word AS Power_share
, AVG(nr_searches_word) OVER(PARTITION BY SearchKeyword ORDER BY date_derived ASC ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS MA_short_searches
, AVG(nr_searches_word) OVER(PARTITION BY SearchKeyword ORDER BY date_derived ASC ROWS BETWEEN 15 PRECEDING AND CURRENT ROW) AS MA_long_searches
FROM dataset3
)
# Creating the final dataset for DataStudio
# Calculating the power of the trend numbers based on the moving averages
SELECT *
, (MA_short_searches - MA_long_searches) AS trend_power
, (MA_short_searches - MA_long_searches) / MA_short_searches AS relative_trend_power
FROM dataset4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment