Last active
April 10, 2020 12:02
-
-
Save nreijmer/4144471ed436d680b47f3b5b1742e4e0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create the long and short range (in days) for the trend calculation. | |
# See the explanation in the blog for the proper use of these values. | |
# SET (short, long) = (5,14)-- any number | |
# Create dataset with all onsite searches based on unique visitors and keyword+date | |
# Only picking up words that do not contain other values then text or only numbers | |
WITH | |
preselect AS ( | |
SELECT | |
DISTINCT(date) AS date_derived, | |
UPPER(Page.SearchKeyword) AS SearchKeyword, | |
COUNT (DISTINCT fullvisitorID) AS nr_searches_word | |
FROM | |
--- enter: dataset with the Google Analytics offloading schema | |
, | |
UNNEST(hits) AS T | |
WHERE | |
Page.SearchKeyword IS NOT NULL | |
AND REGEXP_CONTAINS(Page.SearchKeyword, '^[A-Za-z0-9]+$') | |
AND REGEXP_CONTAINS(Page.SearchKeyword, r'[a-z]') | |
AND (isEntrance IS NULL | |
AND trafficSource.medium NOT LIKE 'cpc') | |
GROUP BY | |
1, | |
2 | |
ORDER BY | |
2, | |
1 DESC ), | |
# Calculating the number of searches per day | |
dates_volumes AS ( | |
SELECT | |
DISTINCT(date) AS date_derived, | |
COUNT (DISTINCT fullvisitorid) AS nr_searches_dag | |
FROM | |
--- enter: dataset with the Google Analytics offloading schema, same as in "preselect" | |
, | |
UNNEST(hits) AS T | |
WHERE | |
Page.SearchKeyword IS NOT NULL | |
AND (isEntrance IS NULL | |
AND trafficSource.medium NOT LIKE 'cpc') | |
GROUP BY | |
1 ), | |
# Calculating the share of searches for each word on that specific day | |
share_of_searches AS ( | |
SELECT | |
A.*, | |
B.nr_searches_dag, | |
nr_searches_word/nr_searches_dag AS share | |
FROM | |
preselect A | |
JOIN | |
dates_volumes B | |
ON | |
A.date_derived = B.date_derived | |
WHERE | |
nr_searches_word > 2 ), | |
#Finding the previous values for number of words/share for each unique keyword | |
previous_share_of_searches AS ( | |
SELECT | |
*, | |
LAG(nr_searches_word) OVER(PARTITION BY Searchkeyword ORDER BY date_derived) AS LAG_nr_word, | |
LAG(share) OVER(PARTITION BY Searchkeyword ORDER BY date_derived) AS LAG_share | |
FROM | |
share_of_searches | |
ORDER BY | |
searchkeyword, | |
date_derived ), | |
# Calculating indexnumbers for share and number of searches for each unique keyword | |
index_share_of_searches AS ( | |
SELECT | |
*, | |
nr_searches_word/LAG_nr_word AS index_searches_yesterday, | |
share/LAG_share AS index_share_yesterday | |
FROM | |
previous_share_of_searches ), | |
# Calculating the power of the indexnumbers | |
# Calculating the moving averages | |
moving_averages AS ( | |
SELECT | |
*, | |
(index_searches_yesterday - 1)* nr_searches_word AS Power_nr_searches, | |
(index_share_yesterday - 1) * nr_searches_word AS Power_share, | |
AVG(nr_searches_word) OVER(PARTITION BY SearchKeyword ORDER BY date_derived ASC ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS MA_short_searches, | |
AVG(nr_searches_word) OVER(PARTITION BY SearchKeyword ORDER BY date_derived ASC ROWS BETWEEN 15 PRECEDING AND CURRENT ROW) AS MA_long_searches | |
FROM | |
index_share_of_searches ) | |
# Creating the final dataset for DataStudio | |
# Calculating the power of the trend numbers based on the moving averages | |
SELECT | |
*, | |
(MA_short_searches - MA_long_searches) AS trend_power, | |
(MA_short_searches - MA_long_searches) / MA_short_searches AS relative_trend_power | |
FROM | |
moving_averages |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment