Skip to content

Instantly share code, notes, and snippets.

@nreijmer
Last active April 10, 2020 12:02
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save nreijmer/4144471ed436d680b47f3b5b1742e4e0 to your computer and use it in GitHub Desktop.
Save nreijmer/4144471ed436d680b47f3b5b1742e4e0 to your computer and use it in GitHub Desktop.
# Create the long and short range (in days) for the trend calculation.
# See the explanation in the blog for the proper use of these values.
# SET (short, long) = (5,14)-- any number
# Create dataset with all onsite searches based on unique visitors and keyword+date
# Only picking up words that do not contain other values then text or only numbers
WITH
preselect AS (
SELECT
DISTINCT(date) AS date_derived,
UPPER(Page.SearchKeyword) AS SearchKeyword,
COUNT (DISTINCT fullvisitorID) AS nr_searches_word
FROM
--- enter: dataset with the Google Analytics offloading schema
,
UNNEST(hits) AS T
WHERE
Page.SearchKeyword IS NOT NULL
AND REGEXP_CONTAINS(Page.SearchKeyword, '^[A-Za-z0-9]+$')
AND REGEXP_CONTAINS(Page.SearchKeyword, r'[a-z]')
AND (isEntrance IS NULL
AND trafficSource.medium NOT LIKE 'cpc')
GROUP BY
1,
2
ORDER BY
2,
1 DESC ),
# Calculating the number of searches per day
dates_volumes AS (
SELECT
DISTINCT(date) AS date_derived,
COUNT (DISTINCT fullvisitorid) AS nr_searches_dag
FROM
--- enter: dataset with the Google Analytics offloading schema, same as in "preselect"
,
UNNEST(hits) AS T
WHERE
Page.SearchKeyword IS NOT NULL
AND (isEntrance IS NULL
AND trafficSource.medium NOT LIKE 'cpc')
GROUP BY
1 ),
# Calculating the share of searches for each word on that specific day
share_of_searches AS (
SELECT
A.*,
B.nr_searches_dag,
nr_searches_word/nr_searches_dag AS share
FROM
preselect A
JOIN
dates_volumes B
ON
A.date_derived = B.date_derived
WHERE
nr_searches_word > 2 ),
#Finding the previous values for number of words/share for each unique keyword
previous_share_of_searches AS (
SELECT
*,
LAG(nr_searches_word) OVER(PARTITION BY Searchkeyword ORDER BY date_derived) AS LAG_nr_word,
LAG(share) OVER(PARTITION BY Searchkeyword ORDER BY date_derived) AS LAG_share
FROM
share_of_searches
ORDER BY
searchkeyword,
date_derived ),
# Calculating indexnumbers for share and number of searches for each unique keyword
index_share_of_searches AS (
SELECT
*,
nr_searches_word/LAG_nr_word AS index_searches_yesterday,
share/LAG_share AS index_share_yesterday
FROM
previous_share_of_searches ),
# Calculating the power of the indexnumbers
# Calculating the moving averages
moving_averages AS (
SELECT
*,
(index_searches_yesterday - 1)* nr_searches_word AS Power_nr_searches,
(index_share_yesterday - 1) * nr_searches_word AS Power_share,
AVG(nr_searches_word) OVER(PARTITION BY SearchKeyword ORDER BY date_derived ASC ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS MA_short_searches,
AVG(nr_searches_word) OVER(PARTITION BY SearchKeyword ORDER BY date_derived ASC ROWS BETWEEN 15 PRECEDING AND CURRENT ROW) AS MA_long_searches
FROM
index_share_of_searches )
# Creating the final dataset for DataStudio
# Calculating the power of the trend numbers based on the moving averages
SELECT
*,
(MA_short_searches - MA_long_searches) AS trend_power,
(MA_short_searches - MA_long_searches) / MA_short_searches AS relative_trend_power
FROM
moving_averages
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment