Skip to content

Instantly share code, notes, and snippets.

View Osiyuks's full-sized avatar

Aleksandr Osiyuk Osiyuks

View GitHub Profile
#standardSQL
SELECT
ds,
cookcieID,
countIF(viewersLanding=1) AS viewersLanding,
countIF(ConversionPage=1 AND viewersLanding=1) AS ConversionPage
FROM (
SELECT
clientId,
REGEXP_EXTRACT(cd.value, r'{cookie default="cookcieID" type="input"}.\d+.(\d+)') AS cookcieID,
-- 1) Создаем UDF, которая меняет кодировку
CREATE TEMPORARY FUNCTION
URL_DECODE(enc STRING)
RETURNS STRING
LANGUAGE js AS """
try {
return decodeURI(enc);;
} catch (e) { return null }
return null;
""";
FORMAT_DATE("%Y%W", PARSE_DATE("%d.%m.%Y %H:%M", ds)) as ds,
-------------
regexp_extract(page, r'[?&]cityid=([^&?]*)') as cityid
-------------
STRING_AGG(channel, ' > ') OVER (PARTITION BY cid ORDER BY timestamp) AS path
-------------
Regexp_extract(cd9,r'^(?:[^\s]*\s){0}([^\s]*)\s?') as Apply,
Regexp_extract(cd9,r'^(?:[^\s]*\s){2}([^\s]*)\s?') as Log,
Regexp_extract(cd9,r'^(?:[^\s]*\s){4}([^\s]*)\s?') as Registration,
Regexp_extract(cd9,r'^(?:[^\s]*\s){6}([^\d]*)') as Resume
-- http://blog.troyshu.com/2017/11/
SELECT
count(distinct e0.user_dim.app_info.app_instance_id) as first_openers
, count(distinct e1_user) as photo_taken_1
, count(distinct e2_user) as photo_taken_2
, count(distinct e3_user) as photo_taken_3
FROM `youday_IOS.app_events_*` as e0, UNNEST (e0.event_dim) as e0_events
LEFT JOIN (
SELECT
with transctn as
(
select *
from
(select *, row_number() over(partition by ds, cidv order by timestamp) as rownum
from
(select concat(clientId,'_',regexp_extract(page.pagePath, r'/thanks/(\d+)')) as cidv,
timestamp,
FORMAT_DATE("%Y%m%d", DATE(TIMESTAMP_SECONDS(timestamp))) AS ds,
1 as transactions,
select *, TIMESTAMP(DATETIME(cast(regexp_extract(time, r'^(\d{4})') as int64), cast(regexp_extract(time, r'^\d{4}(\d{2})') as int64), cast(regexp_extract(time, r'^\d{4}\d{2}(\d{2})') as int64), cast(regexp_extract(time, r'^\d{4}\d{2}\d{2}(\d{2})') as int64), cast(regexp_extract(time, r'^\d{4}\d{2}\d{2}\d{2}(\d{2})') as int64), 59)) as d
from `bigdata.lead`
const Logging = require('@google-cloud/logging')
const parser = require('ua-parser-js')
const Busboy = require('busboy')
const log = new Logging({
projectId: process.env.X_GOOGLE_GCLOUD_PROJECT
with v as (
SELECT
CASE
WHEN trafficSource.referralPath like '%vacancy/list%'
and trafficSource.referralPath like '%rabota.ua%'THEN 'sitesearch'
WHEN regexp_extract(page.pagePath, r'[?&]utm_source=([^&?]*)') like '%criteo%'
or regexp_extract(page.pagePath, r'[?&]utm_source=([^&?]*)') like '%rtbhouse%'
or regexp_extract(page.pagePath, r'[?&]utm_medium=([^&?]*)') like '%cpc%'
or regexp_extract(page.pagePath, r'[?&]gclid=([^&?]*)') is not null THEN 'cpc'
WHEN (trafficSource.referralPath like '%google%'
WITH d AS (
-- should return (cid - user id, vid - item id, rating)
SELECT
clientId as cid,
REGEXP_EXTRACT(page.pagePath, r'/company\d+/vacancy(\d+)') AS vid,
MAX(
CASE
WHEN page.pagePath LIKE '%apply=thanks%' THEN 5
WHEN page.pagePath LIKE '%mode=apply%' THEN 2
SELECT name, value,
--Возвращает кумулятивное распределение значения группе значений.
--Рассчитывается по формуле <количество предыдущих строк окна, включая текущую> / <общее количество строк окна>
-- CUME_DIST(value) OVER (PARTITION BY name ORDER BY value) AS CUME_DIST,
--Возвращает ранг значения текущей строки относительно значений всего окна, исходя из заданной сортировки.
-- DENSE_RANK(value) OVER (PARTITION BY name ORDER BY value) as DENSE_RANK,
--Возвращает первое значение, или последнее (LAST_VALUE)
-- FIRST_VALUE(value) OVER (PARTITION BY name) as FIRST_VALUE,
--Возвращает значение строки по указанному полю, при этом поле смещено на указанное в функции количество строк до текущей строки.
--Синтаксис: LAG (field_name[, offset[, default_value]])
with t0 as (
SELECT FORMAT_DATE("%Y%m%d", DATE(TIMESTAMP_SECONDS(timestamp))) AS ds, clientId, timestamp,
case
when page.hostname <> 'm.rabota.ua' and page.hostname <> 'app.rabota.ua' then 'desktop'
else 'mobile' end as platform,
cd.value as cd9,
case
when NET.REG_DOMAIN(trafficSource.referralPath) NOT LIKE '%rabota.ua%' or NET.REG_DOMAIN(trafficSource.referralPath) IS NULL then 'outer'
when trafficSource.referralPath like '%cvwelcome?step=%' then 'step'
when regexp_extract(trafficSource.referralPath, r'(.*)\?')='https://rabota.ua/?'