Skip to content

Instantly share code, notes, and snippets.

@bindiego
Last active August 7, 2019 09:59
Show Gist options
  • Save bindiego/17898d41e98fae201ce2c1d1da3ba9fc to your computer and use it in GitHub Desktop.
Save bindiego/17898d41e98fae201ce2c1d1da3ba9fc to your computer and use it in GitHub Desktop.
Google Cloud BigQuery sample schema for clickstream events
#!/bin/bash
pwd=`pwd`
bq mk --table bindiego:click_stream.events $pwd/schema.json
delete from click_stream.events as e
where e.uid = '003'
insert into click_stream.events
(uid, event)
values
('001', [STRUCT('k1', STRUCT('v1', 1, 1.2))])
insert into click_stream.events
(uid, event)
values
('002', [STRUCT('k1', STRUCT('v1', 1, 1.2)),STRUCT('k2', STRUCT('v2', 12, 1.23)),STRUCT('k3', STRUCT('v3', 123, 1.234))])
insert into click_stream.events
(uid, event)
values
('003', [STRUCT('k1', STRUCT('v1', null, null)),STRUCT('k2', STRUCT(null, 12, null)),STRUCT('k3', STRUCT(null, null, 1.234))])
select e.uid, ev.*
from click_stream.events as e, unnest(e.event) as ev
where ev.key = 'k1' and ev.value.int_value is not null and ev.value.int_value >= 1
order by e.uid
select e.uid, sum(ev.value.int_value)
from click_stream.events as e, unnest(e.event) as ev
where ev.key = 'k1' and ev.value.int_value is not null
group by e.uid
order by e.uid
-------------------------------------------------
select e.uid, shijian
from click_stream.events as e
cross join unnest(e.event) as shijian
where shijian.key = 'k1' and shijian.value.int_value is not null and shijian.value.int_value >= 1
order by e.uid
select e.uid, sum(shijian.value.int_value)
from click_stream.events as e
cross join unnest(e.event) as shijian
where shijian.key = 'k1' and shijian.value.int_value is not null
group by e.uid
order by e.uid
[
{
"description": "timestamp",
"name": "ts",
"type": "TIMESTAMP",
"mode": "NULLABLE"
},
{
"name": "uid",
"type": "STRING",
"mode": "REQUIRED"
},
{
"name": "age",
"type": "INTEGER",
"mode": "NULLABLE"
},
{
"name": "event",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "key",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "value",
"type": "RECORD",
"mode": "NULLABLE",
"fields": [
{
"name": "string_value",
"type": "STRING"
},
{
"name": "int_value",
"type": "INTEGER"
},
{
"name": "float_value",
"type": "FLOAT"
}
]
}
]
}
]
update click_stream.events
set event = array(
select ev from unnest(event) as ev
where ev.key != 'k2'
union all
select ('k2', ('v2_new', 12, 12.3))
)
where uid = '003'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment