Skip to content

Instantly share code, notes, and snippets.

View janbenetka's full-sized avatar

Jan Benetka janbenetka

  • Unacast
  • Pilsen, Czech Republic
View GitHub Profile
@janbenetka
janbenetka / bq_date_loop.sql
Last active November 20, 2022 12:46
[BQ Loop over days] Date loop for backfills #bigquery #sql #date #loop
DECLARE i INT64 DEFAULT 0;
DECLARE DATES ARRAY<DATE>;
DECLARE event_date DATE;
SET DATES = GENERATE_DATE_ARRAY(DATE(2019,1,1), DATE(2019, 12, 31), INTERVAL 1 DAY);
LOOP
SET i = i + 1;
IF i > ARRAY_LENGTH(DATES) THEN
@janbenetka
janbenetka / hist_bq.sql
Last active November 4, 2022 12:19
[Histogram in BigQuery] #bigquery
DECLARE HIST_STEPS INT64 DEFAULT 20;
WITH data AS (
SELECT green_fraction as value
FROM `uc-x-data.14_day_retention.green_counties`
)
, stats AS (
SELECT min+step*i min, min+step*(i+1)max
FROM (
SELECT max-min diff, min, max, (max-min)/HIST_STEPS step, GENERATE_ARRAY(0, HIST_STEPS, 1) i
@janbenetka
janbenetka / h3_in_bigquery.sql
Last active October 27, 2022 10:02
[H3 hexagon functions in BigQuery] #h3 #hex
SELECT jslibs.h3.ST_GEOGPOINTFROMH3("u2ce02j")
SELECT jslibs.h3.ST_H3_BOUNDARY(jslibs.h3.ST_H3(ST_GEOGPOINT(13.377534960188237, 49.747300576525554), 11))
SELECT jslibs.h3.h3GetResolution("u2ce02j");
# lat/lon to hexagon
SELECT jslibs.h3.ST_H3(ST_GEOGPOINT(statistics.coordinate.longitude, statistics.coordinate.latitude), 10) AS dwell_hex_id,
WITH polygon AS (
@janbenetka
janbenetka / random_sample_of_crtain_size.sql
Created July 21, 2022 10:22
[Random sample in BQ] #bigquery #random
SELECT * FROM subset
WHERE RAND() < 1e6 / (SELECT COUNT(*) FROM subset)
@janbenetka
janbenetka / jupyter_notebook_width.py
Created July 18, 2022 09:23
[Jupyter notebook width] #jupyter
from IPython.display import HTML, IFrame, display
display(HTML("<style>.container { width:1400px !important; }</style>"))
@janbenetka
janbenetka / dbpedia_county_info.sparql
Created November 25, 2021 08:08
[DBPedia County Info] #sparql #rdf @dbpedia
PREFIX dbpedia2: <http://dbpedia.org/property/>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
SELECT *
WHERE {
?county rdf:type yago:County108546183 .
?county dbo:country dbr:United_States.
?county dbo:state ?state.
?county rdfs:label ?county_label.
@janbenetka
janbenetka / find_adjacent_tracts.sql
Created November 16, 2021 01:35
[Adjacent tracts SQL] #bigquery #sql #geo
CREATE OR REPLACE TABLE `uc-prox-core-dev.z_jan_retained.adjacent_tracts` AS
WITH adjacent_tracts AS (
SELECT a.fips, b.fips as fips_adjacent, a.state_name, b.state_name as state_name_adjacent, a.county_name, b.county_name as county_name_adjacent, a.geog, b.geog as geog_adjacent,
FROM `uc-atlas.maps_us.census_tracts` a, `uc-atlas.maps_us.census_tracts` b
WHERE ST_INTERSECTS(ST_BUFFER(a.geog, 25), b.geog)
)
SELECT * FROM adjacent_tracts
@janbenetka
janbenetka / cell_height_colab.py
Created October 29, 2021 22:11
[Restrict height of cell in Colab (in px)] #colab
from IPython.display import Javascript # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))
@janbenetka
janbenetka / osm_pois_from_bq.sql
Created October 25, 2021 07:49
OSM POI w/ details from BigQuery
WITH osm AS (
SELECT
layer_code,
layer_class,
layer_name,
gdal_type as geography_type,
osm_id,
CASE
WHEN (tags.key = 'name') THEN tags.value
ELSE ''
@janbenetka
janbenetka / norm_counts.sql
Created October 15, 2021 09:10
[Normalized Counts per Group] #bigquery #over
WITH daily AS (
SELECT
local_event_date,
venue_type,
SUM(device_count) device_count,
SUM(person_count) person_count,
FROM `uc-prox-core-dev.international_metrics.traffic_trends`
GROUP BY local_event_date, venue_type
)