Skip to content

Instantly share code, notes, and snippets.

+ bq query '--destination_table=relud-17123:test.clients_daily_v6$20210105' --project_id=moz-fx-data-shared-prod --parameter=submission_date:DATE:2021-01-05 --time_partitioning_field=submission_date --clustering_fields=sample_id --replace
Waiting on bqjob_r3a6a16833c9cc1c0_00000176f8395bca_1 ... (285s) Current status: DONE
+ bq query --dataset_id=relud-17123:test --max_rows=1 --format=prettyjson --parameter=submission_date:DATE:2021-01-05
Waiting on bqjob_r72f38ead5639c19b_00000176f83dcf69_1 ... (48s) Current status: DONE
[
{
"mismatched_aborts_content_sum": "0",
"mismatched_aborts_gmplugin_sum": "0",
"mismatched_aborts_plugin_sum": "0",
"mismatched_active_addons": "0",
[Thread-92965] WARN com.mozilla.telemetry.ingestion.sink.io.Pubsub$Read - Exception while attempting to deliver message:
java.util.concurrent.CompletionException: java.lang.ClassCastException: com.fasterxml.jackson.databind.node.NullNode cannot be cast to com.fasterxml.jackson.databind.node.ObjectNode
at java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:273)
at java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:280)
at java.util.concurrent.CompletableFuture.uniCompose(CompletableFuture.java:975)
at java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:940)
at java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:456)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassCastException: com.fasterxml.jackson.databind.node.NullNode cannot be cast to com.fasterxml.jackson.databind.node.ObjectNode
at com.mozilla.telemetry.ingestion.sink.tran
@relud
relud / dataproc_run_export.sh
Last active August 20, 2019 00:08
export client probe counts v1 to firestore
#!/usr/bin/env bash
# Create the Dataproc cluster used by the export job.
#
# Environment:
#   CLUSTER  name of the cluster to create (default: export-to-firestore)

# Abort on the first failing command.
set -e

CLUSTER="${CLUSTER:-export-to-firestore}"

# Every line of the command except the last must end in a backslash; without
# it the shell ends the gcloud command early and tries to run the next flag
# as a command of its own.
gcloud beta dataproc clusters create "$CLUSTER" \
  --max-idle=10m \
  --metadata='PIP_PACKAGES=google-cloud-firestore==1.3.0' \
  --initialization-actions gs://dataproc-initialization-actions/python/pip-install.sh
@relud
relud / decoded.bqschema.json
Last active August 1, 2019 19:27
non-payload-format bigquery schemas
[
{
"fields": [
{
"mode": "NULLABLE",
"name": "document_namespace",
"type": "STRING"
},
{
"mode": "NULLABLE",
from google.cloud import bigquery
# BigQuery client bound to the "moz-fx-data-shar-nonprod-efed" project.
client = bigquery.Client("moz-fx-data-shar-nonprod-efed")

# Sum num_rows over every table in every dataset whose id ends in "_live".
# Each table is looked up as "<full_table_id with ':' replaced by '.'>$20190729"
# (the "$20190729" suffix is presumably a partition decorator for 2019-07-29).
all_live = sum(
    client.get_table(
        "{}$20190729".format(table.full_table_id.replace(":", "."))
    ).num_rows
    for dataset in client.list_datasets()
    if dataset.dataset_id.endswith("_live")
    for table in client.list_tables(dataset.reference)
)
-- Query generated by: sql/clients_daily_scalar_aggregates.sql.py
CREATE TEMP FUNCTION
udf_aggregate_map_sum(maps ANY TYPE) AS (STRUCT(ARRAY(
SELECT
AS STRUCT key,
SUM(value) AS value
FROM
UNNEST(maps),
UNNEST(key_value)
GROUP BY

BigQuery UDF to calculate MOD over a 128-bit integer stored as bytes:

-- requires udf_decode_int64(raw)
CREATE TEMP FUNCTION udf_mod_int128(dividend BYTES, divisor INT64) AS (
  IF(
    -- check divisor to ensure it cannot result in overflow
    SAFE.DIV(0x7FFFFFFFFFFFFFFF, MOD(0x100000000, divisor))
    < COALESCE(SAFE.ABS(divisor) - 1, ABS(divisor+1)),
    ERROR("error: divisor could result in overflow"),
    0
@relud
relud / UDF.sql
Last active November 13, 2018 22:58
BigQuery UDF getKey
-- Look up the value stored under key `k` in `map`.
-- `map` must expose a repeated `key_value` field whose elements have `key`
-- and `value` members. The result is NULL when no entry matches; when several
-- entries match, LIMIT 1 keeps one of them (no ordering is specified).
CREATE TEMP FUNCTION getKey(map ANY TYPE, k ANY TYPE) AS (
  (
    SELECT
      entry.value
    FROM
      UNNEST(map.key_value) AS entry
    WHERE
      entry.key = k
    LIMIT
      1
  )
);
@relud
relud / test_repartition.scala
Last active January 23, 2018 22:55 — forked from mreid-moz/test_repartition.scala
Test repartitioning behaviour when writing parquet data.
import java.util.UUID.randomUUID
import scala.sys.process._
import java.util.zip.CRC32
import com.mozilla.telemetry.utils.getOrCreateSparkSession
// Obtain a Spark session from the project helper; "test" is presumably the
// application name (confirm against getOrCreateSparkSession).
val spark = getOrCreateSparkSession("test")
// Set the Spark context's log level to WARN.
spark.sparkContext.setLogLevel("WARN")
import spark.implicits._
def getPartitionId(clientId: String, sampleId: Int, filesPerPartition: Int) = {

Keybase proof

I hereby claim:

  • I am relud on github.
  • I am relud (https://keybase.io/relud) on keybase.
  • I have a public key whose fingerprint is 9841 09AE 5B29 DD74 AF27 AF74 94CF 5D48 1347 FFEC

To claim this, I am signing this object: