Skip to content

Instantly share code, notes, and snippets.

@relud
Last active March 22, 2019 16:34
Show Gist options
  • Save relud/759ab42c077180d1931d5f1549fdac89 to your computer and use it in GitHub Desktop.
Save relud/759ab42c077180d1931d5f1549fdac89 to your computer and use it in GitHub Desktop.

bigquery udf to calculate MOD over a 128-bit integer stored as bytes:

-- requires udf_decode_int64(raw)
CREATE TEMP FUNCTION udf_mod_int128(dividend BYTES, divisor INT64) AS (
  IF(
    -- check divisor to ensure it cannot result in overflow
    SAFE.DIV(0x7FFFFFFFFFFFFFFF, MOD(0x100000000, divisor))
    < COALESCE(SAFE.ABS(divisor) - 1, ABS(divisor+1)),
    ERROR("error: divisor could result in overflow"),
    0
  ) + MOD(
    MOD(
      MOD(
        udf_decode_int64(SUBSTR(dividend, 1, 8)),
        divisor
      ) * MOD(
        -- use negative here because the largest possible int64 is negative
        -- so the largest possible multiplier should be negative as well
        MOD(-0x100000000, divisor)
        * MOD(0x100000000, divisor),
        divisor
      )
      ,
      divisor
    ) + MOD(udf_decode_int64(SUBSTR(dividend, 9, 8)), divisor),
    divisor
  )
);

decode INT64 from 8 BYTES

CREATE TEMP FUNCTION udf_decode_int64(raw BYTES) AS (
  CAST(
    -- bigquery can only decode raw int from bytes via hex
    CONCAT('0x', TO_HEX(
      -- remove most significant bit because INT64 is signed
      raw & b"\x7f\xff\xff\xff\xff\xff\xff\xff"
    ))
  AS INT64)
  -- apply sign from most significant bit
  + IF(
    -- if most significant bit is set
    SUBSTR(raw, 1, 1) > b"\x7f",
    -- then apply sign
    -0x8000000000000000,
    -- else sign is already correct
    0)
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment