Skip to content

Instantly share code, notes, and snippets.

@saptarshiguha
Created November 9, 2019 00:26
Show Gist options
  • Save saptarshiguha/5eebd2943c6ff0b25ef3eeb5c83773c0 to your computer and use it in GitHub Desktop.
Save saptarshiguha/5eebd2943c6ff0b25ef3eeb5c83773c0 to your computer and use it in GitHub Desktop.
c.sql
REATE OR REPLACE FUNCTION analysis.sg_histogram_aggregate(r ARRAY<STRUCT<v STRING>>) RETURNS Array<STRUCT<k STRING, v INT64>>
LANGUAGE js AS \"\"\"
// called as sg_histogram_aggregate(ARRAY_AGG(Struct( JSON_EXTRACT(A HIstogram,'$.values') as v)))
var arrayLength = r.length;
if( arrayLength == 0) {
return( [] );
}
var d = 0;
var accum = { };
for (var i = 0; i < arrayLength; i++) {
var o = r[i].v
if(o === null) { continue; }
o = JSON.parse(o);
for (var key in o) {
if( accum[ key ] === undefined){
accum[ key ] = 0;
}
accum[ key] = accum[ key ] + parseInt(o[key],10) ;
}
}
if( Object.keys(accum).length == 0){
return([]);
}else{
var ot = [];
for( var key in accum){
ot.push({'k': key, 'v': accum[key]})
}
return(ot);
}
\"\"\";
CREATE OR REPLACE FUNCTION analysis.sg_histogram_center(V Array<STRUCT<k STRING, v INT64>>,uselog BOOL) AS (
-- computers the 'center' of a histogram or the geometric mean if uselog is true
-- expects as input the output of analysis.sg_histogram_aggregate
(
with a as (
select
SUM(obj.v) as n,
case when uselog=false then sum(obj.v*CAST(obj.k as INT64))
else sum(obj.v*log(1+CAST(obj.k as INT64)) ) end as s
from
unnest(V) as obj
)
select
case when n=0 then STRUCT(0 as m, 0 as n)
when uselog = false then STRUCT(exp(s/n)-1 as m, n as n)
else STRUCT(s/n as center, n as nobs) end
from a
)
);
CREATE OR REPLACE FUNCTION analysis.sg_prob_LE_c(V ARRAY<Struct<k STRING,v INT64>>,c FLOAT64,inv BOOL) AS (
-- Prob(X<=c), if inv == True then 1-Prob(X<=c) == > Prob(X > c)
-- expects as input the output of analysis.sg_histogram_aggregate
(
with a as (
select
sum(x.v) as denominator,
sum(case when cast(x.k as INT64) <= c then x.v else 0 end) as numerator
from unnest(V) as x
)
select
case when inv = TRUE then 1-numerator/denominator else numerator/denominator end
from a
)
);
CREATE OR REPLACE FUNCTION analysis.sg_prob_IN_c(V ARRAY<Struct<k STRING,v INT64>>,c ARRAY<INT64>) AS (
-- Prob(X in {c})
-- expects as input the output of analysis.sg_histogram_aggregate
(
with a as (
select
sum(x.v) as denominator,
sum(case when cast(x.k as INT64) IN UNNEST(c) then x.v else 0 end) as numerator
from unnest(V) as x
)
select numerator/denominator
from a
)
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment