Last active
February 2, 2022 07:42
-
-
Save sh16ma/6fb9d31f8771b2a36698e1b9d2014542 to your computer and use it in GitHub Desktop.
#🔎 #BigQuery #カラム別要約統計量 #Pandasでええやん #気候データ(サンプル利用)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#standaardSQL | |
with | |
encord_MinTemp as (select cast(MinTemp as float64) as MinTemp from `pj.ds.weatherAUS` where MinTemp <> 'NA') | |
,encord_MaxTemp as (select cast(MaxTemp as float64) as MaxTemp from `pj.ds.weatherAUS` where MaxTemp <> 'NA') | |
,encord_Rainfall as (select cast(Rainfall as float64) as Rainfall from `pj.ds.weatherAUS` where Rainfall <> 'NA') | |
,encord_Evaporation as (select cast(Evaporation as float64) as Evaporation from `pj.ds.weatherAUS` where Evaporation <> 'NA') | |
,encord_Sunshine as (select cast(Sunshine as float64) as Sunshine from `pj.ds.weatherAUS` where Sunshine <> 'NA') | |
,encord_WindGustSpeed as (select cast(WindGustSpeed as float64) as WindGustSpeed from `pj.ds.weatherAUS` where WindGustSpeed <> 'NA') | |
,encord_WindSpeed9am as (select cast(WindSpeed9am as float64) as WindSpeed9am from `pj.ds.weatherAUS` where WindSpeed9am <> 'NA') | |
,encord_WindSpeed3pm as (select cast(WindSpeed3pm as float64) as WindSpeed3pm from `pj.ds.weatherAUS` where WindSpeed3pm <> 'NA') | |
,encord_Humidity9am as (select cast(Humidity9am as float64) as Humidity9am from `pj.ds.weatherAUS` where Humidity9am <> 'NA') | |
,encord_Humidity3pm as (select cast(Humidity3pm as float64) as Humidity3pm from `pj.ds.weatherAUS` where Humidity3pm <> 'NA') | |
,encord_Pressure9am as (select cast(Pressure9am as float64) as Pressure9am from `pj.ds.weatherAUS` where Pressure9am <> 'NA') | |
,encord_Pressure3pm as (select cast(Pressure3pm as float64) as Pressure3pm from `pj.ds.weatherAUS` where Pressure3pm <> 'NA') | |
,encord_Cloud9am as (select cast(Cloud9am as float64) as Cloud9am from `pj.ds.weatherAUS` where Cloud9am <> 'NA') | |
,encord_Cloud3pm as (select cast(Cloud3pm as float64) as Cloud3pm from `pj.ds.weatherAUS` where Cloud3pm <> 'NA') | |
,encord_Temp9am as (select cast(Temp9am as float64) as Temp9am from `pj.ds.weatherAUS` where Temp9am <> 'NA') | |
,encord_Temp3pm as (select cast(Temp3pm as float64) as Temp3pm from `pj.ds.weatherAUS` where Temp3pm <> 'NA') | |
,statics_columns as ( | |
(select 'MinTemp' as key ,count(*) as n ,avg(MinTemp) as mean ,stddev(MinTemp) as std ,min(MinTemp) as min ,(select q from (select percentile_cont(MinTemp, 0.25) over() as q from encord_MinTemp) group by q) as first_quartile ,(select q from (select percentile_cont(MinTemp, 0.5) over() as q from encord_MinTemp) group by q) as median ,(select q from (select percentile_cont(MinTemp, 0.75) over() as q from encord_MinTemp) group by q) as thrd_quartile ,max(MinTemp) as max from encord_MinTemp) | |
union all (select 'MaxTemp' as key ,count(*) as n ,avg(MaxTemp) as mean ,stddev(MaxTemp) as std ,min(MaxTemp) as min ,(select q from (select percentile_cont(MaxTemp, 0.25) over() as q from encord_MaxTemp) group by q) as first_quartile ,(select q from (select percentile_cont(MaxTemp, 0.5) over() as q from encord_MaxTemp) group by q) as median ,(select q from (select percentile_cont(MaxTemp, 0.75) over() as q from encord_MaxTemp) group by q) as thrd_quartile ,max(MaxTemp) as max from encord_MaxTemp) | |
union all (select 'Rainfall' as key ,count(*) as n ,avg(Rainfall) as mean ,stddev(Rainfall) as std ,min(Rainfall) as min ,(select q from (select percentile_cont(Rainfall, 0.25) over() as q from encord_Rainfall) group by q) as first_quartile ,(select q from (select percentile_cont(Rainfall, 0.5) over() as q from encord_Rainfall) group by q) as median ,(select q from (select percentile_cont(Rainfall, 0.75) over() as q from encord_Rainfall) group by q) as thrd_quartile ,max(Rainfall) as max from encord_Rainfall) | |
union all (select 'Evaporation' as key ,count(*) as n ,avg(Evaporation) as mean ,stddev(Evaporation) as std ,min(Evaporation) as min ,(select q from (select percentile_cont(Evaporation, 0.25) over() as q from encord_Evaporation) group by q) as first_quartile ,(select q from (select percentile_cont(Evaporation, 0.5) over() as q from encord_Evaporation) group by q) as median ,(select q from (select percentile_cont(Evaporation, 0.75) over() as q from encord_Evaporation) group by q) as thrd_quartile ,max(Evaporation) as max from encord_Evaporation) | |
union all (select 'Sunshine' as key ,count(*) as n ,avg(Sunshine) as mean ,stddev(Sunshine) as std ,min(Sunshine) as min ,(select q from (select percentile_cont(Sunshine, 0.25) over() as q from encord_Sunshine) group by q) as first_quartile ,(select q from (select percentile_cont(Sunshine, 0.5) over() as q from encord_Sunshine) group by q) as median ,(select q from (select percentile_cont(Sunshine, 0.75) over() as q from encord_Sunshine) group by q) as thrd_quartile ,max(Sunshine) as max from encord_Sunshine) | |
union all (select 'WindGustSpeed' as key ,count(*) as n ,avg(WindGustSpeed) as mean ,stddev(WindGustSpeed) as std ,min(WindGustSpeed) as min ,(select q from (select percentile_cont(WindGustSpeed, 0.25) over() as q from encord_WindGustSpeed) group by q) as first_quartile ,(select q from (select percentile_cont(WindGustSpeed, 0.5) over() as q from encord_WindGustSpeed) group by q) as median ,(select q from (select percentile_cont(WindGustSpeed, 0.75) over() as q from encord_WindGustSpeed) group by q) as thrd_quartile ,max(WindGustSpeed) as max from encord_WindGustSpeed) | |
union all (select 'WindSpeed9am' as key ,count(*) as n ,avg(WindSpeed9am) as mean ,stddev(WindSpeed9am) as std ,min(WindSpeed9am) as min ,(select q from (select percentile_cont(WindSpeed9am, 0.25) over() as q from encord_WindSpeed9am) group by q) as first_quartile ,(select q from (select percentile_cont(WindSpeed9am, 0.5) over() as q from encord_WindSpeed9am) group by q) as median ,(select q from (select percentile_cont(WindSpeed9am, 0.75) over() as q from encord_WindSpeed9am) group by q) as thrd_quartile ,max(WindSpeed9am) as max from encord_WindSpeed9am) | |
union all (select 'WindSpeed3pm' as key ,count(*) as n ,avg(WindSpeed3pm) as mean ,stddev(WindSpeed3pm) as std ,min(WindSpeed3pm) as min ,(select q from (select percentile_cont(WindSpeed3pm, 0.25) over() as q from encord_WindSpeed3pm) group by q) as first_quartile ,(select q from (select percentile_cont(WindSpeed3pm, 0.5) over() as q from encord_WindSpeed3pm) group by q) as median ,(select q from (select percentile_cont(WindSpeed3pm, 0.75) over() as q from encord_WindSpeed3pm) group by q) as thrd_quartile ,max(WindSpeed3pm) as max from encord_WindSpeed3pm) | |
union all (select 'Humidity9am' as key ,count(*) as n ,avg(Humidity9am) as mean ,stddev(Humidity9am) as std ,min(Humidity9am) as min ,(select q from (select percentile_cont(Humidity9am, 0.25) over() as q from encord_Humidity9am) group by q) as first_quartile ,(select q from (select percentile_cont(Humidity9am, 0.5) over() as q from encord_Humidity9am) group by q) as median ,(select q from (select percentile_cont(Humidity9am, 0.75) over() as q from encord_Humidity9am) group by q) as thrd_quartile ,max(Humidity9am) as max from encord_Humidity9am) | |
union all (select 'Humidity3pm' as key ,count(*) as n ,avg(Humidity3pm) as mean ,stddev(Humidity3pm) as std ,min(Humidity3pm) as min ,(select q from (select percentile_cont(Humidity3pm, 0.25) over() as q from encord_Humidity3pm) group by q) as first_quartile ,(select q from (select percentile_cont(Humidity3pm, 0.5) over() as q from encord_Humidity3pm) group by q) as median ,(select q from (select percentile_cont(Humidity3pm, 0.75) over() as q from encord_Humidity3pm) group by q) as thrd_quartile ,max(Humidity3pm) as max from encord_Humidity3pm) | |
union all (select 'Pressure9am' as key ,count(*) as n ,avg(Pressure9am) as mean ,stddev(Pressure9am) as std ,min(Pressure9am) as min ,(select q from (select percentile_cont(Pressure9am, 0.25) over() as q from encord_Pressure9am) group by q) as first_quartile ,(select q from (select percentile_cont(Pressure9am, 0.5) over() as q from encord_Pressure9am) group by q) as median ,(select q from (select percentile_cont(Pressure9am, 0.75) over() as q from encord_Pressure9am) group by q) as thrd_quartile ,max(Pressure9am) as max from encord_Pressure9am) | |
union all (select 'Pressure3pm' as key ,count(*) as n ,avg(Pressure3pm) as mean ,stddev(Pressure3pm) as std ,min(Pressure3pm) as min ,(select q from (select percentile_cont(Pressure3pm, 0.25) over() as q from encord_Pressure3pm) group by q) as first_quartile ,(select q from (select percentile_cont(Pressure3pm, 0.5) over() as q from encord_Pressure3pm) group by q) as median ,(select q from (select percentile_cont(Pressure3pm, 0.75) over() as q from encord_Pressure3pm) group by q) as thrd_quartile ,max(Pressure3pm) as max from encord_Pressure3pm) | |
union all (select 'Cloud9am' as key ,count(*) as n ,avg(Cloud9am) as mean ,stddev(Cloud9am) as std ,min(Cloud9am) as min ,(select q from (select percentile_cont(Cloud9am, 0.25) over() as q from encord_Cloud9am) group by q) as first_quartile ,(select q from (select percentile_cont(Cloud9am, 0.5) over() as q from encord_Cloud9am) group by q) as median ,(select q from (select percentile_cont(Cloud9am, 0.75) over() as q from encord_Cloud9am) group by q) as thrd_quartile ,max(Cloud9am) as max from encord_Cloud9am) | |
union all (select 'Cloud3pm' as key ,count(*) as n ,avg(Cloud3pm) as mean ,stddev(Cloud3pm) as std ,min(Cloud3pm) as min ,(select q from (select percentile_cont(Cloud3pm, 0.25) over() as q from encord_Cloud3pm) group by q) as first_quartile ,(select q from (select percentile_cont(Cloud3pm, 0.5) over() as q from encord_Cloud3pm) group by q) as median ,(select q from (select percentile_cont(Cloud3pm, 0.75) over() as q from encord_Cloud3pm) group by q) as thrd_quartile ,max(Cloud3pm) as max from encord_Cloud3pm) | |
union all (select 'Temp9am' as key ,count(*) as n ,avg(Temp9am) as mean ,stddev(Temp9am) as std ,min(Temp9am) as min ,(select q from (select percentile_cont(Temp9am, 0.25) over() as q from encord_Temp9am) group by q) as first_quartile ,(select q from (select percentile_cont(Temp9am, 0.5) over() as q from encord_Temp9am) group by q) as median ,(select q from (select percentile_cont(Temp9am, 0.75) over() as q from encord_Temp9am) group by q) as thrd_quartile ,max(Temp9am) as max from encord_Temp9am) | |
union all (select 'Temp3pm' as key ,count(*) as n ,avg(Temp3pm) as mean ,stddev(Temp3pm) as std ,min(Temp3pm) as min ,(select q from (select percentile_cont(Temp3pm, 0.25) over() as q from encord_Temp3pm) group by q) as first_quartile ,(select q from (select percentile_cont(Temp3pm, 0.5) over() as q from encord_Temp3pm) group by q) as median ,(select q from (select percentile_cont(Temp3pm, 0.75) over() as q from encord_Temp3pm) group by q) as thrd_quartile ,max(Temp3pm) as max from encord_Temp3pm) | |
) | |
select * from statics_columns order by key; |
Author
sh16ma
commented
Nov 9, 2021
💡 GoogleSheets利用
カラムを「A1」列に転記。
以下の関数を順番にコピペ。
「B1」列にコピペ
="encord_"&A1&" as (select cast("&A1&" as float64) as "&A1&" from `pj.ds.weatherAUS` where "&A1&" <> 'NA')"
「B2」列以降にコピペ
=",encord_"&A2&" as (select cast("&A2&" as float64) as "&A2&" from `pj.ds.weatherAUS` where "&A2&" <> 'NA')"
「C1」列にコピペ
="(select '"&A1&"' as key ,count(*) as n ,avg("&A1&") as mean ,stddev("&A1&") as std ,min("&A1&") as min ,(select q from (select percentile_cont("&A1&", 0.25) over() as q from encord_"&A1&") group by q) as first_quartile ,(select q from (select percentile_cont("&A1&", 0.5) over() as q from encord_"&A1&") group by q) as median ,(select q from (select percentile_cont("&A1&", 0.75) over() as q from encord_"&A1&") group by q) as thrd_quartile ,max("&A1&") as max from encord_"&A1&")"
「C2」列以降にコピペ
="union all (select '"&A2&"' as key ,count(*) as n ,avg("&A2&") as mean ,stddev("&A2&") as std ,min("&A2&") as min ,(select q from (select percentile_cont("&A2&", 0.25) over() as q from encord_"&A2&") group by q) as first_quartile ,(select q from (select percentile_cont("&A2&", 0.5) over() as q from encord_"&A2&") group by q) as median ,(select q from (select percentile_cont("&A2&", 0.75) over() as q from encord_"&A2&") group by q) as thrd_quartile ,max("&A2&") as max from encord_"&A2&")"
年別×気温区分別:要約統計量
with
MinTemp_encd as (
select
Date
, cast(MinTemp as float64) as MinTemp
from
`prj-test3.case_study1.weatherAUS`
where
MinTemp <> 'NA'
)
, MinTemp_quatile as (
select
year
, max(MinTemp_q1) as MinTemp_q1
, max(MinTemp_q2) as MinTemp_q2 -- median
, max(MinTemp_q3) as MinTemp_q3
from
(
select
format_date('%Y', Date) as year
, percentile_cont(MinTemp, 0.25)over(partition by format_date('%Y', Date)) as MinTemp_q1
, percentile_cont(MinTemp, 0.5)over(partition by format_date('%Y', Date)) as MinTemp_q2
, percentile_cont(MinTemp, 0.75)over(partition by format_date('%Y', Date)) as MinTemp_q3
from
MinTemp_encd
)
group by
year
)
, MinTemp_statistic as (
select
year
, st.MinTemp_mean
, st.MinTemp_std
, st.MinTemp_min
, qt.MinTemp_q1
, qt.MinTemp_q2
, qt.MinTemp_q3
, st.MinTemp_max
from
(select
format_date('%Y', Date) as year
, avg(MinTemp) as MinTemp_mean
, stddev_pop(MinTemp) as MinTemp_std
, min(MinTemp) as MinTemp_min
, max(MinTemp) as MinTemp_max
from
MinTemp_encd
group by
year
) as st
join MinTemp_quatile as qt
using(year)
)
, MaxTemp_encd as (
select
Date
, cast(MaxTemp as float64) as MaxTemp
from
`prj-test3.case_study1.weatherAUS`
where
MaxTemp <> 'NA'
)
, MaxTemp_quatile as (
select
year
, max(MaxTemp_q1) as MaxTemp_q1
, max(MaxTemp_q2) as MaxTemp_q2 -- median
, max(MaxTemp_q3) as MaxTemp_q3
from
(
select
format_date('%Y', Date) as year
, percentile_cont(MaxTemp, 0.25)over(partition by format_date('%Y', Date)) as MaxTemp_q1
, percentile_cont(MaxTemp, 0.5)over(partition by format_date('%Y', Date)) as MaxTemp_q2
, percentile_cont(MaxTemp, 0.75)over(partition by format_date('%Y', Date)) as MaxTemp_q3
from
MaxTemp_encd
)
group by
year
)
, MaxTemp_statistic as (
select
year
, st.MaxTemp_mean
, st.MaxTemp_std
, st.MaxTemp_min
, qt.MaxTemp_q1
, qt.MaxTemp_q2
, qt.MaxTemp_q3
, st.MaxTemp_max
from
(select
format_date('%Y', Date) as year
, avg(MaxTemp) as MaxTemp_mean
, stddev_pop(MaxTemp) as MaxTemp_std
, min(MaxTemp) as MaxTemp_min
, max(MaxTemp) as MaxTemp_max
from
MaxTemp_encd
group by
year
) as st
join MaxTemp_quatile as qt
using(year)
)
/*****************
* OUT-PUT
*****************/
select
*
from
MinTemp_statistic
join
MaxTemp_statistic using(year)
order by
year
;
クエリ完了(経過時間: 2.2 秒、処理されたバイト数: 2.7 MB)
※ 苦戦したポイント
- ウィンドウ関数のOVER句でPARTITIONBYを年別に切り分ける点。
- 親クエリで子クエリ(サブクエリ)の値を取得の際に、集計が伴う点(maxでもminでも可能 ∵同一数字)。
MaxTemp_quatile as ( select year , max(MaxTemp_q1) as MaxTemp_q1 , max(MaxTemp_q2) as MaxTemp_q2 -- median , max(MaxTemp_q3) as MaxTemp_q3 from ( # 年にフォーマット変換して、四分位数を取得 select format_date('%Y', Date) as year , percentile_cont(MaxTemp, 0.25)over(partition by format_date('%Y', Date)) as MaxTemp_q1 , percentile_cont(MaxTemp, 0.5)over(partition by format_date('%Y', Date)) as MaxTemp_q2 , percentile_cont(MaxTemp, 0.75)over(partition by format_date('%Y', Date)) as MaxTemp_q3 from MaxTemp_encd ) group by year )
ロケーション別×気温区分別 : 要約統計量
with
loc_days as (
select
Location
, count(Location) as days
from
`pj.ds.weatherAUS`
group by
Location
)
, MinTemp_encd as (
select
Location
, Date
, cast(MinTemp as float64) as MinTemp
from
`pj.ds.weatherAUS`
where
MinTemp <> 'NA'
)
, MinTemp_quatile as (
select
Location
, max(MinTemp_q1) as MinTemp_q1
, max(MinTemp_q2) as MinTemp_q2 -- median
, max(MinTemp_q3) as MinTemp_q3
from
(
select
Location
, percentile_cont(MinTemp, 0.25)over(partition by Location) as MinTemp_q1
, percentile_cont(MinTemp, 0.5)over(partition by Location) as MinTemp_q2
, percentile_cont(MinTemp, 0.75)over(partition by Location) as MinTemp_q3
from
MinTemp_encd
)
group by
Location
)
, MinTemp_statistic as (
select
Location
, st.MinTemp_mean
, st.MinTemp_std
, st.MinTemp_min
, qt.MinTemp_q1
, qt.MinTemp_q2
, qt.MinTemp_q3
, st.MinTemp_max
from
(select
Location
, avg(MinTemp) as MinTemp_mean
, stddev_pop(MinTemp) as MinTemp_std
, min(MinTemp) as MinTemp_min
, max(MinTemp) as MinTemp_max
from
MinTemp_encd
group by
Location
) as st
join MinTemp_quatile as qt
using(Location)
)
, MaxTemp_encd as (
select
Location
, Date
, cast(MaxTemp as float64) as MaxTemp
from
`pj.ds.weatherAUS`
where
MaxTemp <> 'NA'
)
, MaxTemp_quatile as (
select
Location
, max(MaxTemp_q1) as MaxTemp_q1
, max(MaxTemp_q2) as MaxTemp_q2 -- median
, max(MaxTemp_q3) as MaxTemp_q3
from
(
select
Location
, percentile_cont(MaxTemp, 0.25)over(partition by Location) as MaxTemp_q1
, percentile_cont(MaxTemp, 0.5)over(partition by Location) as MaxTemp_q2
, percentile_cont(MaxTemp, 0.75)over(partition by Location) as MaxTemp_q3
from
MaxTemp_encd
)
group by
Location
)
, MaxTemp_statistic as (
select
Location
, st.MaxTemp_mean
, st.MaxTemp_std
, st.MaxTemp_min
, qt.MaxTemp_q1
, qt.MaxTemp_q2
, qt.MaxTemp_q3
, st.MaxTemp_max
from
(select
Location
, avg(MaxTemp) as MaxTemp_mean
, stddev_pop(MaxTemp) as MaxTemp_std
, min(MaxTemp) as MaxTemp_min
, max(MaxTemp) as MaxTemp_max
from
MaxTemp_encd
group by
Location
) as st
join MaxTemp_quatile as qt using(Location)
)
/*****************
* OUT-PUT
*****************/
select
*
from
loc_days
join
MinTemp_statistic using(Location)
join
MaxTemp_statistic using(Location)
order by
Location
;
クエリ完了(経過時間: 2.4 秒、処理されたバイト数: 3 MB)
※ サンプルデータを利用
天候区分別×気温区分別:要約統計量
with
MinTemp_encd as (
select
Date
, MinTemp
, CategoryTemp
from(
select
Date
, cast(MinTemp as float64) as MinTemp
, cast(Cloud9am as float64) as Cloud9am
, cast(Cloud3pm as float64) as Cloud3pm
, case
when (cast(Cloud9am as float64)+cast(Cloud3pm as float64))/2 <= 1 then 'Sunny+'
when (cast(Cloud9am as float64)+cast(Cloud3pm as float64))/2 between 2 and 8 then 'Sunny'
when (cast(Cloud9am as float64)+cast(Cloud3pm as float64))/2 >= 9 then 'Cloudy'
end as CategoryTemp
from
`prj-test3.case_study1.weatherAUS`
where
MinTemp <> 'NA'
and
Cloud9am <> 'NA'
and
Cloud3pm <> 'NA'
)
where
CategoryTemp = 'Sunny+'
)
, MinTemp_quatile as (
select
year
, max(MinTemp_q1) as MinTemp_q1
, max(MinTemp_q2) as MinTemp_q2 -- median
, max(MinTemp_q3) as MinTemp_q3
from
(
select
format_date('%Y', Date) as year
, percentile_cont(MinTemp, 0.25)over(partition by format_date('%Y', Date)) as MinTemp_q1
, percentile_cont(MinTemp, 0.5)over(partition by format_date('%Y', Date)) as MinTemp_q2
, percentile_cont(MinTemp, 0.75)over(partition by format_date('%Y', Date)) as MinTemp_q3
from
MinTemp_encd
)
group by
year
)
, MinTemp_statistic as (
select
year
, st.MinTemp_mean
, st.MinTemp_std
, st.MinTemp_min
, qt.MinTemp_q1
, qt.MinTemp_q2
, qt.MinTemp_q3
, st.MinTemp_max
from
(select
format_date('%Y', Date) as year
, avg(MinTemp) as MinTemp_mean
, stddev_pop(MinTemp) as MinTemp_std
, min(MinTemp) as MinTemp_min
, max(MinTemp) as MinTemp_max
from
MinTemp_encd
group by
year
) as st
join MinTemp_quatile as qt
using(year)
)
, MaxTemp_encd as (
select
Date
, MaxTemp
, CategoryTemp
from(
select
Date
, cast(MaxTemp as float64) as MaxTemp
, cast(Cloud9am as float64) as Cloud9am
, cast(Cloud3pm as float64) as Cloud3pm
, case
when (cast(Cloud9am as float64)+cast(Cloud3pm as float64))/2 <= 1 then 'Sunny+'
when (cast(Cloud9am as float64)+cast(Cloud3pm as float64))/2 between 2 and 8 then 'Sunny'
when (cast(Cloud9am as float64)+cast(Cloud3pm as float64))/2 >= 9 then 'Cloudy'
end as CategoryTemp
from
`prj-test3.case_study1.weatherAUS`
where
MaxTemp <> 'NA'
and
Cloud9am <> 'NA'
and
Cloud3pm <> 'NA'
)
where
CategoryTemp = 'Sunny+'
)
, MaxTemp_quatile as (
select
year
, max(MaxTemp_q1) as MaxTemp_q1
, max(MaxTemp_q2) as MaxTemp_q2 -- median
, max(MaxTemp_q3) as MaxTemp_q3
from
(
select
format_date('%Y', Date) as year
, percentile_cont(MaxTemp, 0.25)over(partition by format_date('%Y', Date)) as MaxTemp_q1
, percentile_cont(MaxTemp, 0.5)over(partition by format_date('%Y', Date)) as MaxTemp_q2
, percentile_cont(MaxTemp, 0.75)over(partition by format_date('%Y', Date)) as MaxTemp_q3
from
MaxTemp_encd
)
group by
year
)
, MaxTemp_statistic as (
select
year
, st.MaxTemp_mean
, st.MaxTemp_std
, st.MaxTemp_min
, qt.MaxTemp_q1
, qt.MaxTemp_q2
, qt.MaxTemp_q3
, st.MaxTemp_max
from
(select
format_date('%Y', Date) as year
, avg(MaxTemp) as MaxTemp_mean
, stddev_pop(MaxTemp) as MaxTemp_std
, min(MaxTemp) as MaxTemp_min
, max(MaxTemp) as MaxTemp_max
from
MaxTemp_encd
group by
year
) as st
join MaxTemp_quatile as qt
using(year)
)
/*****************
* OUT-PUT
*****************/
select
*
from
MinTemp_statistic
join
MaxTemp_statistic using(year)
order by
year
;
クエリ完了(経過時間: 3.5 秒、処理されたバイト数: 3.6 MB)
風向き区分別 × 風量:要約統計量
with
WindGustDir_encd as (
select
WindGustDir
, cast(WindGustSpeed as float64) as WindGustSpeed
from
`pj.ds.weatherAUS`
where
WindGustDir <> 'NA'
and
WindGustSpeed <> 'NA'
)
, WindGustDir_quatile as (
select
WindGustDir
, max(WindGustSpeed_q1) as WindGustSpeed_q1
, max(WindGustSpeed_q2) as WindGustSpeed_q2
, max(WindGustSpeed_q3) as WindGustSpeed_q3
from
(
select
WindGustDir
, percentile_cont(WindGustSpeed, 0.25)over(partition by WindGustDir) as WindGustSpeed_q1
, percentile_cont(WindGustSpeed, 0.5)over(partition by WindGustDir) as WindGustSpeed_q2
, percentile_cont(WindGustSpeed, 0.75)over(partition by WindGustDir) as WindGustSpeed_q3
from
WindGustDir_encd
)
group by
WindGustDir
)
, WindGustDir_statistic as (
select
WindGustDir
, st.WindGustSpeed_mean
, st.WindGustSpeed_std
, st.WindGustSpeed_min
, qt.WindGustSpeed_q1
, qt.WindGustSpeed_q2
, qt.WindGustSpeed_q3
, st.WindGustSpeed_max
from
(select
WindGustDir
, avg(WindGustSpeed) as WindGustSpeed_mean
, stddev_pop(WindGustSpeed) as WindGustSpeed_std
, min(WindGustSpeed) as WindGustSpeed_min
, max(WindGustSpeed) as WindGustSpeed_max
from
WindGustDir_encd
group by
WindGustDir
) as st
join WindGustDir_quatile as qt
using(WindGustDir)
)
/*****************
* OUT-PUT
*****************/
select * from WindGustDir_statistic order by WindGustDir;
クエリ完了(経過時間: 3.2 秒、処理されたバイト数: 1.1 MB)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment