SQLを用いての集計・分析を行うと、クエリだけで軽く数百行を超えることがしばしばある。
おおまかに、どのエリアにどのテーブル(整形したサブクエリ)が存在しているのかを予め決めておくことで、
容易にクエリを記述・読み返すことができる。
※ 大前提: `WITH句`を用いて、複数のテーブル(整形したデータ)を記述して、最終的にまとめていく書き方を想定している。
# DLした「.csv」ファイルを一括文字コード変換 | |
# cf. https://dse-souken.com/2021/06/06/tec/ | |
# HowToUse | |
# 1. DLしたファイルをまとめてフォルダー化して「enc」と名付ける | |
# 2.「enc」フォルダー内にあるDLしたファイルを全てデスクトップへコピペ | |
# 3.「jis_encoding.py」を実行 | |
import glob |
#standardSQL | |
declare sd array<string>; | |
declare ed array<string>; | |
declare start_days array<string>; | |
declare end_days array<string>; | |
declare i int64 default 1; | |
set sd = ['2019-01-01', '2019-02-01', '2019-03-01']; | |
set ed = ['2019-04-30', '2019-08-31', '2019-12-31']; | |
#standaardSQL | |
# 集計期間 | |
decleare term_start string default '2020-01-01'; | |
decleare term_end string default '2020-12-31'; | |
decleare prep_table_suffix string; | |
set prep_table_suffix = ( | |
select format_date('%Y%m%d', current_date('Asia/Tokyo')) as TODAY | |
); |
#standaardSQL | |
with gender_era_amount as ( | |
select | |
c.gender_cd as gender | |
, trunc(c.age/10) * 10 as era | |
, sum(r.amount) as amount | |
from `prj-test3.100knocks.customer` as c | |
join `prj-test3.100knocks.receipt` as r | |
using(customer_id) |
#standaardSQL | |
select | |
Location | |
, datetime_diff(new_date, old_date, day) as days_diff | |
from( | |
select | |
-- * | |
Date | |
, Location |
#standaardSQL | |
with | |
encord_MinTemp as (select cast(MinTemp as float64) as MinTemp from `pj.ds.weatherAUS` where MinTemp <> 'NA') | |
,encord_MaxTemp as (select cast(MaxTemp as float64) as MaxTemp from `pj.ds.weatherAUS` where MaxTemp <> 'NA') | |
,encord_Rainfall as (select cast(Rainfall as float64) as Rainfall from `pj.ds.weatherAUS` where Rainfall <> 'NA') | |
,encord_Evaporation as (select cast(Evaporation as float64) as Evaporation from `pj.ds.weatherAUS` where Evaporation <> 'NA') | |
,encord_Sunshine as (select cast(Sunshine as float64) as Sunshine from `pj.ds.weatherAUS` where Sunshine <> 'NA') | |
,encord_WindGustSpeed as (select cast(WindGustSpeed as float64) as WindGustSpeed from `pj.ds.weatherAUS` where WindGustSpeed <> 'NA') | |
,encord_WindSpeed9am as (select cast(WindSpeed9am as float64) as WindSpeed9am from `pj.ds.weatherAUS` where WindSpeed9am <> 'NA') |
#standaardSQL | |
with | |
null_tb as ( | |
select | |
(select count(*) from `pj.ds.weatherAUS` where Location="NA") as Location | |
, (select count(*) from `pj.ds.weatherAUS` where MinTemp="NA") as MinTemp | |
, (select count(*) from `pj.ds.weatherAUS` where MaxTemp="NA") as MaxTemp | |
, (select count(*) from `pj.ds.weatherAUS` where Rainfall="NA") as Rainfall | |
, (select count(*) from `pj.ds.weatherAUS` where Evaporation="NA") as Evaporation |
#standaardSQL | |
select | |
(select count(*) from `pj.ds.tb`) as no_of_row | |
, count(*) as no_of_col | |
from | |
`pj.ds.INFORMATION_SCHEMA.COLUMNS` | |
where | |
table_name = `{tb}` -- 必要 に応じて絞り込み | |
; |
/********************************************************* | |
* ID-POS | |
* | |
* POSデータのDWH(約80億レコード/1テーブル)を全て回すのはリスク。 | |
* 「WHERE句」を用いて、レコード取得制限の掛ける絞り込み。 | |
*********************************************************/ | |
with | |
PREP_POS as ( | |
select |