Skip to content

Instantly share code, notes, and snippets.

View sh16ma's full-sized avatar
🐻‍❄️

sh16ma sh16ma

🐻‍❄️
View GitHub Profile
@sh16ma
sh16ma / jis_encoding.py
Last active November 6, 2022 06:38
#🐍 #Python #Eecel #Encoding
# DLした「.csv」ファイルを一括文字コード変換
# cf. https://dse-souken.com/2021/06/06/tec/
# HowToUse
# 1. DLしたファイルをまとめてフォルダー化して「enc」と名付ける
# 2.「enc」フォルダー内にあるDLしたファイルを全てデスクトップへコピペ
# 3.「jis_encoding.py」を実行
import glob
@sh16ma
sh16ma / aggligation_outline_at_bq.md
Last active October 21, 2023 11:02
#🔎 #BigQuery #OUTLINE

BigQuery 集計・分析の為の記述構成

SQLを用いての集計・分析を行うと、クエリだけで軽く数百行を超えることがしばしばある。
おおまかに、どのエリアにどのテーブル(整形したサブクエリ)が存在しているのかを予め決めておくことで、
容易にクエリを記述・読み返すことができる。

※ 大前提: `WITH句`を用いて、複数のテーブル(整形したデータ)を記述して、最終的にまとめていく書き方を想定している。

|| BigQuery - Outline

@sh16ma
sh16ma / while.sql
Last active October 12, 2022 01:39
#🔎 #BigQuery #BigQueryScripting #while #繰り返し処理
#standardSQL
declare sd array<string>;
declare ed array<string>;
declare start_days array<string>;
declare end_days array<string>;
declare i int64 default 1;
set sd = ['2019-01-01', '2019-02-01', '2019-03-01'];
set ed = ['2019-04-30', '2019-08-31', '2019-12-31'];
@sh16ma
sh16ma / table_suffix.sql
Last active March 1, 2023 06:01
#🔎 #BigQuery #BigQueryScripting #_TABLE_SUFFIX #ga_data
#standaardSQL
# 集計期間
decleare term_start string default '2020-01-01';
decleare term_end string default '2020-12-31';
decleare prep_table_suffix string;
set prep_table_suffix = (
select format_date('%Y%m%d', current_date('Asia/Tokyo')) as TODAY
);
@sh16ma
sh16ma / pivot.sql
Last active January 19, 2022 10:09
#🔎 #BigQuery #クロス集計 #DS100本ノック #SQL
#standaardSQL
with gender_era_amount as (
select
c.gender_cd as gender
, trunc(c.age/10) * 10 as era
, sum(r.amount) as amount
from `prj-test3.100knocks.customer` as c
join `prj-test3.100knocks.receipt` as r
using(customer_id)
@sh16ma
sh16ma / days_count.sql
Last active January 19, 2022 10:09
#🔎 #BigQuery #日数差 #ウィンドウ関数 #ウィンドウフレーム
#standaardSQL
select
Location
, datetime_diff(new_date, old_date, day) as days_diff
from(
select
-- *
Date
, Location
@sh16ma
sh16ma / statistics_columns.sql
Last active February 2, 2022 07:42
#🔎 #BigQuery #カラム別要約統計量 #Pandasでええやん #気候データ(サンプル利用)
#standaardSQL
with
encord_MinTemp as (select cast(MinTemp as float64) as MinTemp from `pj.ds.weatherAUS` where MinTemp <> 'NA')
,encord_MaxTemp as (select cast(MaxTemp as float64) as MaxTemp from `pj.ds.weatherAUS` where MaxTemp <> 'NA')
,encord_Rainfall as (select cast(Rainfall as float64) as Rainfall from `pj.ds.weatherAUS` where Rainfall <> 'NA')
,encord_Evaporation as (select cast(Evaporation as float64) as Evaporation from `pj.ds.weatherAUS` where Evaporation <> 'NA')
,encord_Sunshine as (select cast(Sunshine as float64) as Sunshine from `pj.ds.weatherAUS` where Sunshine <> 'NA')
,encord_WindGustSpeed as (select cast(WindGustSpeed as float64) as WindGustSpeed from `pj.ds.weatherAUS` where WindGustSpeed <> 'NA')
,encord_WindSpeed9am as (select cast(WindSpeed9am as float64) as WindSpeed9am from `pj.ds.weatherAUS` where WindSpeed9am <> 'NA')
@sh16ma
sh16ma / null.sql
Last active January 19, 2022 10:10
#🔎 #BigQuery #EDA #転置行列
#standaardSQL
with
null_tb as (
select
(select count(*) from `pj.ds.weatherAUS` where Location="NA") as Location
, (select count(*) from `pj.ds.weatherAUS` where MinTemp="NA") as MinTemp
, (select count(*) from `pj.ds.weatherAUS` where MaxTemp="NA") as MaxTemp
, (select count(*) from `pj.ds.weatherAUS` where Rainfall="NA") as Rainfall
, (select count(*) from `pj.ds.weatherAUS` where Evaporation="NA") as Evaporation
@sh16ma
sh16ma / shape.sql
Last active January 19, 2022 10:10
#🔎 #BigQuery #テーブルデータ確認 #行列 #sahpe的な
#standaardSQL
select
(select count(*) from `pj.ds.tb`) as no_of_row
, count(*) as no_of_col
from
`pj.ds.INFORMATION_SCHEMA.COLUMNS`
where
table_name = `{tb}` -- 必要 に応じて絞り込み
;
@sh16ma
sh16ma / aggregation_technique_at_td.sql
Last active October 20, 2023 08:22
#💎 #TreasuerData #集計テク #Presto #SQL #構成比
/*********************************************************
* ID-POS
*
*    POSデータのDWH(約80億レコード/1テーブル)を全て回すのはリスク。
*    「WHERE句」を用いて、レコード取得制限の掛ける絞り込み。
*********************************************************/
with
PREP_POS as (
select