Skip to content

Instantly share code, notes, and snippets.

View nullvalues2.sql
/** 5.2 NULL values **/
COUNT(CASE WHEN location_var is NULL THEN id_var END) AS NULL_CNT
View nullvalues.sql
/** 5. NULL values **/
SELECT
COUNT(*) AS total_cnt,
COUNT(CASE WHEN location_var = 'California' THEN id_var END) AS california_CNT,
COUNT(CASE WHEN location_var <> 'California' THEN id_var END) AS non_california_CNT
FROM
userlogin
WHERE id_var = '0155'
View logical_par.sql
/** 4.2 User logins in NYC and in Year 2018 or Num_Var >= 1000 -- parenthesis **/
WHERE
location_var = 'NYC'
AND (
YEAR(date_var) = '2018'
OR num_var >= 1000
)
View logical.sql
/** 4. User logins in NYC and in Year 2018 or Num_Var >= 1000**/
SELECT *
FROM
userlogin
WHERE
location_var = 'NYC'
AND YEAR(date_var) = '2018'
OR num_var >= 1000
View groupby.sql
/** 2. Return more information about login records **/
GROUP BY id_var, user_var, date_var, location_var
View overPartitionby.sql
/** 2.2 Return more information about login records -- PARTITION BY **/
SELECT *
FROM
(
SELECT
dat1.*,
SUM(CASE WHEN location_var = 'NYC' THEN 1 END) OVER (PARTITION BY id_var) AS nyc_cnt,
SUM(CASE WHEN location_var = 'Illinois' THEN 1 END) OVER (PARTITION BY id_var) AS illinois_cnt
FROM
userlogin dat1
View sequential.sql
/** 3. User logins in California followed by Massachusetts **/
SELECT *
FROM
(
SELECT
dat.*,
LEAD(location_var) OVER (PARTITION BY id_var ORDER BY date_var) AS next_location
FROM
userlogin dat
) dat2
View AND_logic.sql
/** 1.2 User logins in both NYC and Illinois -- logic AND **/
SELECT
id_var, user_var
FROM
(
SELECT
id_var,
user_var,
SUM(CASE WHEN location_var = 'NYC' THEN 1 ELSE 0 END) AS nyc_cnt,
SUM(CASE WHEN location_var = 'Illinois' THEN 1 ELSE 0 END) AS illinois_cnt
View IN_operator.sql
/** 1. User logins in both NYC and Illinois **/
SELECT
DISTINCT id_var, user_var
FROM
userlogin
WHERE
location_var IN ('NYC', 'Illinois')
View parse_series.py
## Parse tuples of values into their own columns
### Step1: Convert each element to their own series
df_ = df["Nums"].apply(pd.Series)
Out[39]:
0 1
index0 976 660
index1 984 360
index2 246 127
index3 252 345