Skip to content

Instantly share code, notes, and snippets.

@YiLi225
YiLi225 / read_in_data.R
Created Apr 1, 2020
Read in the series datasets
View read_in_data.R
######## Point to your data directory
series_all_files = list.files(series_data_dir)
series_data_files = series_all_files[grepl('.csv', series_all_files)]
print(sprintf('Total data files = %s', length(series_data_files)))
series_data_ = lapply(series_data_files,
function(i) {
dat = read.csv(paste0(series_data_dir, '/', i), stringsAsFactors = FALSE)
file_ = gsub('.csv', '', i)
View AND_logic.sql
/** 1.2 User logins in both NYC and Illinois -- logic AND **/
SELECT
id_var, user_var
FROM
(
SELECT
id_var,
user_var,
SUM(CASE WHEN location_var = 'NYC' THEN 1 ELSE 0 END) AS nyc_cnt,
SUM(CASE WHEN location_var = 'Illinois' THEN 1 ELSE 0 END) AS illinois_cnt
View logical_par.sql
/** 4.2 User logins in NYC and in Year 2018 or Num_Var >= 1000 -- parenthesis **/
WHERE
location_var = 'NYC'
AND (
YEAR(date_var) = '2018'
OR num_var >= 1000
)
View nullvalues2.sql
/** 5.2 NULL values **/
COUNT(CASE WHEN location_var is NULL THEN id_var END) AS NULL_CNT
View groupby.sql
/** 2. Return more information about login records **/
GROUP BY id_var, user_var, date_var, location_var
View nullvalues.sql
/** 5. NULL values **/
SELECT
COUNT(*) AS total_cnt,
COUNT(CASE WHEN location_var = 'California' THEN id_var END) AS california_CNT,
COUNT(CASE WHEN location_var <> 'California' THEN id_var END) AS non_california_CNT
FROM
userlogin
WHERE id_var = '0155'
View logical.sql
/** 4. User logins in NYC and in Year 2018 or Num_Var >= 1000**/
SELECT *
FROM
userlogin
WHERE
location_var = 'NYC'
AND YEAR(date_var) = '2018'
OR num_var >= 1000
View sequential.sql
/** 3. User logins in California followed by Massachusetts **/
SELECT *
FROM
(
SELECT
dat.*,
LEAD(location_var) OVER (PARTITION BY id_var ORDER BY date_var) AS next_location
FROM
userlogin dat
) dat2
View overPartitionby.sql
/** 2.2 Return more information about login records -- PARTITION BY **/
SELECT *
FROM
(
SELECT
dat1.*,
SUM(CASE WHEN location_var = 'NYC' THEN 1 END) OVER (PARTITION BY id_var) AS nyc_cnt,
SUM(CASE WHEN location_var = 'Illinois' THEN 1 END) OVER (PARTITION BY id_var) AS illinois_cnt
FROM
userlogin dat1
View IN_operator.sql
/** 1. User logins in both NYC and Illinois **/
SELECT
DISTINCT id_var, user_var
FROM
userlogin
WHERE
location_var IN ('NYC', 'Illinois')