Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save madilk/d688b53c3bea1e85a04ce58a6530f021 to your computer and use it in GitHub Desktop.
Save madilk/d688b53c3bea1e85a04ce58a6530f021 to your computer and use it in GitHub Desktop.
Tidyquery SQL queries for dataframes inside R programming

#Extract store names in dplyr via mutate

data_store<- data%>%
mutate(Store=
ifelse(str_detect(OrderId, "US"),"US",
ifelse(str_detect(OrderId, "CA"),"CA",
ifelse(str_detect(OrderId, "UK"),"UK",
"Other"))))
View(data_store)

#Total transactions + Qty_sold WHERE Category1 AND Dates
#Within 1/1/2021 and 1/7/2021

query("SELECT
COUNT_DISTINCT (OrderId) AS total_trans,
SUM (Qty_sold) AS total_Qty FROM
data
WHERE Category = 'Category1' AND
(Date BETWEEN TO_DATE('2021-1-1') AND TO_DATE('2021-1-7'))")

#Total transactions + Qty_sold WHERE Category1

query("SELECT
COUNT_DISTINCT (OrderId) AS total_trans,
SUM (Qty_sold) AS total_Qty FROM
data
WHERE Category = 'Category1'")

#Total transactions + Qty_sold

query("SELECT COUNT_DISTINCT (OrderId) AS total_trans,
SUM (Qty_sold) AS total_Qty FROM data")

#Total transactions in dataset

query("SELECT COUNT_DISTINCT (OrderId) AS transactions FROM data")

##Use mutate / dplyr in same function as SQL
data%>%
mutate(Store=
ifelse(str_detect(OrderId, "US"),"US",
ifelse(str_detect(OrderId, "CA"),"CA",
ifelse(str_detect(OrderId, "UK"),"UK",
"Other"))))%>%
query("SELECT
Store,
COUNT_DISTINCT (OrderId) AS total_trans,
SUM (Qty_sold) AS total_Qty
WHERE Category = 'Category1' AND
(Date BETWEEN TO_DATE('2021-1-1') AND TO_DATE('2021-1-7'))
GROUP BY Store")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment