Skip to content

Instantly share code, notes, and snippets.

@abicky
Created May 28, 2011 06:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save abicky/793bfe6b193afdc0a6fb to your computer and use it in GitHub Desktop.
Save abicky/793bfe6b193afdc0a6fb to your computer and use it in GitHub Desktop.
library(RMySQL)
if (file.exists("print.mysqlike.R")) {
source("print.mysqlike.R")
print.data.frame <- print.mysqlike
}
# make sample data
set.seed(1000)
product <- data.frame(id = 101:110, name = sample(LETTERS, 10), price = sample(5:20 * 100, 10))
n <- 100
makeOrder <- function(term, n) {
data.frame(order_id = sort(sample(seq(1001, len = n * 2), n)),
date = sort(sample(paste("2011-05-", term, sep = ""), n, replace = TRUE)),
customer_id = sample(10001:10100, n, replace = TRUE),
product_id = sample(product$id, n, replace = TRUE),
number = sample(1:5, n, replace = TRUE))
}
order1 <- makeOrder(21:24, n)
order2 <- makeOrder(25:28, n)
# Do below queries in MySQL if 'test' database and 'test' user don't exist
#
# CREATE DATABASE IF NOT EXISTS test;
# GRANT ALL PRIVILEGES ON test.* to test@localhost;
# FLUSH PRIVILEGES;
m <- dbDriver("MySQL")
con <- dbConnect(m, dbname = "test", user = "test")
# You can replace the below 4 lines with these lines but the structure of the tables is too terrible.
# dbWriteTable(con, "product", product, row.names = FALSE)
# dbWriteTable(con, "orders", orders, row.names = FALSE)
dbGetQuery(con, "CREATE TABLE product (id INT PRIMARY KEY, name CHAR(1), price INT)")
dbGetQuery(con, "CREATE TABLE order1 (order_id INT PRIMARY KEY, date DATE, customer_id INT, product_id INT, number INT)")
dbGetQuery(con, "CREATE TABLE order2 (order_id INT PRIMARY KEY, date DATE, customer_id INT, product_id INT, number INT)")
dbWriteTable(con, "product", product, append = TRUE, row.names = FALSE)
dbWriteTable(con, "order1", order1, append = TRUE, row.names = FALSE)
dbWriteTable(con, "order2", order2, append = TRUE, row.names = FALSE)
#-------#
# Basic #
#-------#
# SELECT order_id, customer_id FROM order1;
dbGetQuery(con, "SELECT order_id, customer_id FROM order1")
order1[c("order_id", "customer_id")]
# SELECT * FROM order1 LIMIT 5;
dbGetQuery(con, "SELECT * FROM order1 LIMIT 5")
head(order1, n = 5)
# SELECT * FROM order1 LIMIT 2, 5;
dbGetQuery(con, "SELECT * FROM order1 LIMIT 2, 5")
order1[seq(3, len = 5),]
# SELECT DISTINCT customer_id FROM order1;
dbGetQuery(con, "SELECT DISTINCT customer_id FROM order1")
unique(order1["customer_id"])
# SELECT COUNT(*) FROM order1;
dbGetQuery(con, "SELECT COUNT(*) FROM order1")
nrow(order1)
#-------#
# WHERE #
#-------#
# SELECT * FROM order1 WHERE customer_id = 10038;
dbGetQuery(con, "SELECT * FROM order1 WHERE customer_id = 10038")
order1[order1$customer_id == 10038,]
subset(order1, customer_id == 10038)
# use 'droplevels' if you want to remove unused levels
# subset(order1, customer_id == 10038)$date
# [1] 2011-05-21 2011-05-22 2011-05-22
# Levels: 2011-05-21 2011-05-22 2011-05-23 2011-05-24
#
# droplevels(subset(order1, customer_id == 10038))$date
# [1] 2011-05-21 2011-05-22 2011-05-22
# Levels: 2011-05-21 2011-05-22
# SELECT * FROM order1 WHERE customer_id = 10038 AND date = '2011-05-21';
dbGetQuery(con, "SELECT * FROM order1 WHERE customer_id = 10038 AND date = '2011-05-21'")
order1[order1$customer_id == 10038 & order1$date == "2011-05-21",]
subset(order1, customer_id == 10038 & order1$date == "2011-05-21")
# SELECT * FROM order1 WHERE customer_id = 10038 OR customer_id = 10050;
dbGetQuery(con, "SELECT * FROM order1 WHERE customer_id = 10038 OR customer_id = 10050")
order1[order1$customer_id == 10038 | order1$customer_id == 10050,]
subset(order1, customer_id == 10038 | customer_id == 10050)
# SELECT * FROM order1 WHERE customer_id in (10038, 10050, 10079);
dbGetQuery(con, "SELECT * FROM order1 WHERE customer_id in (10038, 10050, 10079)")
order1[order1$customer_id %in% c(10038, 10050, 10079),]
subset(order1, customer_id %in% c(10038, 10050, 10079))
#----------#
# ORDER BY #
#----------#
# SELECT * FROM order1 ORDER BY customer_id;
dbGetQuery(con, "SELECT * FROM order1 ORDER BY customer_id")
order1[order(order1$customer_id),]
# SELECT * FROM order1 ORDER BY customer_id, product_id;
dbGetQuery(con, "SELECT * FROM order1 ORDER BY customer_id, product_id")
order1[order(order1$customer_id, order1$product_id),]
#------#
# JOIN #
#------#
# SELECT * FROM order1, order2 WHERE order1.order_id = order2.order_id;
dbGetQuery(con, "SELECT * FROM order1, order2 WHERE order1.order_id = order2.order_id")
merge(order1, order2, by = "order_id")
# SELECT * FROM order1 LEFT JOIN order2 USING (order_id);
dbGetQuery(con, "SELECT * FROM order1 LEFT JOIN order2 USING (order_id)")
merge(order1, order2, by = "order_id", all.x = TRUE)
# SELECT * FROM order1 RIGHT JOIN order2 USING (order_id);
dbGetQuery(con, "SELECT * FROM order1 RIGHT JOIN order2 USING (order_id)")
merge(order1, order2, by = "order_id", all.y = TRUE)
# FULL JOIN (MySQL not support)
merge(order1, order2, by = "order_id", all = TRUE)
# SELECT product_id, order_id, date, customer_id, number, name, price FROM order1, product WHERE id = product_id;
dbGetQuery(con, "SELECT * FROM order1, product WHERE id = product_id")
merge(order1, product, by.x = "product_id", by.y = "id")
#----------#
# GROUP BY #
#----------#
# SELECT customer_id, COUNT(*) FROM order1 GROUP BY customer_id;
dbGetQuery(con, "SELECT customer_id, COUNT(*) FROM order1 GROUP BY customer_id")
as.data.frame(table(order1$customer_id))
# SELECT customer_id, date, COUNT(*) FROM order1 GROUP BY customer_id, date;
dbGetQuery(con, "SELECT customer_id, date, COUNT(*) FROM order1 GROUP BY customer_id, date")
as.data.frame(table(order1$customer_id, order1$date))
# SELECT customer_id, AVG(number) FROM order1 GROUP BY customer_id;
dbGetQuery(con, "SELECT customer_id, AVG(number) FROM order1 GROUP BY customer_id")
aggregate(order1$number, list(order1$customer_id), mean)
# SELECT customer_id, AVG(number) FROM order1 GROUP BY customer_id, date;
dbGetQuery(con, "SELECT customer_id, date, AVG(number) FROM order1 GROUP BY customer_id, date")
aggregate(order1$number, list(order1$customer_id, order1$date), mean)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment