-
-
Save abicky/793bfe6b193afdc0a6fb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RMySQL) | |
if (file.exists("print.mysqlike.R")) { | |
source("print.mysqlike.R") | |
print.data.frame <- print.mysqlike | |
} | |
# make sample data | |
set.seed(1000) | |
product <- data.frame(id = 101:110, name = sample(LETTERS, 10), price = sample(5:20 * 100, 10)) | |
n <- 100 | |
makeOrder <- function(term, n) { | |
data.frame(order_id = sort(sample(seq(1001, len = n * 2), n)), | |
date = sort(sample(paste("2011-05-", term, sep = ""), n, replace = TRUE)), | |
customer_id = sample(10001:10100, n, replace = TRUE), | |
product_id = sample(product$id, n, replace = TRUE), | |
number = sample(1:5, n, replace = TRUE)) | |
} | |
order1 <- makeOrder(21:24, n) | |
order2 <- makeOrder(25:28, n) | |
# Do below queries in MySQL if 'test' database and 'test' user don't exist | |
# | |
# CREATE DATABASE IF NOT EXISTS test; | |
# GRANT ALL PRIVILEGES ON test.* to test@localhost; | |
# FLUSH PRIVILEGES; | |
m <- dbDriver("MySQL") | |
con <- dbConnect(m, dbname = "test", user = "test") | |
# You can replace the below 4 lines with these lines but the structure of the tables is too terrible. | |
# dbWriteTable(con, "product", product, row.names = FALSE) | |
# dbWriteTable(con, "orders", orders, row.names = FALSE) | |
dbGetQuery(con, "CREATE TABLE product (id INT PRIMARY KEY, name CHAR(1), price INT)") | |
dbGetQuery(con, "CREATE TABLE order1 (order_id INT PRIMARY KEY, date DATE, customer_id INT, product_id INT, number INT)") | |
dbGetQuery(con, "CREATE TABLE order2 (order_id INT PRIMARY KEY, date DATE, customer_id INT, product_id INT, number INT)") | |
dbWriteTable(con, "product", product, append = TRUE, row.names = FALSE) | |
dbWriteTable(con, "order1", order1, append = TRUE, row.names = FALSE) | |
dbWriteTable(con, "order2", order2, append = TRUE, row.names = FALSE) | |
#-------# | |
# Basic # | |
#-------# | |
# SELECT order_id, customer_id FROM order1; | |
dbGetQuery(con, "SELECT order_id, customer_id FROM order1") | |
order1[c("order_id", "customer_id")] | |
# SELECT * FROM order1 LIMIT 5; | |
dbGetQuery(con, "SELECT * FROM order1 LIMIT 5") | |
head(order1, n = 5) | |
# SELECT * FROM order1 LIMIT 2, 5; | |
dbGetQuery(con, "SELECT * FROM order1 LIMIT 2, 5") | |
order1[seq(3, len = 5),] | |
# SELECT DISTINCT customer_id FROM order1; | |
dbGetQuery(con, "SELECT DISTINCT customer_id FROM order1") | |
unique(order1["customer_id"]) | |
# SELECT COUNT(*) FROM order1; | |
dbGetQuery(con, "SELECT COUNT(*) FROM order1") | |
nrow(order1) | |
#-------# | |
# WHERE # | |
#-------# | |
# SELECT * FROM order1 WHERE customer_id = 10038; | |
dbGetQuery(con, "SELECT * FROM order1 WHERE customer_id = 10038") | |
order1[order1$customer_id == 10038,] | |
subset(order1, customer_id == 10038) | |
# use 'droplevels' if you want to remove unused levels | |
# subset(order1, customer_id == 10038)$date | |
# [1] 2011-05-21 2011-05-22 2011-05-22 | |
# Levels: 2011-05-21 2011-05-22 2011-05-23 2011-05-24 | |
# | |
# droplevels(subset(order1, customer_id == 10038))$date | |
# [1] 2011-05-21 2011-05-22 2011-05-22 | |
# Levels: 2011-05-21 2011-05-22 | |
# SELECT * FROM order1 WHERE customer_id = 10038 AND date = '2011-05-21'; | |
dbGetQuery(con, "SELECT * FROM order1 WHERE customer_id = 10038 AND date = '2011-05-21'") | |
order1[order1$customer_id == 10038 & order1$date == "2011-05-21",] | |
subset(order1, customer_id == 10038 & order1$date == "2011-05-21") | |
# SELECT * FROM order1 WHERE customer_id = 10038 OR customer_id = 10050; | |
dbGetQuery(con, "SELECT * FROM order1 WHERE customer_id = 10038 OR customer_id = 10050") | |
order1[order1$customer_id == 10038 | order1$customer_id == 10050,] | |
subset(order1, customer_id == 10038 | customer_id == 10050) | |
# SELECT * FROM order1 WHERE customer_id in (10038, 10050, 10079); | |
dbGetQuery(con, "SELECT * FROM order1 WHERE customer_id in (10038, 10050, 10079)") | |
order1[order1$customer_id %in% c(10038, 10050, 10079),] | |
subset(order1, customer_id %in% c(10038, 10050, 10079)) | |
#----------# | |
# ORDER BY # | |
#----------# | |
# SELECT * FROM order1 ORDER BY customer_id; | |
dbGetQuery(con, "SELECT * FROM order1 ORDER BY customer_id") | |
order1[order(order1$customer_id),] | |
# SELECT * FROM order1 ORDER BY customer_id, product_id; | |
dbGetQuery(con, "SELECT * FROM order1 ORDER BY customer_id, product_id") | |
order1[order(order1$customer_id, order1$product_id),] | |
#------# | |
# JOIN # | |
#------# | |
# SELECT * FROM order1, order2 WHERE order1.order_id = order2.order_id; | |
dbGetQuery(con, "SELECT * FROM order1, order2 WHERE order1.order_id = order2.order_id") | |
merge(order1, order2, by = "order_id") | |
# SELECT * FROM order1 LEFT JOIN order2 USING (order_id); | |
dbGetQuery(con, "SELECT * FROM order1 LEFT JOIN order2 USING (order_id)") | |
merge(order1, order2, by = "order_id", all.x = TRUE) | |
# SELECT * FROM order1 RIGHT JOIN order2 USING (order_id); | |
dbGetQuery(con, "SELECT * FROM order1 RIGHT JOIN order2 USING (order_id)") | |
merge(order1, order2, by = "order_id", all.y = TRUE) | |
# FULL JOIN (MySQL not support) | |
merge(order1, order2, by = "order_id", all = TRUE) | |
# SELECT product_id, order_id, date, customer_id, number, name, price FROM order1, product WHERE id = product_id; | |
dbGetQuery(con, "SELECT * FROM order1, product WHERE id = product_id") | |
merge(order1, product, by.x = "product_id", by.y = "id") | |
#----------# | |
# GROUP BY # | |
#----------# | |
# SELECT customer_id, COUNT(*) FROM order1 GROUP BY customer_id; | |
dbGetQuery(con, "SELECT customer_id, COUNT(*) FROM order1 GROUP BY customer_id") | |
as.data.frame(table(order1$customer_id)) | |
# SELECT customer_id, date, COUNT(*) FROM order1 GROUP BY customer_id, date; | |
dbGetQuery(con, "SELECT customer_id, date, COUNT(*) FROM order1 GROUP BY customer_id, date") | |
as.data.frame(table(order1$customer_id, order1$date)) | |
# SELECT customer_id, AVG(number) FROM order1 GROUP BY customer_id; | |
dbGetQuery(con, "SELECT customer_id, AVG(number) FROM order1 GROUP BY customer_id") | |
aggregate(order1$number, list(order1$customer_id), mean) | |
# SELECT customer_id, AVG(number) FROM order1 GROUP BY customer_id, date; | |
dbGetQuery(con, "SELECT customer_id, date, AVG(number) FROM order1 GROUP BY customer_id, date") | |
aggregate(order1$number, list(order1$customer_id, order1$date), mean) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment