Skip to content

Instantly share code, notes, and snippets.

@leeper
Created June 18, 2017 13:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save leeper/bfdb78ed7a5f15553857307d41574700 to your computer and use it in GitHub Desktop.
Save leeper/bfdb78ed7a5f15553857307d41574700 to your computer and use it in GitHub Desktop.
Old code from an RSQLite tutorial
# R and SQLite tutorial
# Steph Locke, Mango Solutions
# 2015-11-30
library("RSQLite")
library("DBI")
dbListTables(datasetsDb())[1:2]
myDB <- dbConnect(SQLite(), "local.db")
dbListTables(myDB)
# Data Definition Language (DDL)
# CREATE TABLE
# ALTER TABLE
# DROP TABLE
# CREATE INDEX
# Data Manipulation Language (DML)
# CRUD operations
# ACID operations (not present in SQLite)
#+------------+#
# --- CRUD --- #
#+------------+#
# Create = INSERT
# Read = SELECT
# Update = UPDATE
# Delete = DELETE
# Upsert = MERGE ## conditional insertion; insert if absent & update if present
## INSERT OPERATIONS ##
# INSERT INTO airquality (ozone, month, temp)
# VALUES (6, 7, 8)
## SQLite tables have types and will error if wrong types supplied
## INSERT operations insert NUL unless column default is set
# write a table
qResult <- dbWriteTable(myDB, "airquality", airquality)
# insert some data ()
newdata <- airquality[is.na(airquality$Ozone), ]
qResult <- dbWriteTable(myDB, "airquality", newdata, append = TRUE)
sql <-
paste0("INSERT INTO airquality VALUES(",
paste(rep("?", ncol(newdata)),
collapse = ","),
")")
dbGetPreparedQuery(myDB, sql, bind.data = newdata)
# add a new table
dbWriteTable(myDB, "iris", iris)
# SELECT/read
# can also have JOIN, WHERE, GROUP BY, HAVING, ORDER BY
a <- dbReadTable(myDB, "airquality", select.cols = "ozone, wind")
b <- dbGetQuery(myDB, "SELECT ozone, wind FROM airquality")
identical(a,b)
dbGetQuery(myDB, "SELECT * FROM iris")
# all of these are equivalent:
dbGetQuery(myDB, "SELECT `Sepal.Length`, `Sepal.Width` FROM iris")
dbGetQuery(myDB, "SELECT `sepal.length`, `sepal.width` FROM iris")
dbGetQuery(myDB, 'SELECT "Sepal.Length", "Sepal.Width" FROM iris')
dbGetQuery(myDB, 'SELECT "sepal.length", "sepal.width" FROM iris')
# UPDATE (to modify records)
## typically need to use a WHERE modifier
dbGetQuery(myDB,
"UPDATE airquality
SET ozone = -99
WHERE ozone IS NULL")
dbGetQuery(myDB,
'UPDATE iris
SET Species = "versicolour"
WHERE Species = "versicolor"')
# dbSendQuery() # rarely needed; doesn't pull back all data when executing query
# DELETE
## use a `where` clause, otherwise everything gets deleted
## DELETE does not reset row identifiers
dbGetQuery(myDB, "DELETE FROM airquality WHERE month <= 4")
iris_deleted <- dbGetQuery(myDB, "SELECT * FROM iris WHERE `Sepal.Length` <= 4.5")
dbGetQuery(myDB, "DELETE FROM iris WHERE `Sepal.Length` <= 4.5")
#+-----------+#
# -- JOINS -- #
#+-----------+#
# INNER
# LEFT
## RIGHT (can rewrite any right join as a left join)
# FULL
# CROSS (cartesian product)
set.seed(1)
monthTbl <- data.frame(month = 1:10)
monthTbl$value <- monthTbl$month * rnorm(10)
dbWriteTable(myDB, "months", monthTbl[-5,])
# INNER join
dbGetQuery(myDB,
"SELECT *
FROM airquality a
INNER JOIN months m
ON m.month=a.month")
# LEFT join
dbGetQuery(myDB,
"SELECT *
FROM airquality a
LEFT JOIN months m
ON m.month=a.month")
# FULL join
dbGetQuery(myDB,
"SELECT *
FROM airquality a
FULL JOIN months m
ON m.month=a.month")
# CROSS join
dbGetQuery(myDB,
"SELECT *
FROM (SELECT DISTINCT month
FROM airquality) a
CROSS JOIN months m")
# example INNER join on `iris` data
tmp <- aggregate(. ~ Species, data = iris, FUN = length)
tmp$Species <- as.character(tmp$Species)
tmp[nrow(tmp) + 1, "Species"] <- "unknown"
tmp
dbWriteTable(myDB, "irisagg", tmp)
## ??? THIS IS WRONG
dbGetQuery(myDB,
"SELECT i.*, j.`Sepal.Length`
FROM irisagg i
LEFT JOIN iris j
ON i.Species=j.Species")
## WHERE statement
## retrieve rows matching two conditions
dbGetQuery(myDB,
"SELECT * FROM iris
WHERE `Petal.Length`*`Petal.Width` > 3
AND `Sepal.Length`*`Sepal.Width` <= 10")
## GROUP BY
dbGetQuery(myDB,
"SELECT AVG(`Sepal.Width`), AVG(`Sepal.Length`) FROM iris
GROUP BY `Species`")
# equivalent to:
aggregate(cbind(Sepal.Width, Sepal.Length) ~ Species, data = iris, FUN = mean)
# DROP (deletes a table, not just its contents)
## dbSendQuery(myDB, "DROP TABLE airquality")
## dbRemoveTable(myDB, "airquality")
# disconnect
dbDisconnect(myDB)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment