Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@andrie
Created October 12, 2015 11:00
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save andrie/1d69d09c5eb2b1bc0033 to your computer and use it in GitHub Desktop.
Save andrie/1d69d09c5eb2b1bc0033 to your computer and use it in GitHub Desktop.
Preview of using Revolution R Enterprise inside SQL Server 2016
### Connect to SQL Server using RODBC
library(RODBC)
library(magrittr)
# Connect to SQL Server using RODBC ------------------
sqlHost <- "DAA136209339.sys-sqlsvr.local"
sqlDatabase <- "RevoTestDB"
dsnString <- "driver={SQL Server};server=%s;database=%s;trusted_connection=true"
dsn <- sprintf(dsnString, sqlHost, sqlDatabase)
dbHandle <- odbcDriverConnect(dsn)
# Send query and return data -------------------------
query <- "SELECT * FROM AirlineDemoSmall"
airDataFrame <- sqlQuery(dbHandle, query)
airDataFrame %>% head
str(airDataFrame)
# Create linear model using lm() ---------------------
day.names <- paste0(c("Mon", "Tues", "Wednes", "Thurs", "Fri", "Satur", "Sun"),
"day")
levels(airDataFrame$DayOfWeek) <- day.names
model.lm <- lm(ArrDelay ~ CRSDepTime + DayOfWeek - 1, airDataFrame)
summary(model.lm)
library(coefplot)
coefplot(model.lm, title = "Linear model using lm()")
# Close connection -----------------------------------
# odbcClose(dbHandle)
### Use RevoScaleR RxLocalParallel compute context
day.names <- paste0(c("Mon", "Tues", "Wednes", "Thurs", "Fri", "Satur", "Sun"),
"day")
# Define input object as local csv file --------------
air.csv <- RxTextData(
"airdata.csv",
rowsPerRead = 100e3,
delimiter = ",",
colInfo = list(ArrDelay = list(type = "integer"),
DayOfWeek = list(type = "factor", levels = day.names)
)
)
# Convert to xdf -------------------------------------
xdf <- "airdata.xdf"
rxDataStep(air.csv, outFile = xdf, overwrite = TRUE)
# Set local parallel compute context------------------
rxSetComputeContext(
RxLocalParallel()
)
rxGetInfo(xdf)
rxGetVarInfo(xdf)
# Get data summary and create linear model -----------
rxSummary(~ArrDelay + DayOfWeek, data = xdf, reportProgress = 0)
model.rxLinMod <- rxLinMod(ArrDelay ~ CRSDepTime + DayOfWeek - 1, xdf, reportProgress = 2)
summary(model.rxLinMod)
library(coefplot)
coefplot(model.rxLinMod, title = "Linear model using rxLinMod()")
### Use RevoScaleR RxInSqlServer compute context
sqlHost <- "DAA136209339.sys-sqlsvr.local"
sqlDatabase <- "RevoTestDB"
sqlUser <- "xxxxxxx"
sqlPw <- "xxxxxxxx"
day.names <- paste0(c("Mon", "Tues", "Wednes", "Thurs", "Fri", "Satur", "Sun"),
"day")
# Define SQL Server connection string ----------------
con <- sprintf("Driver=SQL Server;Server=%s;Database=%s;Uid=%s;Pwd=%s",
sqlHost, sqlDatabase, sqlUser, sqlPw)
# Define data object, including database table -------
airData <- RxSqlServerData(
connectionString = con,
table = "AirlineDemoSmall",
colInfo = list(ArrDelay = list(type = "integer"),
DayOfWeek = list(type = "factor", levels = day.names)
)
)
# Define compute context -----------------------------
rxSetComputeContext(
RxInSqlServer(connectionString = con,
autoCleanup = FALSE,
consoleOutput = TRUE
)
)
# Perform analysis inside database -------------------
model.rxLinMod <- rxLinMod(ArrDelay ~ CRSDepTime + DayOfWeek - 1, airData)
summary(model.rxLinMod)
library(coefplot)
coefplot(model.rxLinMod, title = "Linear model using rxLinMod()")
EXECUTE sp_execute_external_script
@language = N'R'
, @script = N'OutputDataSet <- InputDataSet'
, @input_data_1 = N'SELECT 1 as Col'
WITH RESULT SETS ((col int not null));
go
EXECUTE sp_execute_external_script
@language = N'R'
, @script = N'OutputDataSet <- subset(iris, select=-Species);'
--, @parallel = 0
, @input_data_1 = N'SELECT 1 as Col'
WITH RESULT SETS (("Sepal.Length" float not null, "Sepal.Width" float not null
, "Petal.Length" float not null, "Petal.Width" float not null));
go
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment