Preview of using Revolution R Enterprise inside SQL Server 2016
### Connect to SQL Server using RODBC | |
library(RODBC) | |
library(magrittr) | |
# Connect to SQL Server using RODBC ------------------ | |
sqlHost <- "DAA136209339.sys-sqlsvr.local" | |
sqlDatabase <- "RevoTestDB" | |
dsnString <- "driver={SQL Server};server=%s;database=%s;trusted_connection=true" | |
dsn <- sprintf(dsnString, sqlHost, sqlDatabase) | |
dbHandle <- odbcDriverConnect(dsn) | |
# Send query and return data ------------------------- | |
query <- "SELECT * FROM AirlineDemoSmall" | |
airDataFrame <- sqlQuery(dbHandle, query) | |
airDataFrame %>% head | |
str(airDataFrame) | |
# Create linear model using lm() --------------------- | |
day.names <- paste0(c("Mon", "Tues", "Wednes", "Thurs", "Fri", "Satur", "Sun"), | |
"day") | |
levels(airDataFrame$DayOfWeek) <- day.names | |
model.lm <- lm(ArrDelay ~ CRSDepTime + DayOfWeek - 1, airDataFrame) | |
summary(model.lm) | |
library(coefplot) | |
coefplot(model.lm, title = "Linear model using lm()") | |
# Close connection ----------------------------------- | |
# odbcClose(dbHandle) | |
### Use RevoScaleR RxLocalParallel compute context | |
day.names <- paste0(c("Mon", "Tues", "Wednes", "Thurs", "Fri", "Satur", "Sun"), | |
"day") | |
# Define input object as local csv file -------------- | |
air.csv <- RxTextData( | |
"airdata.csv", | |
rowsPerRead = 100e3, | |
delimiter = ",", | |
colInfo = list(ArrDelay = list(type = "integer"), | |
DayOfWeek = list(type = "factor", levels = day.names) | |
) | |
) | |
# Convert to xdf ------------------------------------- | |
xdf <- "airdata.xdf" | |
rxDataStep(air.csv, outFile = xdf, overwrite = TRUE) | |
# Set local parallel compute context------------------ | |
rxSetComputeContext( | |
RxLocalParallel() | |
) | |
rxGetInfo(xdf) | |
rxGetVarInfo(xdf) | |
# Get data summary and create linear model ----------- | |
rxSummary(~ArrDelay + DayOfWeek, data = xdf, reportProgress = 0) | |
model.rxLinMod <- rxLinMod(ArrDelay ~ CRSDepTime + DayOfWeek - 1, xdf, reportProgress = 2) | |
summary(model.rxLinMod) | |
library(coefplot) | |
coefplot(model.rxLinMod, title = "Linear model using rxLinMod()") | |
### Use RevoScaleR RxInSqlServer compute context | |
sqlHost <- "DAA136209339.sys-sqlsvr.local" | |
sqlDatabase <- "RevoTestDB" | |
sqlUser <- "xxxxxxx" | |
sqlPw <- "xxxxxxxx" | |
day.names <- paste0(c("Mon", "Tues", "Wednes", "Thurs", "Fri", "Satur", "Sun"), | |
"day") | |
# Define SQL Server connection string ---------------- | |
con <- sprintf("Driver=SQL Server;Server=%s;Database=%s;Uid=%s;Pwd=%s", | |
sqlHost, sqlDatabase, sqlUser, sqlPw) | |
# Define data object, including database table ------- | |
airData <- RxSqlServerData( | |
connectionString = con, | |
table = "AirlineDemoSmall", | |
colInfo = list(ArrDelay = list(type = "integer"), | |
DayOfWeek = list(type = "factor", levels = day.names) | |
) | |
) | |
# Define compute context ----------------------------- | |
rxSetComputeContext( | |
RxInSqlServer(connectionString = con, | |
autoCleanup = FALSE, | |
consoleOutput = TRUE | |
) | |
) | |
# Perform analysis inside database ------------------- | |
model.rxLinMod <- rxLinMod(ArrDelay ~ CRSDepTime + DayOfWeek - 1, airData) | |
summary(model.rxLinMod) | |
library(coefplot) | |
coefplot(model.rxLinMod, title = "Linear model using rxLinMod()") |
EXECUTE sp_execute_external_script | |
@language = N'R' | |
, @script = N'OutputDataSet <- InputDataSet' | |
, @input_data_1 = N'SELECT 1 as Col' | |
WITH RESULT SETS ((col int not null)); | |
go | |
EXECUTE sp_execute_external_script | |
@language = N'R' | |
, @script = N'OutputDataSet <- subset(iris, select=-Species);' | |
--, @parallel = 0 | |
, @input_data_1 = N'SELECT 1 as Col' | |
WITH RESULT SETS (("Sepal.Length" float not null, "Sepal.Width" float not null | |
, "Petal.Length" float not null, "Petal.Width" float not null)); | |
go |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment