# Local compute context: open the composite XDF sample data and inspect its variable metadata
xdfData <- RxXdfData(file.path(rxGetOption("sampleDataDir"), "AirlineDemoSmallComposite"))
varInfo <- rxGetVarInfo(xdfData)
length(varInfo$DayOfWeek$levels)
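# Hedged usage sketch (not in the original): summarize arrival delay by day of week
# on the local XDF, assuming the standard AirlineDemoSmall schema (ArrDelay, DayOfWeek).
rxSummary(~ ArrDelay:DayOfWeek, data = xdfData)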

# In-SQL Server compute context: enable verbose tracing, define the context,
# point at the AirlineDemoSmall table, then switch execution to SQL Server
rxOptions(traceLevel = 4)
connectionString <- "Driver=SQL Server;Server=.;Uid=RevoTester;Pwd=*******;Database=RevoTestDB;"
sqlServerCC <- RxInSqlServer(connectionString = connectionString,
                             shareDir = "C:/Users/<username>/Documents",
                             numTasks = 2,
                             autoCleanup = FALSE,
                             consoleOutput = TRUE,
                             wait = TRUE)
data <- RxSqlServerData(table = "dbo.AirlineDemoSmall", connectionString = connectionString)
rxSetComputeContext(sqlServerCC)
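# Hedged usage sketch (not in the original): run a summary inside the SQL Server
# compute context. Assumes dbo.AirlineDemoSmall has an ArrDelay column.
rxSummary(~ ArrDelay, data = data)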

# Hadoop MapReduce compute context (connecting from a Windows client over SSH/PuTTY)
rxOptions(traceLevel = 4)
myHadoopCluster <- RxHadoopMR(
    clientShareDir = "<client folder>",
    sshUsername = "<head node user id>",
    sshHostname = "<head node host name>",
    sshSwitches = "-i <key file path>",
    sshClientDir = "C:\\Program Files (x86)\\PuTTY",
    consoleOutput = TRUE,
    autoCleanup = FALSE,
    hadoopSwitches = "-Dmapreduce.task.timeout=0")
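# Hedged usage sketch (not in the original): activate the Hadoop context and summarize
# a CSV stored in HDFS. The HDFS path is hypothetical and the column names assume the
# AirlineDemoSmall schema.
rxSetComputeContext(myHadoopCluster)
hdfsFS <- RxHdfsFileSystem()
airCsv <- RxTextData("/share/AirlineDemoSmall/AirlineDemoSmall.csv", fileSystem = hdfsFS)
rxSummary(~ ArrDelay + DayOfWeek, data = airCsv)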

# In-Teradata compute context
rxOptions(traceLevel = 4)
connectionString <- "Driver=Teradata;DBCNAME=<DBCNAME>;Uid=RevoTester;pwd=*********;Database=RevoTestDB"
teradataCC <- RxInTeradata(
    connectionString = connectionString,
    shareDir = "C:/Users/<username>/Documents",
    remoteShareDir = "/tmp/revoJobs",
    revoPath = "/usr/lib64/microsoft-r/3.3/lib64/R",
    wait = TRUE,
    consoleOutput = TRUE,
    autoCleanup = FALSE)
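# Hedged usage sketch (not in the original): activate the Teradata context and
# summarize a table in RevoTestDB. Table and column names are assumptions.
rxSetComputeContext(teradataCC)
tdAirData <- RxTeradata(table = "AirlineDemoSmall", connectionString = connectionString)
rxSummary(~ ArrDelay, data = tdAirData)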

# Spark compute context with a Hive query; map depdate to Date via colInfo
sparkCC <- RxSpark(consoleOutput = TRUE, persistentRun = TRUE)
rxSetComputeContext(sparkCC)
dateColInfo <- list(
    id = list(type = "integer"),
    depdate = list(type = "Date")
)
hive_data <- RxHiveData(
    query = "select id, depdate from testdate",
    colInfo = dateColInfo)   # closing argument assumed; the original snippet was truncated here
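# Hedged check (not in the original): inspect the schema the Hive source reports,
# assuming rxGetVarInfo is supported for Hive data under the Spark compute context.
rxGetVarInfo(hive_data)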

# Variant: read depdate back as character instead of Date
sparkCC <- RxSpark(consoleOutput = TRUE, persistentRun = TRUE)
rxSetComputeContext(sparkCC)
# Optional: override the column types Hive reports
dateColInfo <- list(
    id = list(type = "integer"),
    depdate = list(type = "character")
)
hive_data <- RxHiveData(
    query = "select id, depdate from testdate",   # query and colInfo assumed; the original snippet was truncated here
    colInfo = dateColInfo)
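# Hedged sketch (not in the original): convert the character depdate to Date while
# copying into a composite XDF in HDFS. The output path is hypothetical, and this
# assumes rxDataStep transforms are supported for this source under the Spark context.
xdfOut <- RxXdfData("/tmp/testdate_xdf", fileSystem = RxHdfsFileSystem())
rxDataStep(inData = hive_data, outFile = xdfOut,
           transforms = list(depdate_d = as.Date(depdate, format = "%Y-%m-%d")),
           overwrite = TRUE)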

#RxHiveData only works with spark compute context
#If you need to submit to a specific yarn queue, you can use extraSparkConfig = "--conf spark.yarn.queue=<queue name>" in RxSpark
computeContext <- RxSpark(consoleOutput = TRUE, persistentRun = TRUE)
rxSetComputeContext(computeContext)
airColInfo <- list(
    arrdelay = list(type = "integer"),
    #crsdeptime = list(type = "numeric"),
    dayofweek = list(
        type = "factor",
        levels = c("Monday", "Tuesday", "Wednesday", "Thursday",
                   "Friday", "Saturday", "Sunday")   # levels assumed; the original snippet was truncated here
    )
)
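# Hedged sketch grounded in the comment above: direct the Spark job to a specific
# YARN queue via extraSparkConfig. The queue name "default" is a placeholder.
queuedContext <- RxSpark(consoleOutput = TRUE, persistentRun = TRUE,
                         extraSparkConfig = "--conf spark.yarn.queue=default")
rxSetComputeContext(queuedContext)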