Skip to content

Instantly share code, notes, and snippets.

@cseidman
Created January 24, 2018 14:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cseidman/7c403e4edf9f3efb2c09b8d34ee8f7a1 to your computer and use it in GitHub Desktop.
Save cseidman/7c403e4edf9f3efb2c09b8d34ee8f7a1 to your computer and use it in GitHub Desktop.
modelbuildsp
ALTER PROCEDURE [dbo].[buildCustomerTree]
@parallel_outer bit = 0,
@input_data_1_outer nvarchar(max) = N'select
[Age]
,[MaritalStatus]
,[Gender]
,[YearlyIncome]
,[TotalChildren]
,[NumberChildrenAtHome]
,[Education]
,[Occupation]
,[IsHomeOwner]
,[NumberCarsOwned]
,[YearsSinceFirstPurchase]
,[CommuteDistance]
,[CountryRegionCode]
,[StateProvinceCode]
,[IsCardUser]
from dbo.vwDimCustomer'
AS
BEGIN TRY
exec sp_execute_external_script
@language = N'R',
@script = N'
buildCustomerTree <- function (trainingDs)
{
trainingDs$TotalChildren <- as.factor(trainingDs$TotalChildren)
trainingDs$NumberChildrenAtHome <- as.factor(trainingDs$NumberChildrenAtHome)
trainingDs$NumberCarsOwned <- as.factor(trainingDs$NumberCarsOwned)
trainingDs$YearsSinceFirstPurchase <- as.factor(trainingDs$YearsSinceFirstPurchase)
trainingDs$AgeRanges <- cut(trainingDs$Age, breaks = c(0, 30, 50, 70, 90), labels = c("11-30", "31-50", "51-70","71+"))
trainingDs$IncomeCategories <- cut(x = trainingDs$YearlyIncome,
breaks = c(0, 20000, 50000, 70000, 100000, 250000), labels = c("Low","Lower", "Middle", "Upper", "Wealthy"))
tr <- rxDTree(IsCardUser ~ Gender + Occupation + MaritalStatus +
IsHomeOwner + TotalChildren + NumberCarsOwned + AgeRanges +
CommuteDistance + IncomeCategories + CountryRegionCode +
Education, data = trainingDs, method = "class", overwrite = TRUE,
reportProgress = 0, cp = 0.03)
data.frame(trained_model = as.raw(serialize(tr, connection = NULL)))
}
result <- buildCustomerTree(trainingDs = trainingDs)
if (is.data.frame(result)) {
OutputDataSet <- result
} else if (is.list(result) && length(result) == 1
&& is.data.frame(result[[1]])) {
OutputDataSet <- result[[1]]
} else if (!is.null(result)) {
stop(paste0("the R function must return either NULL,",
" a data frame, or a list that ",
"constains a single data frame"))
}
',
@parallel = @parallel_outer,
@input_data_1 = @input_data_1_outer,
@input_data_1_name = N'trainingDs'
END TRY
BEGIN CATCH
THROW;
END CATCH;
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment