## sudevschiz/split_data.R

## split_data.R
## Function to split the dataframe
## 143 is just a default seed

## Randomly partition the rows of a data frame into a training and a test set.
##
## Arguments:
##   dataframe  data frame whose rows are to be split.
##   seed       value handed to set.seed() before sampling (default 143).
##              Pass NULL to leave the current RNG state untouched.
##              Note: a non-NULL seed resets the global RNG state.
##   splitper   percentage (0-100) of rows that go to the training set;
##              the row count is rounded down.
##
## Returns a list with two data frames:
##   trainset   the sampled rows, in sampled (random) order.
##   testset    all remaining rows, in their original order.
splitdf <- function(dataframe, seed = 143, splitper) {
  if (!is.numeric(splitper) || length(splitper) != 1L || is.na(splitper) ||
        splitper < 0 || splitper > 100) {
    stop("'splitper' must be a single number between 0 and 100",
         call. = FALSE)
  }
  if (!is.null(seed)) set.seed(seed)

  n_rows <- nrow(dataframe)
  # Multiply before dividing: (57 / 100) * 100 is 56.99... in floating point
  # and would be truncated to 56, whereas 57 * 100 / 100 is exactly 57.
  n_train <- floor(splitper * n_rows / 100)
  train_rows <- sample.int(n_rows, n_train)

  # A logical mask is used instead of a negative index because
  # dataframe[-integer(0), ] selects no rows at all, which would leave the
  # test set empty whenever the training set is empty.
  in_train <- seq_len(n_rows) %in% train_rows

  # drop = FALSE keeps single-column inputs as data frames.
  list(
    trainset = dataframe[train_rows, , drop = FALSE],
    testset = dataframe[!in_train, , drop = FALSE]
  )
}


## Trainset percentage. Here 80%
tr_per <- 80

## Partition inputData_Sel into training and test sets. The seed is taken from
## the current clock time, so every run produces a different random split.
data_list <- splitdf(
  inputData_Sel,
  seed = as.numeric(Sys.time()),
  splitper = tr_per
)

## data_list is now a list holding both sets. Access them with data_list$trainset and data_list$testset
	## Function to split the dataframe
	## 143 is just a default seed

	## Randomly partition the rows of a data frame into a training and a test set.
	##
	## Arguments:
	##   dataframe  data frame whose rows are to be split.
	##   seed       value handed to set.seed() before sampling (default 143).
	##              Pass NULL to leave the current RNG state untouched.
	##              Note: a non-NULL seed resets the global RNG state.
	##   splitper   percentage (0-100) of rows that go to the training set;
	##              the row count is rounded down.
	##
	## Returns a list with two data frames:
	##   trainset   the sampled rows, in sampled (random) order.
	##   testset    all remaining rows, in their original order.
	splitdf <- function(dataframe, seed = 143, splitper) {
	  if (!is.numeric(splitper) || length(splitper) != 1L || is.na(splitper) ||
	        splitper < 0 || splitper > 100) {
	    stop("'splitper' must be a single number between 0 and 100",
	         call. = FALSE)
	  }
	  if (!is.null(seed)) set.seed(seed)
	  n_rows <- nrow(dataframe)
	  # Multiply before dividing: (57 / 100) * 100 is 56.99... in floating point
	  # and would be truncated to 56, whereas 57 * 100 / 100 is exactly 57.
	  n_train <- floor(splitper * n_rows / 100)
	  train_rows <- sample.int(n_rows, n_train)
	  # A logical mask is used instead of a negative index because
	  # dataframe[-integer(0), ] selects no rows at all, which would leave the
	  # test set empty whenever the training set is empty.
	  in_train <- seq_len(n_rows) %in% train_rows
	  # drop = FALSE keeps single-column inputs as data frames.
	  list(
	    trainset = dataframe[train_rows, , drop = FALSE],
	    testset = dataframe[!in_train, , drop = FALSE]
	  )
	}


	## Trainset percentage. Here 80%
	tr_per <- 80

	## Partition inputData_Sel into training and test sets. The seed is taken from
	## the current clock time, so every run produces a different random split.
	data_list <- splitdf(
	  inputData_Sel,
	  seed = as.numeric(Sys.time()),
	  splitper = tr_per
	)

	## data_list is now a list holding both sets. Access them with data_list$trainset and data_list$testset