# rvprasad/batchRead.r

## batchRead.r
#' Read the next group of rows from a CSV connection, chunk by chunk
#'
#' Rows are read from `theFile` in chunks and appended to `leftOver` until the
#' buffer holds at least two distinct values in column `col`, or until the
#' input is exhausted. The rows carrying the first value in the buffer are then
#' split off and returned; all other rows are handed back as the new
#' `leftOver`. Reading stops as soon as a second value shows up, so rows of one
#' group must be contiguous in the input to be returned together.
#'
#' @param theFile An open connection providing the data, e.g.,
#'   `file("data/transactions.csv", "r")`.
#' @param headers Column names of the data being read; assigned to every chunk.
#' @param leftOver Rows that were read but not returned by the previous
#'   invocation of this function (`NULL` or an empty data frame on first use).
#' @param col Name or index of the column on which the data is grouped.
#' @param chunkSize Maximum number of rows fetched per read.
#' @param header Forwarded to `read.csv()` for every chunk. The default `TRUE`
#'   keeps the earlier behaviour, in which the first line of each chunk is
#'   treated as a header line and discarded. Pass `FALSE` when the header line
#'   has already been consumed, so that no data row is dropped.
#' @return A list of two elements: `data`, the rows of the next group, and
#'   `leftOver`, the rows to pass to the next invocation. Both are empty once
#'   the input is exhausted and nothing remains buffered.
getDataFrameForNextId <- function(theFile, headers, leftOver, col,
                                  chunkSize = 100000, header = TRUE) {
  while (NROW(leftOver) == 0 || NROW(unique(leftOver[, col])) < 2) {
    # With col.names supplied, read.csv() yields a zero-row data frame at end
    # of input instead of stopping with "no lines available in input".
    chunk <- read.csv(theFile, nrows = chunkSize, header = header,
                      col.names = headers)
    if (NROW(chunk) == 0) {
      break
    }
    # read.csv() may sanitise the names (check.names); restore them verbatim.
    colnames(chunk) <- headers
    leftOver <- rbind(leftOver, chunk)
  }

  # Nothing buffered and nothing left to read: report an empty result.
  if (NROW(leftOver) == 0) {
    return(list(data = leftOver, leftOver = leftOver))
  }

  ids <- leftOver[, col]
  # %in% never yields NA, so rows with a missing id are not turned into
  # all-NA rows the way an `==` mask would.
  isNext <- ids %in% ids[1]
  # drop = FALSE keeps single-column inputs as data frames.
  list(
    data = leftOver[isNext, , drop = FALSE],
    leftOver = leftOver[!isNext, , drop = FALSE]
  )
}
	#' Read the next group of rows from a CSV connection, chunk by chunk
	#'
	#' Rows are read from `theFile` in chunks and appended to `leftOver` until the
	#' buffer holds at least two distinct values in column `col`, or until the
	#' input is exhausted. The rows carrying the first value in the buffer are then
	#' split off and returned; all other rows are handed back as the new
	#' `leftOver`. Reading stops as soon as a second value shows up, so rows of one
	#' group must be contiguous in the input to be returned together.
	#'
	#' @param theFile An open connection providing the data, e.g.,
	#'   `file("data/transactions.csv", "r")`.
	#' @param headers Column names of the data being read; assigned to every chunk.
	#' @param leftOver Rows that were read but not returned by the previous
	#'   invocation of this function (`NULL` or an empty data frame on first use).
	#' @param col Name or index of the column on which the data is grouped.
	#' @param chunkSize Maximum number of rows fetched per read.
	#' @param header Forwarded to `read.csv()` for every chunk. The default `TRUE`
	#'   keeps the earlier behaviour, in which the first line of each chunk is
	#'   treated as a header line and discarded. Pass `FALSE` when the header line
	#'   has already been consumed, so that no data row is dropped.
	#' @return A list of two elements: `data`, the rows of the next group, and
	#'   `leftOver`, the rows to pass to the next invocation. Both are empty once
	#'   the input is exhausted and nothing remains buffered.
	getDataFrameForNextId <- function(theFile, headers, leftOver, col,
	                                  chunkSize = 100000, header = TRUE) {
	  while (NROW(leftOver) == 0 || NROW(unique(leftOver[, col])) < 2) {
	    # With col.names supplied, read.csv() yields a zero-row data frame at end
	    # of input instead of stopping with "no lines available in input".
	    chunk <- read.csv(theFile, nrows = chunkSize, header = header,
	                      col.names = headers)
	    if (NROW(chunk) == 0) {
	      break
	    }
	    # read.csv() may sanitise the names (check.names); restore them verbatim.
	    colnames(chunk) <- headers
	    leftOver <- rbind(leftOver, chunk)
	  }

	  # Nothing buffered and nothing left to read: report an empty result.
	  if (NROW(leftOver) == 0) {
	    return(list(data = leftOver, leftOver = leftOver))
	  }

	  ids <- leftOver[, col]
	  # %in% never yields NA, so rows with a missing id are not turned into
	  # all-NA rows the way an `==` mask would.
	  isNext <- ids %in% ids[1]
	  # drop = FALSE keeps single-column inputs as data frames.
	  list(
	    data = leftOver[isNext, , drop = FALSE],
	    leftOver = leftOver[!isNext, , drop = FALSE]
	  )
	}