malomarrec/slide_df.R

## slide_df.R
#This function "slides" dataframe blocks based on NA values
#Takes a dataframe and two column names
#On each row, one of these columns have to be NA
#The function returns a dataframe with a new column called <newname> with the combined non NA values of the first columns
#Different factor levels in both columns are dealt with by casting to character, then back to factor after the merge
#If newname is not specified, the first columns is overriden by the new column
#Optionnal parameters allows to drop the old columns

slide_df <- function(df,name1,name2,newname = NULL, drop = FALSE){
  require(dplyr)

  if(is.null(newname)){
    newname = name1
  }
  #Factor data needs to be converted to characters first in order no to lose levels
  if(is.factor(df[,name1]) || is.factor(df[,name2])){
    df[,name1] <- as.character(df[,name1])
    df[,name2] <- as.character(df[,name2])
  }

  slided_df = dplyr::mutate(df, !!newname := ifelse(is.na(df[,name1]),df[,name2],df[,name1]))

  #If factor data, we need to cast character back to factor data
  # if(is.factor(df[,name1]) || is.factor(df[,name2])){
  #  df[,newname] <- as.factor(df[,newname])
  # }

  if(drop){
    if(is.null(newname)){
      slided_df[,name1] <- NULL #dropping i only if a newname was specified, otherwise it contains the result
    }
    slided_df[,name2] <- NULL #dropping name_j
  }
  return(slided_df)
}


#Demo
a1 <- c(1,2,3,4,5,NA,NA,NA)
a2 <- c(NA,NA,NA,NA,NA,8,8,8)

b <- rep("a",8)
c1 <- c(1,2,3,4,5,NA,NA,NA)
c2 <- c(NA,NA,NA,NA,NA,'1','1','1')

b <- rep("a",8)
c1 <- c(1,2,3,4,5,NA,NA,NA)
c2 <- c(NA,NA,NA,NA,NA,'a','a','1')

df <- data.frame(a1 = a1,b = b, c1 = c1,a2=a2,c2=c2)
# a1 b c1 a2   c2
# 1  1 a  1 NA <NA>
#   2  2 a  2 NA <NA>
#   3  3 a  3 NA <NA>
#   4  4 a  4 NA <NA>
#   5  5 a  5 NA <NA>
#   6 NA a NA  8    a
# 7 NA a NA  8    a
# 8 NA a NA  8    1

> merge_cols(df,"a1","a2","a",drop=T)
 merge_cols(df,"a1","a2","a",drop=T)
# a1 b c1 a2   c2
# 1  1 a  1 NA <NA>
# 2  2 a  2 NA <NA>
# 3  3 a  3 NA <NA>
# 4  4 a  4 NA <NA>
# 5  5 a  5 NA <NA>
# 6 NA a NA  8    a
# 7 NA a NA  8    a
# 8 NA a NA  8    1
	#This function "slides" dataframe blocks based on NA values
	#Takes a dataframe and two column names
	#On each row, one of these columns have to be NA
	#The function returns a dataframe with a new column called <newname> with the combined non NA values of the first columns
	#Different factor levels in both columns are dealt with by casting to character, then back to factor after the merge
	#If newname is not specified, the first columns is overriden by the new column
	#Optionnal parameters allows to drop the old columns

	slide_df <- function(df,name1,name2,newname = NULL, drop = FALSE){
	require(dplyr)

	if(is.null(newname)){
	newname = name1
	}
	#Factor data needs to be converted to characters first in order no to lose levels
	if(is.factor(df[,name1]) \|\| is.factor(df[,name2])){
	df[,name1] <- as.character(df[,name1])
	df[,name2] <- as.character(df[,name2])
	}

	slided_df = dplyr::mutate(df, !!newname := ifelse(is.na(df[,name1]),df[,name2],df[,name1]))

	#If factor data, we need to cast character back to factor data
	# if(is.factor(df[,name1]) \|\| is.factor(df[,name2])){
	# df[,newname] <- as.factor(df[,newname])
	# }

	if(drop){
	if(is.null(newname)){
	slided_df[,name1] <- NULL #dropping i only if a newname was specified, otherwise it contains the result
	}
	slided_df[,name2] <- NULL #dropping name_j
	}
	return(slided_df)
	}


	#Demo
	a1 <- c(1,2,3,4,5,NA,NA,NA)
	a2 <- c(NA,NA,NA,NA,NA,8,8,8)

	b <- rep("a",8)
	c1 <- c(1,2,3,4,5,NA,NA,NA)
	c2 <- c(NA,NA,NA,NA,NA,'1','1','1')

	b <- rep("a",8)
	c1 <- c(1,2,3,4,5,NA,NA,NA)
	c2 <- c(NA,NA,NA,NA,NA,'a','a','1')

	df <- data.frame(a1 = a1,b = b, c1 = c1,a2=a2,c2=c2)
	# a1 b c1 a2 c2
	# 1 1 a 1 NA <NA>
	# 2 2 a 2 NA <NA>
	# 3 3 a 3 NA <NA>
	# 4 4 a 4 NA <NA>
	# 5 5 a 5 NA <NA>
	# 6 NA a NA 8 a
	# 7 NA a NA 8 a
	# 8 NA a NA 8 1

	> merge_cols(df,"a1","a2","a",drop=T)
	merge_cols(df,"a1","a2","a",drop=T)
	# a1 b c1 a2 c2
	# 1 1 a 1 NA <NA>
	# 2 2 a 2 NA <NA>
	# 3 3 a 3 NA <NA>
	# 4 4 a 4 NA <NA>
	# 5 5 a 5 NA <NA>
	# 6 NA a NA 8 a
	# 7 NA a NA 8 a
	# 8 NA a NA 8 1