# gvdr/matrices_dataframe.R

## matrices_dataframe.R
#' let's have a matrix

#' Dimensions of the example matrix: `m` observations (rows) of `n` values.
n <- 2
m <- 15

#' Draw one random number per cell. `runif(n)` alone yields only `n` values,
#' which `matrix()` would silently recycle to fill all `m * n` cells.
my_mat <- matrix(runif(m * n), nrow = m, ncol = n)

#' let's name those rows, they will be our observations

row.names(my_mat) <- letters[seq_len(m)]

#' we want to create a data.frame where
#' each row of the matrix corresponds to a row of the data.frame,
#' with one variable for the name of the row,
#' one variable for the mean of the row,
#' and one variable in which we save the row itself

my_df <- data.frame(
  row.names = NULL,
  letter = row.names(my_mat),
  mean_number = rowMeans(my_mat),
  row_value = my_mat
)

#' works smoothly. Yet:

names(my_df)

#' `data.frame()` actually created one variable for each column of
#' my_mat (`row_value.1`, `row_value.2`), so the data.frame has 4
#' variables in total. We wanted just one variable containing, for
#' each row, the whole row vector.
#'
#' We can see why this is a problem when we try to bind two data frames
#' coming from matrices of different dimensions

#' A second matrix with the same `m` observations but `n = 3` values each.
n <- 3
m <- 15

#' One random draw per cell; `runif(n)` alone would be recycled by `matrix()`
#' and every column would come out identical.
my_mat2 <- matrix(runif(m * n), nrow = m, ncol = n)
row.names(my_mat2) <- letters[seq_len(m)]

#' Same construction as before: `row_value` expands to one variable per
#' matrix column, so this data.frame gets three `row_value.*` variables.
my_df2 <- data.frame(
  row.names = NULL,
  letter = row.names(my_mat2),
  mean_number = rowMeans(my_mat2),
  row_value = my_mat2
)

#' `rbind()` is expected to fail here; `try()` reports the error without
#' aborting the script, so the next attempt below still runs.
try(full_data <- rbind(my_df, my_df2))

#' rbind does not work, as the two data frames have a different number
#' of columns

#' `bind_rows()` comes from dplyr; it is namespace-qualified because the
#' package is never attached in this script.
full_data <- dplyr::bind_rows(my_df, my_df2)

#' works, but fills the "missing" values with NAs, and that
#' interferes with the maths.
#'
#'
#' So, what's the right way of doing it?
	#' let's have a matrix

	#' Dimensions of the example matrix: `m` observations (rows) of `n` values.
	n <- 2
	m <- 15

	#' Draw one random number per cell. `runif(n)` alone yields only `n` values,
	#' which `matrix()` would silently recycle to fill all `m * n` cells.
	my_mat <- matrix(runif(m * n), nrow = m, ncol = n)

	#' let's name those rows, they will be our observations

	row.names(my_mat) <- letters[seq_len(m)]

	#' we want to create a data.frame where
	#' each row of the matrix corresponds to a row of the data.frame,
	#' with one variable for the name of the row,
	#' one variable for the mean of the row,
	#' and one variable in which we save the row itself

	my_df <- data.frame(
	  row.names = NULL,
	  letter = row.names(my_mat),
	  mean_number = rowMeans(my_mat),
	  row_value = my_mat
	)

	#' works smoothly. Yet:

	names(my_df)

	#' `data.frame()` actually created one variable for each column of
	#' my_mat (`row_value.1`, `row_value.2`), so the data.frame has 4
	#' variables in total. We wanted just one variable containing, for
	#' each row, the whole row vector.
	#'
	#' We can see why this is a problem when we try to bind two data frames
	#' coming from matrices of different dimensions

	#' A second matrix with the same `m` observations but `n = 3` values each.
	n <- 3
	m <- 15

	#' One random draw per cell; `runif(n)` alone would be recycled by `matrix()`
	#' and every column would come out identical.
	my_mat2 <- matrix(runif(m * n), nrow = m, ncol = n)
	row.names(my_mat2) <- letters[seq_len(m)]

	#' Same construction as before: `row_value` expands to one variable per
	#' matrix column, so this data.frame gets three `row_value.*` variables.
	my_df2 <- data.frame(
	  row.names = NULL,
	  letter = row.names(my_mat2),
	  mean_number = rowMeans(my_mat2),
	  row_value = my_mat2
	)

	#' `rbind()` is expected to fail here; `try()` reports the error without
	#' aborting the script, so the next attempt below still runs.
	try(full_data <- rbind(my_df, my_df2))

	#' rbind does not work, as the two data frames have a different number
	#' of columns

	#' `bind_rows()` comes from dplyr; it is namespace-qualified because the
	#' package is never attached in this script.
	full_data <- dplyr::bind_rows(my_df, my_df2)

	#' works, but fills the "missing" values with NAs, and that
	#' interferes with the maths.
	#'
	#'
	#' So, what's the right way of doing it?