Skip to content

Instantly share code, notes, and snippets.

@boennecd
Last active April 14, 2021 13:39
Show Gist options
  • Save boennecd/b51550a8bec2fde6d48f8f3c2d1f69a1 to your computer and use it in GitHub Desktop.
Save boennecd/b51550a8bec2fde6d48f8f3c2d1f69a1 to your computer and use it in GitHub Desktop.
Provides a version of kinship2::kinship which returns a sparse matrix even when there is only one family.

Provides a version of kinship2::kinship that returns a sparse matrix even when there is only one family. A full dense matrix is never stored. The script assigns two functions to the global enviroment when Rcpp::sourceCpp'ed. These are kinship_sparse_R and kinship_sparse which is an only R and mainly C++ implementation, respectilvy. The function can be sourced by calling

download.file("https://gist.githubusercontent.com/boennecd/b51550a8bec2fde6d48f8f3c2d1f69a1/raw",
              cpp_file <- paste0(tempfile(), ".cpp"))
Rcpp::sourceCpp(cpp_file)
#include <Rcpp.h>
#include <deque>
#include <vector>
#include <algorithm>
namespace {
typedef R_len_t uword;
// struct to keep track of the relations of a given individual
struct kinship_link_dat {
struct links {
uword index;
double val;
links(uword const index, double const val): index(index), val(val) { }
};
uword const index;
std::deque<links> relations;
kinship_link_dat(uword const index = 0, double const val = 0):
index(index),
relations(([&]() -> std::deque<links> {
std::deque<links> out;
out.emplace_back(index, val);
return out;
})()) { }
void add_relation(uword const other, double const val){
relations.emplace_back(other, val);
}
};
} // namespace
// [[Rcpp::export(".kinship_sparse_inner", rng = false)]]
Rcpp::List kinship_sparse(Rcpp::IntegerVector const pdepth,
Rcpp::IntegerVector const drow,
Rcpp::IntegerVector const mrow){
// setup relation objects
uword const n = pdepth.size(),
max_depth = std::max<uword>(
1L, *std::max_element(pdepth.begin(), pdepth.end()));
std::vector<kinship_link_dat> link_dat;
link_dat.reserve(n);
for(uword j = 0L; j < n; ++j)
link_dat.emplace_back(j, 0.5);
// fill the relation data
for(uword depth = 1L; depth <= max_depth; ++depth){
for(uword j = 0L; j < n; ++j){
if(pdepth[j] != depth)
continue;
// add relations through the mother and father
auto &dat_j = link_dat.at(j);
auto add_relations = [&](int const parent) -> void {
if(parent < n){
auto &parent_dat = link_dat.at(parent);
for(auto &parent_relation : parent_dat.relations){
double const val = parent_relation.val * .5;
dat_j.add_relation(parent_relation.index, val);
link_dat.at(parent_relation.index).add_relation(j, val);
}
}
};
add_relations(mrow[j]);
add_relations(drow[j]);
}
}
// compute the length of the output
R_len_t n_out(0L);
for(auto const &dat_i : link_dat)
n_out += dat_i.relations.size();
// create the output object
Rcpp::List out = Rcpp::List::create(
Rcpp::Named("i") = Rcpp::IntegerVector(n_out),
Rcpp::Named("j") = Rcpp::IntegerVector(n_out),
Rcpp::Named("val") = Rcpp::NumericVector(n_out));
Rcpp::IntegerVector R_i(Rcpp::as<SEXP>(out["i"])),
R_j(Rcpp::as<SEXP>(out["j"]));
Rcpp::NumericVector val(Rcpp::as<SEXP>(out["val"]));
R_len_t counter(0L);
for(uword i = 0L; i < n; ++i){
for(auto const &dat_i : link_dat.at(i).relations){
R_i[counter ] = i + 1L;
R_j[counter ] = dat_i.index + 1L;
val[counter++] = dat_i.val;
}
}
return out;
}
/***R
# like kinship2::kinship but it returns sparse matrix.
#
# Args:
# see kinship2::kinship.
kinship_sparse_R <- function(id, dadid, momid, sex, chrtype = "autosome", ...){
stopifnot(require(kinship2), require(Matrix))
chrtype <- match.arg(casefold(chrtype), "autosome")
stopifnot(!anyDuplicated(id))
n <- length(id)
pdepth <- kindepth(id, dadid, momid)
if (n == 1)
return(matrix(0.5, 1, 1, dimnames = list(id, id)))
mrow <- match(momid, id, nomatch = n + 1)
drow <- match(dadid, id, nomatch = n + 1)
rel_dat <- lapply(1:n, function(x) cbind(idx = x, val = .5))
rel_dat <- c(rel_dat, list(cbind(idx = n + 1L, val = 0.)))
for (depth in 1:max(c(1, pdepth))) {
for (j in (1:n)[pdepth == depth]) {
# add relations through the mother
dat_mom <- rel_dat[[mrow[j]]]
for(i in 1:NROW(dat_mom)){
idx <- dat_mom[i, 1]
val <- dat_mom[i, 2]
rel_dat[[idx]] <- rbind(rel_dat[[idx]], c(j, val * .5))
}
# add relations through the father
dat_dad <- rel_dat[[drow[j]]]
for(i in 1:NROW(dat_dad)){
idx <- dat_dad[i, 1]
val <- dat_dad[i, 2]
rel_dat[[idx]] <- rbind(rel_dat[[idx]], c(j, val * .5))
}
# update entry
dat_mom[, 2] <- dat_mom[, 2] * .5
dat_dad[, 2] <- dat_dad[, 2] * .5
rel_dat[[j]] <- rbind(rel_dat[[j]], dat_mom, dat_dad)
}
}
indices <- Map(function(i, dat) list(
i = rep(i, NROW(dat)), j = as.integer(dat[, 1]),
val = dat[, 2]), i = seq_along(rel_dat), dat = rel_dat)
i <- unlist(sapply(indices, `[[`, "i"))
j <- unlist(sapply(indices, `[[`, "j"))
val <- unlist(sapply(indices, `[[`, "val"))
kmat <- sparseMatrix(i = i, j = j, x = val)
kmat <- kmat[1:n, 1:n]
dimnames(kmat) <- list(id, id)
kmat
}
# like kinship_sparse_R but uses a C++ implementation.
#
# Args:
# see kinship2::kinship.
kinship_sparse <- function(id, dadid, momid, sex, chrtype = "autosome", ...){
stopifnot(require(kinship2), require(Matrix))
chrtype <- match.arg(casefold(chrtype), "autosome")
stopifnot(!anyDuplicated(id))
n <- length(id)
pdepth <- kindepth(id, dadid, momid)
if (n == 1)
return(matrix(0.5, 1, 1, dimnames = list(id, id)))
mrow <- match(momid, id, nomatch = n + 1L) - 1L
drow <- match(dadid, id, nomatch = n + 1L) - 1L
kmat_arg <- .kinship_sparse_inner(pdepth = pdepth, mrow = mrow, drow = drow)
kmat <- with(kmat_arg, sparseMatrix(i = i, j = j, x = val))
dimnames(kmat) <- list(id, id)
kmat
}
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment