|
#include <Rcpp.h> |
|
#include <numeric> |
|
#include <map> |
|
#include <unordered_map> |
|
#include <unordered_set> |
|
#include <memory> |
|
#include <algorithm> |
|
#include <limits> |
|
|
|
// fast table for integer vectors. There is no checks for NAs. |
|
// [[Rcpp::export(rng = false)]] |
|
Rcpp::IntegerVector table_int_fast(Rcpp::IntegerVector const x){ |
|
std::map<int, R_xlen_t> counter_sorted; |
|
{ |
|
std::unordered_map<int, R_xlen_t> counter; |
|
int const *xi = &x[0]; |
|
for(R_xlen_t i = 0; i < x.size(); ++i) |
|
counter[*xi++]++; |
|
|
|
for(auto &v : counter) |
|
counter_sorted[v.first] = v.second; |
|
} |
|
|
|
Rcpp::IntegerVector out(counter_sorted.size()); |
|
Rcpp::CharacterVector names(counter_sorted.size()); |
|
R_xlen_t i = 0L; |
|
for(auto &v : counter_sorted){ |
|
names[i ] = v.first; |
|
out [i++] = v.second; |
|
} |
|
|
|
out.names() = names; |
|
return out; |
|
} |
|
|
|
// function to reduce the computation time of kinship2::makefamid. Note that |
|
// argument newid is by reference. This only works if typeof(newid) is integer. |
|
// [[Rcpp::export(".makefamid_fast_inner", rng = false)]] |
|
void makefamid_fast_inner |
|
(Rcpp::IntegerVector newid, Rcpp::IntegerVector const xid2, |
|
Rcpp::IntegerVector const newid_sorted, |
|
Rcpp::IntegerVector const grp_sizes){ |
|
int * const p_newid = &newid[0]; |
|
int const * p_xid2 = &xid2[0], |
|
* p_newid_sorted = &newid_sorted[0], |
|
* p_grp_size = &grp_sizes[0]; |
|
|
|
R_len_t const n_grps = grp_sizes.size(); |
|
if(n_grps != xid2.size()) |
|
Rcpp::stop("lengths of n_grps and mid do not match"); |
|
|
|
for(R_len_t i = 0; i < n_grps; ++i, ++p_grp_size, ++p_xid2){ |
|
int out = std::numeric_limits<int>::max(); |
|
for(int j = 0; j < *p_grp_size; ++j, ++p_newid_sorted) |
|
if(out > *p_newid_sorted) |
|
out = *p_newid_sorted; |
|
|
|
if(p_newid[*p_xid2] > out) |
|
p_newid[*p_xid2] = out; |
|
} |
|
} |
|
|
|
namespace { |
|
/// keeps track of family members |
|
struct family_tracker { |
|
struct family_member { |
|
R_len_t id; |
|
family_tracker * family; |
|
family_member * next_member = nullptr; |
|
}; |
|
|
|
R_len_t fam_id = std::numeric_limits<R_len_t>::max(); |
|
family_member * first_ele = nullptr, |
|
* last_ele = nullptr; |
|
size_t n_ele = 0L; |
|
|
|
inline void add_member(family_member * const new_member){ |
|
if(!new_member) |
|
return; |
|
|
|
fam_id = std::min(fam_id, new_member->id); |
|
new_member->family = this; |
|
|
|
if(n_ele < 1L){ |
|
// the first member |
|
first_ele = new_member; |
|
last_ele = new_member; |
|
n_ele = 1L; |
|
return; |
|
} |
|
|
|
// add the member |
|
last_ele->next_member = new_member; |
|
last_ele = new_member; |
|
++n_ele; |
|
} |
|
|
|
// should not be used |
|
family_tracker() = default; |
|
|
|
void merge(family_tracker &other){ |
|
if(other.fam_id != fam_id) |
|
add_members(other); |
|
} |
|
|
|
void merge(std::unordered_set<family_tracker*> &others){ |
|
for(auto x : others) |
|
if(x->fam_id != fam_id) |
|
add_members(*x); |
|
} |
|
|
|
private: |
|
inline void add_members(family_tracker &other){ |
|
// change the families |
|
{ |
|
family_member * x = other.first_ele; |
|
for(size_t i = 0; i < other.n_ele; ++i, x = x->next_member) |
|
x->family = this; |
|
} |
|
|
|
// update the last element, the number of elements, and the family id |
|
last_ele->next_member = other.first_ele; |
|
last_ele = other.last_ele; |
|
n_ele += other.n_ele; |
|
fam_id = std::min(fam_id, other.fam_id); |
|
|
|
// update other |
|
other.first_ele = nullptr; |
|
other.last_ele = nullptr; |
|
other.n_ele = 0L; |
|
} |
|
}; |
|
|
|
} // namespace |
|
|
|
// [[Rcpp::export(".makefamid_new", rng = false)]] |
|
Rcpp::IntegerVector makefamid_new(Rcpp::IntegerVector const drow, |
|
Rcpp::IntegerVector const mrow){ |
|
// setup families |
|
R_len_t const n = drow.size(); |
|
|
|
std::unique_ptr<family_tracker::family_member[]> members( |
|
new family_tracker::family_member[n]); |
|
std::unique_ptr<family_tracker[]> trackers(new family_tracker[n]); |
|
for(R_len_t j = 0; j < n; ++j){ |
|
members[j] = family_tracker::family_member { j, nullptr }; |
|
trackers[j].add_member(&members[j]); |
|
} |
|
|
|
// compute the order for drow and mrow |
|
auto get_ord = [&](Rcpp::IntegerVector const x) -> std::unique_ptr<int[]> { |
|
std::unique_ptr<int[]> out(new int[x.size()]); |
|
std::iota(out.get(), out.get() + x.size(), 0); |
|
|
|
int const *xp = &x[0]; |
|
std::sort(out.get(), out.get() + x.size(), |
|
[&](int const i, int const j){ return xp[i] < xp[j]; }); |
|
|
|
return out; |
|
}; |
|
|
|
std::unique_ptr<int[]> const m_ord = get_ord(mrow), |
|
d_ord = get_ord(drow); |
|
|
|
// find the family ids. Assign a lambda function to add all the relations |
|
// the mothers or fathers |
|
auto merge_families = |
|
[&](int const * prow, int const * p_ord) -> void { |
|
std::unordered_set<family_tracker*> families; |
|
for(R_len_t c = 0L; c < n; ){ |
|
R_len_t const cur_parent = prow[p_ord[c]]; |
|
if(cur_parent >= n) |
|
// we are done (reached the founders with no parents) |
|
break; |
|
|
|
// find the families to merge. We add the families to the largest one. |
|
// This is computationally attractive. Thus, we need to keep track of the |
|
// largest family |
|
families.clear(); |
|
family_tracker * largest_family(nullptr); |
|
size_t largest_family_size(0L); |
|
|
|
// adds a family to be merged |
|
auto add_family = [&](family_tracker * const new_fam) -> void { |
|
auto const ret_info = families.insert(new_fam); |
|
if(ret_info.second and |
|
new_fam->n_ele > largest_family_size){ |
|
largest_family_size = new_fam->n_ele; |
|
largest_family = new_fam; |
|
} |
|
}; |
|
|
|
// add the children to this family |
|
{ |
|
R_len_t p_ord_c; |
|
while(c < n and prow[(p_ord_c = p_ord[c])] == cur_parent){ |
|
add_family(members[p_ord_c].family); |
|
++c; |
|
} |
|
} |
|
|
|
// merge the families |
|
if(families.size() < 1) |
|
// nothing to merge |
|
continue; |
|
|
|
add_family(members[cur_parent].family); |
|
largest_family->merge(families); |
|
} |
|
}; |
|
|
|
// add the relations through the mothers and fathers |
|
merge_families(&mrow[0], &m_ord[0]); |
|
merge_families(&drow[0], &d_ord[0]); |
|
|
|
// setup the return object and return |
|
Rcpp::IntegerVector out(n); |
|
for(R_len_t j = 0; j < n; ++j) |
|
out[j] = members[j].family->fam_id + 1L; |
|
|
|
return out; |
|
} |
|
|
|
/*** R |
|
# faster version of kinship2::makefamid. |
|
makefamid_fast <- function(id, father.id, mother.id){ |
|
n <- length(id) |
|
mid <- c(match(mother.id, id, nomatch = n + 1), n + 1) |
|
did <- c(match(father.id, id, nomatch = n + 1), n + 1) |
|
## ADDED ## |
|
m_newid <- rep(NA_integer_, length(mid)) |
|
m_ord <- order(order(mid)) |
|
m_grp_size <- table_int_fast(mid) |
|
|
|
d_newid <- rep(NA_integer_, length(did)) |
|
d_ord <- order(order(did)) |
|
d_grp_size <- table_int_fast(did) |
|
########### |
|
## CHANGED ## |
|
# mid2 <- sort(unique(mid)) |
|
# did2 <- sort(unique(did)) |
|
mid2_m1 <- sort(unique(mid)) - 1L |
|
did2_m1 <- sort(unique(did)) - 1L |
|
############# |
|
famid <- 1:(n + 1) |
|
temp <- kindepth(id, father.id, mother.id) |
|
for (i in 1:n) { |
|
newid <- pmin(famid, famid[mid], famid[did]) |
|
## CHANGED ## |
|
# newid[mid2] <- pmin(newid[mid2], tapply(newid, mid, min)) |
|
m_newid[m_ord] <- newid |
|
.makefamid_fast_inner(newid = newid, xid2 = mid2_m1, |
|
newid_sorted = m_newid, grp_sizes = m_grp_size) |
|
############# |
|
newid[n + 1] <- n + 1L |
|
## CHANGED ## |
|
# newid[did2] <- pmin(newid[did2], tapply(newid, did, min)) |
|
d_newid[d_ord] <- newid |
|
dum <- .makefamid_fast_inner(newid = newid, xid2 = did2_m1, |
|
newid_sorted = d_newid, grp_sizes = d_grp_size) |
|
############# |
|
newid[n + 1] <- n + 1L |
|
if (all(newid == famid)) |
|
break |
|
else if (i < n) |
|
famid <- newid |
|
} |
|
if (all(newid == famid)) { |
|
famid <- famid[1:n] |
|
xx <- table_int_fast(famid) |
|
if (any(xx == 1)) { |
|
singles <- as.integer(names(xx[xx == 1])) |
|
famid[!is.na(match(famid, singles))] <- 0L |
|
match(famid, sort(unique(famid))) - 1L |
|
} |
|
else match(famid, sort(unique(famid))) |
|
} |
|
else stop("Bug in routine: seem to have found an infinite loop") |
|
} |
|
|
|
# a completely new implementation of kinship2::makefamid largely based on C++. |
|
makefamid_new <- function(id, father.id, mother.id){ |
|
stopifnot(require(kinship2), !anyDuplicated(id)) |
|
n <- length(id) |
|
mrow <- match(mother.id, id, nomatch = n + 1) - 1L |
|
drow <- match(father.id, id, nomatch = n + 1) - 1L |
|
|
|
out <- .makefamid_new(mrow = mrow, drow = drow) |
|
|
|
xx <- table_int_fast(out) |
|
if(any(xx == 1)) { |
|
singles <- as.integer(names(xx[xx == 1])) |
|
out[!is.na(match(out, singles))] <- 0L |
|
match(out, sort(unique(out))) - 1L |
|
} |
|
else match(out, sort(unique(out))) |
|
} |
|
*/ |