Skip to content

Instantly share code, notes, and snippets.

@cheuerde
Last active September 2, 2016 08:54
Show Gist options
  • Save cheuerde/a50ee1fd315b3732a30f7e96e37c1afc to your computer and use it in GitHub Desktop.
Save cheuerde/a50ee1fd315b3732a30f7e96e37c1afc to your computer and use it in GitHub Desktop.
Numerator Relationship Matrix for normal pedigrees and paternal pedigrees only
// Claas Heuer, February 2016
//
// Note: This is copied and modified form the sources of the
// cran package: 'pedigreemm'
#include <R.h>
#include <Rdefines.h>
#include "Rcpp.h"
/**
* This is a creplacement for the former R-function "getGenAncestors"
* It takes integer pointers to the sire, dam, id and generation vectors
* and it changes the generations directly on the passed R-object, so there is no
* return value = void.
*/
void calc_generation(int* sire, int* dam, int* id, int* gene, int this_id) {
/* j is the individual (label id) we are currently working with */
int j = this_id;
/* integers to store the generations of dam and sire */
int gene_sire, gene_dam;
/* If there are no parents assign 0 */
gene[j] = 0;
/* check first parent */
if(sire[j] != NA_INTEGER) {
/* if the parent is not a founder continue */
if (gene[sire[j]-1] == NA_INTEGER) {
/* we call this function again to get the generation of this parent - recursion */
/* The function doesnt return anything, it changes 'gene' directly */
/* we need the '-1' for the indices because R starts at 1, C at 0 */
calc_generation(sire, dam, id, gene, sire[j]-1);
gene_sire = 1 + gene[sire[j]-1];
} else {
/* if the parent is a founder my generation is 1 */
gene_sire = 1 + gene[sire[j]-1];
}
/* we assign the generation derived from father for this individual */
/* but this might get overriden by the dam's derived generation if it is larger */
gene[j] = gene_sire;
}
/* check second parent */
/* all the same as above */
if(dam[j] != NA_INTEGER) {
if (gene[dam[j]-1] == NA_INTEGER) {
calc_generation(sire, dam, id, gene, dam[j]-1);
gene_dam = 1 + gene[dam[j]-1];
} else {
gene_dam = 1 + gene[dam[j]-1];
}
/* if the dam's derived generation is higher then override gene[j] with it */
if (gene_dam > gene[j]) gene[j] = gene_dam;
}
}
/**
* This function iterates over every element of the pedigree and let
* "calc_generation" calculate the generations. Again without a return value.
*/
// [[Rcpp::export]]
void GetGeneration(SEXP sire_in, SEXP dam_in, SEXP id_in, SEXP gene_in, SEXP verbose_in) {
// get R-vectors for sire, dam, id and generation
int *sire = INTEGER(sire_in),
*dam = INTEGER(dam_in),
*id = INTEGER(id_in),
*gene = INTEGER(gene_in);
int verbose = *INTEGER(verbose_in);
int n = LENGTH(sire_in);
for(int i=0; i<n; i++) {
if(verbose) Rprintf("%i \n", i);
// we only need to calculate the generation for non-founders, which
// is the case when we assigned 'NA' to generation (founders have 0)
if(gene[i] == NA_INTEGER) {
// call the function above to get the generation for individual 'i'.
// we pass the vectors as pointers to calc_generation()
calc_generation(sire, dam, id, gene, i);
}
}
}
# Claas Heuer, February 2016
#
# Fast version of editPed form pedigreemm package
if (!require("pacman")) install.packages("pacman")
pacman::p_load(Rcpp, pedigreemm)
sourceCpp("EditPedFast.cpp")
EditPedFast <- function(sire, dam, label, verbose = FALSE) {
nped <- length(sire)
if (nped != length(dam)) stop("sire and dam have to be of the same length")
if (nped != length(label)) stop("label has to be of the same length than sire and dam")
tmp <- unique(sort(c(as.character(sire), as.character(dam))))
missingP <-NULL
if(any(completeId <- ! tmp %in% as.character(label))) missingP <- tmp[completeId]
labelOl <- c(as.character(missingP),as.character(label))
sireOl <- c(rep(NA, times=length(missingP)),as.character(sire))
damOl <- c(rep(NA, times=length(missingP)),as.character(dam))
sire <- as.integer(factor(sireOl, levels = labelOl))
dam <- as.integer(factor(damOl, levels = labelOl))
nped <-length(labelOl)
label <-1:nped
sire[!is.na(sire) & (sire<1 | sire>nped)] <- NA
dam[!is.na(dam) & (dam < 1 | dam > nped)] <- NA
pede <- data.frame(id= label, sire= sire, dam= dam, gene=rep(NA, times=nped))
noParents <- (is.na(pede$sire) & is.na(pede$dam))
pede$gene[noParents] <- 0
pede$gene<-as.integer(pede$gene)
# new version that calls the C-function "get_generation"
# Note: there is no return value, the R-objects are directly being changed
GetGeneration(pede$sire, pede$dam, pede$id, pede$gene, as.integer(verbose))
ord<- order(pede$gene)
ans<-data.frame(label=labelOl, sire=sireOl, dam=damOl, gene=pede$gene,
stringsAsFactors =F)
ans[ord,]
}
# Claas Heuer, March 2016
#
# Very simple improvement of pedigreemm::getAInv.
# Replaced dense diagonal by sparse one
GetAinvFast <- function(ped) {
stopifnot(is(ped, "pedigree"))
T_Inv <- as(ped, "sparseMatrix")
D_Inv <- Diagonal(x=1/Dmat(ped))
aiMx<-t(T_Inv) %*% D_Inv %*% T_Inv
dimnames(aiMx)[[1]]<-dimnames(aiMx)[[2]] <-ped@label
aiMx
}
#' @name GetNRM
#' @title Constructing numerator relationship matrix using full pedigree or sire and MGS
#' @author Claas Heuer
#' @details April 2016
#' @usage
#' GetNRM(ped, mgs = FALSE)
#' @description
#' Construct numerator relationship matrix based on regular pedigree file
#' or using sire and maternal grandsire information.
#' @param ped
#' \code{data.frame} with three columns: \code{id}, \code{sire} and \code{dam/mgs}
#' @param mgs
#' Indicate whether third column of \code{ped} is the maternal grandsire rather than the dam
#' @return Numerator Relationship Matrix as \code{dgCMatrix}
#' @examples
#'
#' A <- GetNRM(ped, mgs = FALSE)
if (!require("pacman")) install.packages("pacman")
pacman::p_load(pedigreemm)
source("EditPedFast.r")
#' @export
GetNRM <- function(ped, mgs = FALSE) {
if(!is.data.frame(ped)) stop("ped must be a data.frame")
colnames(ped)[1:3] <- c("id", "sire", "dam")
if(mgs) colnames(ped)[3] <- "mgs"
pednew <- ped
# if dam is replaced by mgs we expand pedigree with pseudo dams
if(mgs) {
damped <- data.frame(id = paste("PseudoDam_", 1:nrow(ped), sep = ""),
sire = ped$mgs, dam = NA, stringsAsFactors = FALSE)
# fill dam column with pseudodams
pednew <- data.frame(id = ped$id, sire = ped$sire, dam = damped$id, stringsAsFactors = FALSE)
# add dams to pedigree
pednew <- rbind(damped, pednew)
}
# add founders
PEDtemp <- EditPedFast(label = pednew$id, sire = pednew$sire, dam = pednew$dam)
PED <- pedigreemm::pedigree(label = PEDtemp$label, dam = PEDtemp$dam,
sire = PEDtemp$sire)
# get A without pseudodams
L <- t(relfactor(PED))
if(mgs) {
A <- tcrossprod(L[!PED@label %in% damped$id,])
ids <- PED@label[!PED@label %in% damped$id]
} else {
A <- tcrossprod(L)
ids <- PED@label
}
A <- as(A, "dgCMatrix")
rownames(A) <- ids
colnames(A) <- ids
return(A)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment