Skip to content

Instantly share code, notes, and snippets.

@cdiener
Last active February 3, 2016 22:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cdiener/fb4ea6580d6cd46e2be4 to your computer and use it in GitHub Desktop.
Save cdiener/fb4ea6580d6cd46e2be4 to your computer and use it in GitHub Desktop.
ExpressionSet reducer - allows you to reduce the features of an expression set from an nxn grouping, for instance probesets to genes/transcripts etc.
Rcpp::sourceCpp("matrix_reduce.cpp")
#' Reduces an ExpressionSet by an n-to-n map of features to groups. All entries
#' in \code{features} must exist in \code{eset}. \code{features} and
#' \code{groups} must have the same length.
#'
#' @param eset An ExpressionSet object.
#' @param features A character vector of features to be grouped.
#' @param groups A factor or character vector mapping the entries in
#' \code{features} to groups.
#' @param fun The reduction function. Must operate on a n x m matrix and return
#' a vector of length m.
#' @param progress Should progress information be shown.
#' @return A new ExpressionSet with features given by \code{unique(groups)}.
eset_reduce <- function(eset, features, groups, fun, progress=TRUE) {
groups <- factor(groups)
ex <- exprs(eset)
idx <- 1:nrow(ex)
names(idx) <- rownames(ex)
idx <- idx[features]
facs <- as.integer(groups)
res <- matrix_reduce(ex, idx, facs, "mean")
rownames(res) <- levels(groups)
return(ExpressionSet(res))
}
/*
* matrix_reduce.cpp
*
* Copyright 2016 Christian Diener <mail[at]cdiener.com>
*
* MIT license. See LICENSE for more information.
*/
#include <Rcpp.h>
#include <vector>
// [[Rcpp::depends(RcppProgress)]]
#include <progress.hpp>
using namespace Rcpp;
template <typename C>
class index_sorter {
public:
index_sorter(C const& c) : c(c) {}
bool operator()(std::size_t const& lhs, std::size_t const& rhs) const {
return c[lhs] < c[rhs];
}
private:
C const& c;
};
NumericVector max_reduce(NumericMatrix m, std::vector<int> idx) {
NumericVector res(m.ncol(), 0.0);
double val;
int maxidx = 0;
double maxval = mean(m(idx[0], _));
for(int i=0; i<idx.size(); ++i) {
val = mean(m(idx[i], _));
if(val > maxval) {
maxval = val;
maxidx = idx[i];
}
}
res = m(maxidx, _);
return res;
}
NumericVector mean_reduce(NumericMatrix m, std::vector<int> idx) {
NumericVector res(m.ncol(), 0.0);
for(int i=0; i<idx.size(); ++i) res += m(idx[i], _);
return res/idx.size();
}
NumericVector median_reduce(NumericMatrix m, std::vector<int> idx) {
NumericVector res(m.ncol(), 0.0);
std::vector<double> means(idx.size());
for(int i=0; i<idx.size(); ++i) means[i] = mean(m(idx[i], _));
int i = idx.size()/2;
if(idx.size() % 2 == 0) {
std::nth_element(idx.begin(), idx.begin()+i-1, idx.end(),
index_sorter<std::vector<double> >(means));
res += m(idx[i-1], _);
std::nth_element(idx.begin(), idx.begin()+i, idx.end(),
index_sorter<std::vector<double> >(means));
res += m(idx[i-1], _);
res = 0.5*res;
} else {
std::nth_element(idx.begin(), idx.begin()+i-1, idx.end(),
index_sorter<std::vector<double> >(means));
res += m(idx[i], _);
}
return res;
}
// [[Rcpp::export]]
NumericVector matrix_reduce(NumericMatrix m, IntegerVector idx,
IntegerVector factors, std::string method) {
int minf = min(factors);
int nf = max(factors) - min(factors) + 1;
std::vector< std::vector<int> > map(nf);
//Rcout<<"Building factor map..."<<std::endl;
int fi;
for(int i=0; i<factors.size(); ++i) {
fi = factors[i] - minf;
map[fi].push_back(idx[i]-1);
}
//Rcout<<"Reducing matrix..."<<std::endl;
Progress p(nf, true);
NumericMatrix res(nf, m.ncol());
for(int fi=0; fi<nf; ++fi) {
p.increment();
if(method == "max") res(fi, _) = max_reduce(m, map[fi]);
else if(method == "median") res(fi, _) = median_reduce(m, map[fi]);
else res(fi, _) = mean_reduce(m, map[fi]);
}
return res;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment