Skip to content

Instantly share code, notes, and snippets.

@stufield
Last active May 20, 2019 21:49
Show Gist options
  • Save stufield/e8125fd6e0fcd1c0344a8a96d37dead7 to your computer and use it in GitHub Desktop.
Save stufield/e8125fd6e0fcd1c0344a8a96d37dead7 to your computer and use it in GitHub Desktop.
Rcpp of ssDAMN

Reprex output for Rcpp Single Sample DAMN Normalization


See the ssDAMN.cpp file is attached below

Note: you cannot reproduce entirely; SomaNormalization is private

The reprex output


library(SomaNormalization)
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(bench)
options(width = 120)
set.seed(100)
test <- system.file("medianNorm_references",
                    "Population_Reference_V4_Covance_plasma.rds",
                    package = "SomaNormalization", mustWork = TRUE) %>%
  readRDS() %>%
  filter(SITE_ID == "Boise" & BMI < 30) %>%
  select(SampleType, SampleMatrix, SlideId, Subarray, SampleId, SampleGroup,
         getNormNames(.), sample(getAptamers(.), 5000))

# 5000 features
dim(test)
#> [1]  132 5009

bnch <- mark(
  orig_ssDAMN = singleSampleDAMN(test),                       # original
  cpp_ssDAMN  = SomaNormalization:::singleSampleDAMN2(test)   # uses Rcpp
)
#> Warning: Some expressions had a GC in every iteration; so filtering is disabled.

# Absolute differences
bnch
#> # A tibble: 2 x 10
#>   expression       min     mean   median      max `itr/sec` mem_alloc  n_gc n_itr total_time
#>   <bch:expr>  <bch:tm> <bch:tm> <bch:tm> <bch:tm>     <dbl> <bch:byt> <dbl> <int>   <bch:tm>
#> 1 orig_ssDAMN    6.99s    6.99s    6.99s    6.99s    0.143      3.7GB    23     1      6.99s
#> 2 cpp_ssDAMN    41.84s   41.84s   41.84s   41.84s    0.0239    2.11GB    15     1     41.84s

# Relative differences
summary(bnch, relative = TRUE)
#> Warning: Some expressions had a GC in every iteration; so filtering is disabled.
#> # A tibble: 2 x 10
#>   expression    min  mean median   max `itr/sec` mem_alloc  n_gc n_itr total_time
#>   <bch:expr>  <dbl> <dbl>  <dbl> <dbl>     <dbl>     <dbl> <dbl> <dbl>      <dbl>
#> 1 orig_ssDAMN  1     1      1     1         5.99      1.75  1.53     1       1   
#> 2 cpp_ssDAMN   5.99  5.99   5.99  5.99      1         1     1        1       5.99

Created on 2019-05-01 by the reprex package (v0.2.1)

Session info
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 3.5.2 (2018-12-20)
#>  os       macOS Mojave 10.14.4        
#>  system   x86_64, darwin15.6.0        
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_US.UTF-8                 
#>  ctype    en_US.UTF-8                 
#>  tz       America/Denver              
#>  date     2019-05-01                  
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
#>  package           * version    date       lib source                      
#>  assertthat          0.2.0      2017-04-11 [1] CRAN (R 3.5.0)              
#>  backports           1.1.3      2018-12-14 [1] CRAN (R 3.5.0)              
#>  bench             * 1.0.1.9000 2019-02-01 [1] Github (r-lib/bench@3e5d63f)
#>  bitops              1.0-6      2013-08-17 [1] CRAN (R 3.5.0)              
#>  callr               3.1.1      2018-12-21 [1] CRAN (R 3.5.0)              
#>  caTools             1.17.1.1   2018-07-20 [1] CRAN (R 3.5.0)              
#>  class               7.3-15     2019-01-01 [2] CRAN (R 3.5.2)              
#>  cli                 1.1.0      2019-03-19 [1] RSPM (R 3.5.2)              
#>  cluster             2.0.7-1    2018-04-13 [1] CRAN (R 3.5.0)              
#>  codetools           0.2-16     2018-12-24 [2] CRAN (R 3.5.2)              
#>  colorspace          1.4-0      2019-01-13 [1] CRAN (R 3.5.2)              
#>  crayon              1.3.4      2017-09-16 [1] RSPM (R 3.5.2)              
#>  data.table          1.12.0     2019-01-13 [1] CRAN (R 3.5.2)              
#>  dendextend          1.9.0      2018-10-19 [1] CRAN (R 3.5.0)              
#>  DEoptimR            1.0-8      2016-11-19 [1] CRAN (R 3.5.0)              
#>  desc                1.2.0      2018-05-01 [1] CRAN (R 3.5.0)              
#>  devtools            2.0.1      2018-10-26 [1] CRAN (R 3.5.1)              
#>  digest              0.6.18     2018-10-10 [1] CRAN (R 3.5.0)              
#>  diptest             0.75-7     2016-12-05 [1] CRAN (R 3.5.0)              
#>  dplyr             * 0.8.0.1    2019-02-15 [1] RSPM (R 3.5.2)              
#>  evaluate            0.13       2019-02-12 [1] CRAN (R 3.5.2)              
#>  fansi               0.4.0      2018-10-05 [1] CRAN (R 3.5.0)              
#>  flexmix             2.3-15     2019-02-18 [1] CRAN (R 3.5.2)              
#>  foreach             1.4.4      2017-12-12 [1] CRAN (R 3.5.0)              
#>  fpc                 2.1-11.1   2018-07-20 [1] CRAN (R 3.5.0)              
#>  fs                  1.2.6      2018-08-23 [1] CRAN (R 3.5.0)              
#>  gclus               1.3.2      2019-01-07 [1] CRAN (R 3.5.2)              
#>  gdata               2.18.0     2017-06-06 [1] CRAN (R 3.5.0)              
#>  ggplot2             3.1.1      2019-04-07 [1] RSPM (R 3.5.2)              
#>  glue                1.3.1      2019-03-12 [1] CRAN (R 3.5.2)              
#>  gplots              3.0.1.1    2019-01-27 [1] CRAN (R 3.5.2)              
#>  gridExtra           2.3        2017-09-09 [1] CRAN (R 3.5.0)              
#>  gtable              0.2.0      2016-02-26 [1] CRAN (R 3.5.0)              
#>  gtools              3.8.1      2018-06-26 [1] CRAN (R 3.5.0)              
#>  highr               0.7        2018-06-09 [1] CRAN (R 3.5.0)              
#>  hms                 0.4.2      2018-03-10 [1] CRAN (R 3.5.0)              
#>  htmltools           0.3.6      2017-04-28 [1] CRAN (R 3.5.0)              
#>  htmlwidgets         1.3        2018-09-30 [1] CRAN (R 3.5.0)              
#>  httr                1.4.0      2018-12-11 [1] CRAN (R 3.5.0)              
#>  iterators           1.0.10     2018-07-13 [1] CRAN (R 3.5.0)              
#>  jsonlite            1.6        2018-12-07 [1] CRAN (R 3.5.0)              
#>  kernlab             0.9-27     2018-08-10 [1] CRAN (R 3.5.0)              
#>  KernSmooth          2.23-15    2015-06-29 [2] CRAN (R 3.5.2)              
#>  knitr               1.22       2019-03-08 [1] CRAN (R 3.5.2)              
#>  latex2exp           0.4.0      2015-11-30 [1] CRAN (R 3.5.0)              
#>  lattice             0.20-38    2018-11-04 [1] CRAN (R 3.5.0)              
#>  lazyeval            0.2.1      2017-10-29 [1] CRAN (R 3.5.0)              
#>  magrittr            1.5        2014-11-22 [1] RSPM (R 3.5.2)              
#>  MASS                7.3-51.4   2019-03-31 [1] standard (@7.3-51.)         
#>  mclust              5.4.2      2018-11-17 [1] CRAN (R 3.5.0)              
#>  memoise             1.1.0      2017-04-21 [1] CRAN (R 3.5.0)              
#>  modeltools          0.2-22     2018-07-16 [1] CRAN (R 3.5.0)              
#>  munsell             0.5.0      2018-06-12 [1] CRAN (R 3.5.0)              
#>  mvtnorm             1.0-8      2018-05-31 [1] CRAN (R 3.5.0)              
#>  nnet                7.3-12     2016-02-02 [2] CRAN (R 3.5.2)              
#>  pillar              1.3.1      2018-12-15 [1] CRAN (R 3.5.0)              
#>  pkgbuild            1.0.2      2018-10-16 [1] CRAN (R 3.5.0)              
#>  pkgconfig           2.0.2      2018-08-16 [1] CRAN (R 3.5.0)              
#>  pkgload             1.0.2      2018-10-29 [1] CRAN (R 3.5.0)              
#>  plotly              4.9.0      2019-04-10 [1] RSPM (R 3.5.2)              
#>  plyr                1.8.4      2016-06-08 [1] CRAN (R 3.5.0)              
#>  prabclus            2.2-7      2019-01-17 [1] CRAN (R 3.5.2)              
#>  prettyunits         1.0.2      2015-07-13 [1] CRAN (R 3.5.0)              
#>  processx            3.2.1      2018-12-05 [1] CRAN (R 3.5.0)              
#>  profmem             0.5.0      2018-01-30 [1] CRAN (R 3.5.0)              
#>  ps                  1.3.0      2018-12-21 [1] CRAN (R 3.5.0)              
#>  purrr               0.3.2      2019-03-15 [1] RSPM (R 3.5.2)              
#>  R6                  2.4.0      2019-02-14 [1] CRAN (R 3.5.2)              
#>  Rcpp                1.0.0      2018-11-07 [1] CRAN (R 3.5.0)              
#>  readr               1.3.1      2018-12-21 [1] RSPM (R 3.5.2)              
#>  registry            0.5        2017-12-03 [1] CRAN (R 3.5.0)              
#>  remotes             2.0.2      2018-10-30 [1] CRAN (R 3.5.0)              
#>  rlang               0.3.4      2019-04-07 [1] RSPM (R 3.5.2)              
#>  rmarkdown           1.11       2018-12-08 [1] CRAN (R 3.5.0)              
#>  robustbase          0.93-3     2018-09-21 [1] CRAN (R 3.5.0)              
#>  rprojroot           1.3-2      2018-01-03 [1] CRAN (R 3.5.0)              
#>  scales              1.0.0      2018-08-09 [1] CRAN (R 3.5.0)              
#>  seriation           1.2-3      2018-02-05 [1] CRAN (R 3.5.0)              
#>  sessioninfo         1.1.1      2018-11-05 [1] CRAN (R 3.5.0)              
#>  SomaGlobals       * 3.0.1      2019-04-20 [1] RSPM (R 3.5.2)              
#>  SomaNormalization * 3.0.1.9000 2019-05-01 [1] local                       
#>  SomaPlot            3.0.1      2019-04-20 [1] RSPM (R 3.5.2)              
#>  SomaPlyr          * 3.0.1.9000 2019-04-30 [1] local                       
#>  SomaReadr         * 3.1.0      2019-04-20 [1] RSPM (R 3.5.2)              
#>  stringi             1.4.3      2019-03-12 [1] CRAN (R 3.5.2)              
#>  stringr             1.4.0      2019-02-10 [1] RSPM (R 3.5.2)              
#>  testthat            2.0.1      2018-10-13 [1] CRAN (R 3.5.0)              
#>  tibble              2.1.1      2019-03-16 [1] RSPM (R 3.5.2)              
#>  tidyr               0.8.3      2019-03-01 [1] RSPM (R 3.5.2)              
#>  tidyselect          0.2.5      2018-10-11 [1] CRAN (R 3.5.0)              
#>  trimcluster         0.1-2.1    2018-07-20 [1] CRAN (R 3.5.0)              
#>  TSP                 1.1-6      2018-04-30 [1] CRAN (R 3.5.0)              
#>  usethis             1.4.0      2018-08-14 [1] CRAN (R 3.5.0)              
#>  utf8                1.1.4      2018-05-24 [1] CRAN (R 3.5.0)              
#>  viridis             0.5.1      2018-03-29 [1] CRAN (R 3.5.0)              
#>  viridisLite         0.3.0      2018-02-01 [1] CRAN (R 3.5.0)              
#>  whisker             0.3-2      2013-04-28 [1] CRAN (R 3.5.0)              
#>  withr               2.1.2      2018-03-15 [1] CRAN (R 3.5.0)              
#>  xfun                0.5        2019-02-20 [1] CRAN (R 3.5.2)              
#>  yaml                2.2.0      2018-07-25 [1] CRAN (R 3.5.0)              
#> 
#> [1] /Users/sfield/r_libs
#> [2] /Library/Frameworks/R.framework/Versions/3.5/Resources/library
#include <numeric>
#include <Rcpp.h>
using namespace Rcpp;
/*
intended to be used in conjunction with apply(x, 1, ssDAMN, ...)
to loop over samples/rows of the data.frame passed 'x'.
*/
// [[Rcpp::export]]
List ssDAMN(const NumericVector& x, // named vector length ~ 5500
NumericVector& ref_median, // named vector length ~ 5500
const NumericVector& ref_median_log, // named vector ~ 5500
const NumericVector& ref_mad, // named vector ~ 5500
List dil_split, // list of characters length = 3
List dil_split_protein, // list of characters length = 3
double effect_cut) { // effect size default = 2.0
CharacterVector apts = x.names();
int L = x.length();
int max = 100;
CharacterVector dil_names = dil_split.names();
int dil_L = dil_names.length();
NumericMatrix SF_mat(max, dil_L);
SF_mat.attr("dimnames") = List::create(R_NilValue, dil_names);
NumericVector apt_DAMN(L);
CharacterVector apts_old(L);
NumericVector sample_vec = clone(x);
/* Initialize empty objects for use inside loop */
NumericVector dat_effect(L);
LogicalVector cut(L);
CharacterVector apts_cut(L);
CharacterVector apts_use(L);
bool lgl1 = NA_LOGICAL;
bool lgl2 = NA_LOGICAL;
CharacterVector mix_prot(L);
CharacterVector inter(L);
IntegerVector match_id;
IntegerVector sample_id(L);
NumericVector new_vals(L);
double SF;
/* list of indices for each mix */
List mix_idx(dil_L);
CharacterVector mix_apts;
for (int i = 0; i < dil_L; ++i) {
mix_apts = dil_split[i];
mix_idx[i] = match(mix_apts, apts) - 1;
}
/* The main loop; convergence */
int counter = 0;
while( counter <= max ) { // typically converge in < 15
//Rcout << "iter: " << counter << std::endl;
dat_effect = (log10(sample_vec) - ref_median_log) / ref_mad; // vectorized effect size ~ 5500
cut = abs(dat_effect) > effect_cut;
apts_cut = apts[cut];
apts_use = setdiff(apts, apts_cut);
/*
Rcout << "cut: " << apts_cut << std::endl;
Rcout << "use: " << apts_use << std::endl;
Rcout << "old: " << apts_old << std::endl;
*/
lgl1 = apts_cut.size() == apts_old.size(); // check lengths equal
lgl2 = is_true(all(apts_cut == apts_old)); // check all equal
if (counter == max) {
apt_DAMN = dat_effect;
break;
} else if (lgl1 & lgl2) {
apt_DAMN = dat_effect;
break;
} else {
apts_old = apts_cut;
counter += 1; // iterate the counter
}
for (int k = 0; k < dil_L; ++k) { // k = 0:2
mix_prot = dil_split_protein[k];
inter = intersect(apts_use, mix_prot);
match_id = match(inter, apts) - 1;
/* calculate scale factor for current dilution mix */
SF = median(ref_median[ match_id ] / sample_vec[ match_id ], true);
//Rcout << "SF: " << SF << std::endl;
sample_id = mix_idx[k]; // get indices from list to scale
new_vals = as<NumericVector>(sample_vec[sample_id]) * SF; // scale values
sample_vec[sample_id] = new_vals; // update sample_vec
/*
IntegerVector::iterator j;
IntegerVector::iterator j_end = sample_id.end();
for (j = sample_id.begin(); j != j_end; ++j)
sample_vec[*j] = sample_vec[*j] * SF;
*/
SF_mat(counter - 1, k) = SF; // store scale factor in SF_mat
}
}
/* end loops */
/* Clean up and store relevant objects */
/* Set up SF_vec and its names */
CharacterVector sf_names(dil_L, "NormScale.");
for (int i = 0; i < dil_L; ++i) {
sf_names[i] += dil_names[i];
}
/* replaces apply(SF_mat, 2, prod); better option? */
NumericVector SF_vec(dil_L);
NumericVector v(max);
for (int p = 0; p < dil_L; ++p) { // length dil_L = 3
v = SF_mat.column( p );
v = v[ v > 0 ];
SF_vec[p] = std::accumulate(v.begin(), v.end(), 1.0, std::multiplies<double>());
}
SF_vec.names() = sf_names;
apt_DAMN.names() = apts;
return List::create(Named("MedDat", sample_vec),
Named("DAMN", apt_DAMN),
Named("SF", SF_vec));
}
/***R
# set.seed(100)
# n <- 25
# nms <- LETTERS[1:n]
# mat <- matrix(rnorm(5 * n, mean = 100), ncol = n, dimnames = list(NULL, nms))
# a <- apply(mat, 2, median)
# b <- n:1 %>% purrr::set_names(nms)
# c <- rexp(n) %>% purrr::set_names(nms)
# dil <- split(nms, sample(1:3, n, replace = TRUE))
# t0 <- Sys.time()
# ret <- apply(mat, 1, ssDAMN, ref_median = a,
# ref_median_log = c, ref_mad = b,
# dil_split = dil, dil_split_protein = dil,
# effect_cut = 0.5) %>%
# purrr::transpose() %>%
# purrr::map(~ do.call(rbind, .x) %>% data.frame())
# t1 <- Sys.time()
# all.equal(ret, readRDS("ret.rds")); t1 - t0
*/
/***R
df <- system.file("medianNorm_references",
"Population_Reference_V4_Covance_plasma.rds",
package = "SomaNormalization", mustWork = TRUE) %>%
readRDS() %>%
filter(SITE_ID == "Boise" & BMI < 21) %>%
dplyr::select(SampleType, SampleMatrix, SlideId, Subarray, SampleId, SampleGroup,
getNormNames(.), sample(getAptamers(.), 500))
mark <- bench::mark(
orig_ssDAMN = suppressMessages(singleSampleDAMN(df)), # original
cpp_ssDAMN = suppressMessages(singleSampleDAMN2(df)), # uses Rcpp
filter_gc = FALSE, relative = F, iterations = 5
)
mark
#summary(bnch, relative = TRUE)
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment