# pimentel/data.table_roll-nearest.R

## data.table_roll-nearest.R
# Demonstrates which columns survive a keyed rolling join with
# roll = "nearest". The printed results shown in the comments were recorded
# with data.table 1.9.6 (see the sessionInfo() output at the end).

# data.table(), setkey() and the roll argument all come from the data.table
# package; attach it explicitly so the script runs in a fresh R session.
library(data.table)

# Two single-column tables: integer x = 1..5 and double y = 1.01..5.01,
# each keyed on its only column so that X[Y] joins on that key.
DT1 <- data.table(x = 1:5)
DT2 <- data.table(y = (1:5) + 0.01)
setkey(DT1, x)
setkey(DT2, y)

# the following does not maintain both columns: only a single column named x
# comes back, and DT2's y is not kept as a separate column
DT1[DT2, roll = "nearest"]
#    x
# 1: 1
# 2: 2
# 3: 3
# 4: 4
# 5: 5

# the column is renamed here: joining the other way round returns a single
# column named y that holds the values 1..5
DT2[DT1, roll = "nearest"]
#    y
# 1: 1
# 2: 2
# 3: 3
# 4: 4
# 5: 5

# hack to force keeping of column: duplicate y as z before joining so the
# original y values come through in the result as z
# NOTE(review): later data.table releases are reported to allow referring to
# both join columns in j via x./i. prefixes -- confirm against the installed
# version before replacing this workaround.
DT1[DT2[, list(y, z = y)], roll = "nearest"]
#    x    z
# 1: 1 1.01
# 2: 2 2.01
# 3: 3 3.01
# 4: 4 4.01
# 5: 5 5.01

# Record the R and package versions the output above was produced with.
sessionInfo()
# R version 3.3.0 (2016-05-03)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Ubuntu 14.04.4 LTS
#
# locale:
#  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C
#  [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#
# attached base packages:
# [1] stats4    parallel  stats     graphics  grDevices utils     datasets  methods   base
#
# other attached packages:
#  [1] edgeR_3.14.0               limma_3.28.2               cowplot_0.6.2              sleuth_0.28.0              BiocInstaller_1.22.1       devtools_1.11.1            mamabear_0.0.1             ggplot2_2.1.0
#  [9] lazyeval_0.1.10            dplyr_0.4.3                EBSeq_1.12.0               testthat_1.0.2             gplots_3.0.1               blockmodeling_0.1.8        DESeq2_1.12.0              SummarizedExperiment_1.2.0
# [17] GenomicRanges_1.24.0       GenomeInfoDb_1.8.0         IRanges_2.6.0              S4Vectors_0.10.0           DESeq_1.24.0               lattice_0.20-33            locfit_1.5-9.1             Biobase_2.32.0
# [25] BiocGenerics_0.18.0        data.table_1.9.6
#
# loaded via a namespace (and not attached):
#  [1] Rcpp_0.12.4          tidyr_0.4.1          gtools_3.5.0         assertthat_0.1       digest_0.6.9         R6_2.1.2             plyr_1.8.3           chron_2.3-47         acepack_1.3-3.3      RSQLite_1.0.0        zlibbioc_1.18.0
# [12] annotate_1.50.0      gdata_2.17.0         rpart_4.1-10         Matrix_1.2-6         labeling_0.3         splines_3.3.0        BiocParallel_1.6.0   stringr_1.0.0        geneplotter_1.50.0   foreign_0.8-66       RCurl_1.95-4.8
# [23] biomaRt_2.28.0       munsell_0.4.3        nnet_7.3-12          gridExtra_2.2.1      Hmisc_3.17-4         XML_3.98-1.4         crayon_1.3.1         withr_1.0.1          bitops_1.0-6         grid_3.3.0           xtable_1.8-2
# [34] gtable_0.2.0         DBI_0.4-1            magrittr_1.5         scales_0.4.0         KernSmooth_2.23-15   stringi_1.0-1        reshape2_1.4.1       XVector_0.12.0       genefilter_1.54.0    latticeExtra_0.6-28  Formula_1.2-1
# [45] RColorBrewer_1.1-2   tools_3.3.0          survival_2.39-4      AnnotationDbi_1.34.0 colorspace_1.2-6     cluster_2.0.4        caTools_1.17.1       memoise_1.0.0
	# Build two one-column tables (integers 1..5 and the same values shifted
	# by 0.01) and key each table on its only column.
	DT1 <- data.table(x = seq_len(5L))
	DT2 <- data.table(y = seq_len(5L) + 0.01)
	setkeyv(DT1, "x")
	setkeyv(DT2, "y")

	# Rolling join of DT2 into DT1: the result has a single column, x.
	DT1[DT2, roll = "nearest"]
	#    x
	# 1: 1
	# 2: 2
	# 3: 3
	# 4: 4
	# 5: 5

	# Same join in the opposite direction: the single result column is now
	# called y.
	DT2[DT1, roll = "nearest"]
	#    y
	# 1: 1
	# 2: 2
	# 3: 3
	# 4: 4
	# 5: 5

	# Workaround: carry a copy of y along as z so that it shows up in the
	# joined result.
	DT2_with_copy <- DT2[, .(y, z = y)]
	DT1[DT2_with_copy, roll = "nearest"]
	#    x    z
	# 1: 1 1.01
	# 2: 2 2.01
	# 3: 3 3.01
	# 4: 4 4.01
	# 5: 5 5.01

	# Print the session details (R version, attached packages).
	sessionInfo()
	# R version 3.3.0 (2016-05-03)
	# Platform: x86_64-pc-linux-gnu (64-bit)
	# Running under: Ubuntu 14.04.4 LTS
	#
	# locale:
	# [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
	# [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
	#
	# attached base packages:
	# [1] stats4 parallel stats graphics grDevices utils datasets methods base
	#
	# other attached packages:
	# [1] edgeR_3.14.0 limma_3.28.2 cowplot_0.6.2 sleuth_0.28.0 BiocInstaller_1.22.1 devtools_1.11.1 mamabear_0.0.1 ggplot2_2.1.0
	# [9] lazyeval_0.1.10 dplyr_0.4.3 EBSeq_1.12.0 testthat_1.0.2 gplots_3.0.1 blockmodeling_0.1.8 DESeq2_1.12.0 SummarizedExperiment_1.2.0
	# [17] GenomicRanges_1.24.0 GenomeInfoDb_1.8.0 IRanges_2.6.0 S4Vectors_0.10.0 DESeq_1.24.0 lattice_0.20-33 locfit_1.5-9.1 Biobase_2.32.0
	# [25] BiocGenerics_0.18.0 data.table_1.9.6
	#
	# loaded via a namespace (and not attached):
	# [1] Rcpp_0.12.4 tidyr_0.4.1 gtools_3.5.0 assertthat_0.1 digest_0.6.9 R6_2.1.2 plyr_1.8.3 chron_2.3-47 acepack_1.3-3.3 RSQLite_1.0.0 zlibbioc_1.18.0
	# [12] annotate_1.50.0 gdata_2.17.0 rpart_4.1-10 Matrix_1.2-6 labeling_0.3 splines_3.3.0 BiocParallel_1.6.0 stringr_1.0.0 geneplotter_1.50.0 foreign_0.8-66 RCurl_1.95-4.8
	# [23] biomaRt_2.28.0 munsell_0.4.3 nnet_7.3-12 gridExtra_2.2.1 Hmisc_3.17-4 XML_3.98-1.4 crayon_1.3.1 withr_1.0.1 bitops_1.0-6 grid_3.3.0 xtable_1.8-2
	# [34] gtable_0.2.0 DBI_0.4-1 magrittr_1.5 scales_0.4.0 KernSmooth_2.23-15 stringi_1.0-1 reshape2_1.4.1 XVector_0.12.0 genefilter_1.54.0 latticeExtra_0.6-28 Formula_1.2-1
	# [45] RColorBrewer_1.1-2 tools_3.3.0 survival_2.39-4 AnnotationDbi_1.34.0 colorspace_1.2-6 cluster_2.0.4 caTools_1.17.1 memoise_1.0.0