## mrdwab/MERGE.R

## MERGE.R
# Merge two data frames on shared key columns (or on row names) without
# renumbering the rows of the result.
#
# NOTE(review): the body looks like an adaptation of base R's
# merge.data.frame -- confirm against the R version it was taken from. The
# row-name resets are deliberately left commented out below, so whatever row
# names the subsetting/binding steps produce are carried into the result.
# NOTE(review): this file contains a second `MERGE <- function` assignment
# further down; keep the two in sync or remove one of them.
#
# Arguments:
#   x, y           Objects coerced with as.data.frame().
#   by, by.x, by.y Key columns given as names, column numbers or a logical
#                  vector. The name "row.names" (or the number 0) selects the
#                  row names as a key.
#   all, all.x, all.y
#                  Logical. Keep rows of x (all.x) / y (all.y) that have no
#                  partner; the columns coming from the other input are NA.
#   sort           Logical. Order the result by the key columns.
#   suffixes       Length-2 character vector appended to non-key column names
#                  that occur in both inputs.
#   incomparables  Passed to match(): key values that are never matched.
#   ...            Accepted but not used.
#
# Value: a data frame with the key columns first, then the remaining columns
# of x, then the remaining columns of y. With no key columns at all, the
# result is every combination of a row of x with a row of y.
MERGE <- function (x, y, by = intersect(names(x), names(y)), by.x = by,
    by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE,
    suffixes = c(".x", ".y"), incomparables = NULL, ...)
{
    # Turn a `by` specification (names, numbers, logical mask or NULL) into
    # unique column positions of `df`; position 0 stands for the row names.
    fix.by <- function(by, df) {
        if (is.null(by))
            by <- numeric()
        by <- as.vector(by)
        nc <- ncol(df)
        if (is.character(by)) {
            poss <- c("row.names", names(df))
            # With nomatch = 0L, charmatch() yields 0 for "no match" as well
            # as for "ambiguous match", so both are rejected here.
            if (any(!charmatch(by, poss, 0L)))
                stop("'by' must specify uniquely valid column(s)")
            # "row.names" sits at position 1 of `poss`, hence the shift to 0.
            by <- match(by, poss) - 1L
        }
        else if (is.numeric(by)) {
            if (any(by < 0L) || any(by > nc))
                stop("'by' must match numbers of columns")
        }
        else if (is.logical(by)) {
            if (length(by) != nc)
                stop("'by' must match number of columns")
            by <- seq_along(by)[by]
        }
        else stop("'by' must specify column(s) as numbers, names or logical")
        if (any(is.na(by)))
            stop("'by' must specify valid column(s)")
        unique(by)
    }

    nx <- nrow(x <- as.data.frame(x))
    ny <- nrow(y <- as.data.frame(y))
    by.x <- fix.by(by.x, x)
    by.y <- fix.by(by.y, y)
    if ((l.b <- length(by.x)) != length(by.y))
        stop("'by.x' and 'by.y' specify different numbers of columns")

    if (l.b == 0L) {
        # No key columns: build the Cartesian product of the rows.
        nm <- nm.x <- names(x)
        nm.y <- names(y)
        has.common.nms <- any(cnm <- nm.x %in% nm.y)
        if (has.common.nms) {
            names(x)[cnm] <- paste0(nm.x[cnm], suffixes[1L])
            cnm <- nm.y %in% nm
            names(y)[cnm] <- paste0(nm.y[cnm], suffixes[2L])
        }
        if (nx == 0L || ny == 0L) {
            res <- cbind(x[FALSE, ], y[FALSE, ])
        }
        else {
            ij <- expand.grid(seq_len(nx), seq_len(ny))
            res <- cbind(x[ij[, 1L], , drop = FALSE],
                y[ij[, 2L], , drop = FALSE])
        }
    }
    else {
        # A key of 0 means "row names": materialise them as a first column
        # and shift every key position by one to account for it.
        if (any(by.x == 0L)) {
            x <- cbind(Row.names = I(row.names(x)), x)
            by.x <- by.x + 1L
        }
        if (any(by.y == 0L)) {
            y <- cbind(Row.names = I(row.names(y)), y)
            by.y <- by.y + 1L
        }
        # Row names are intentionally NOT reset here:
        # row.names(x) <- NULL
        # row.names(y) <- NULL

        # Reduce the key of every row to a single comparable value.
        if (l.b == 1L) {
            bx <- x[, by.x]
            if (is.factor(bx))
                bx <- as.character(bx)
            by <- y[, by.y]
            if (is.factor(by))
                by <- as.character(by)
        }
        else {
            # Several key columns: stack both key sets under common names
            # and paste each row into one string, using "\r" as separator.
            bx <- x[, by.x, drop = FALSE]
            by <- y[, by.y, drop = FALSE]
            names(bx) <- names(by) <- paste0("V", seq_len(ncol(bx)))
            bz <- do.call("paste", c(rbind(bx, by), sep = "\r"))
            bx <- bz[seq_len(nx)]
            by <- bz[nx + seq_len(ny)]
        }

        # Keys present on both sides, then the position of each row's key
        # within that common set (0 = no partner).
        comm <- match(bx, by, 0L)
        bxy <- bx[comm > 0L]
        xinds <- match(bx, bxy, 0L, incomparables)
        yinds <- match(by, bxy, 0L, incomparables)
        if (nx > 0L && ny > 0L) {
            m <- .Internal(merge(xinds, yinds, all.x, all.y))
        }
        else {
            m <- list(xi = integer(), yi = integer(), x.alone = seq_len(nx),
                y.alone = seq_len(ny))
        }

        nm <- nm.x <- names(x)[-by.x]
        nm.by <- names(x)[by.x]
        nm.y <- names(y)[-by.y]
        ncx <- ncol(x)
        # From here on all.x / all.y mean "there really are unmatched rows";
        # nxx / nyy are only defined (and only used) when that is the case.
        if (all.x)
            all.x <- (nxx <- length(m$x.alone)) > 0L
        if (all.y)
            all.y <- (nyy <- length(m$y.alone)) > 0L
        lxy <- length(m$xi)

        has.common.nms <- any(cnm <- nm.x %in% nm.y)
        if (has.common.nms && nzchar(suffixes[1L]))
            nm.x[cnm] <- paste0(nm.x[cnm], suffixes[1L])

        # x part: matched rows, then unmatched x rows; key columns first.
        x <- x[c(m$xi, if (all.x) m$x.alone), c(by.x, seq_len(ncx)[-by.x]),
            drop = FALSE]
        names(x) <- c(nm.by, nm.x)
        if (all.y) {
            # Unmatched y rows contribute their keys plus all-NA x columns
            # (indexing rows with NA yields NA-filled rows).
            ya <- y[m$y.alone, by.y, drop = FALSE]
            names(ya) <- nm.by
            ya <- cbind(ya, x[rep.int(NA_integer_, nyy), nm.x,
                drop = FALSE])
            x <- rbind(x, ya)
        }

        if (has.common.nms && nzchar(suffixes[2L])) {
            cnm <- nm.y %in% nm
            nm.y[cnm] <- paste0(nm.y[cnm], suffixes[2L])
        }

        # y part: matched rows, placeholder copies of row 1 for every
        # unmatched x row (blanked just below), then unmatched y rows.
        y <- y[c(m$yi, if (all.x) rep.int(1L, nxx), if (all.y) m$y.alone),
            -by.y, drop = FALSE]
        if (all.x) {
            zap <- (lxy + 1L):(lxy + nxx)
            for (i in seq_along(y)) {
                # Fixed: test and blank column i, not always column 1.
                # Matrix columns must be blanked by row, not by linear index.
                if (is.matrix(y[[i]]))
                    y[[i]][zap, ] <- NA
                else is.na(y[[i]]) <- zap
            }
        }
        if (has.common.nms)
            names(y) <- nm.y

        nm <- c(names(x), names(y))
        if (any(d <- duplicated(nm))) {
            if (sum(d) > 1L)
                warning("column names ", paste(sQuote(nm[d]),
                  collapse = ", "), " are duplicated in the result",
                  domain = NA)
            else warning("column name ", sQuote(nm[d]), " is duplicated in the result",
                domain = NA)
        }

        res <- cbind(x, y)
        if (sort)
            res <- res[if (all.x || all.y)
                do.call("order", x[, seq_len(l.b), drop = FALSE])
            else sort.list(bx[m$xi]), , drop = FALSE]
    }
    # Intentionally left disabled so the row names survive:
    # attr(res, "row.names") <- .set_row_names(nrow(res))
    res
}
	# Merge two data frames on shared key columns (or on row names) without
	# renumbering the rows of the result.
	#
	# NOTE(review): this assignment re-defines MERGE, which is already defined
	# earlier in this file; when the file is sourced, this later definition is
	# the one that stays bound. Consider removing one of the two copies.
	# NOTE(review): the body looks like an adaptation of base R's
	# merge.data.frame -- confirm against the R version it was taken from. The
	# row-name resets are deliberately left commented out below.
	#
	# Arguments:
	#   x, y           Objects coerced with as.data.frame().
	#   by, by.x, by.y Key columns given as names, column numbers or a logical
	#                  vector. The name "row.names" (or the number 0) selects
	#                  the row names as a key.
	#   all, all.x, all.y
	#                  Logical. Keep rows of x (all.x) / y (all.y) that have no
	#                  partner; the columns from the other input are NA.
	#   sort           Logical. Order the result by the key columns.
	#   suffixes       Length-2 character vector appended to non-key column
	#                  names that occur in both inputs.
	#   incomparables  Passed to match(): key values that are never matched.
	#   ...            Accepted but not used.
	#
	# Value: a data frame with the key columns first, then the remaining
	# columns of x, then the remaining columns of y. With no key columns, the
	# result is every combination of a row of x with a row of y.
	MERGE <- function (x, y, by = intersect(names(x), names(y)), by.x = by,
	    by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE,
	    suffixes = c(".x", ".y"), incomparables = NULL, ...)
	{
	    # Turn a `by` specification (names, numbers, logical mask or NULL) into
	    # unique column positions of `df`; position 0 stands for the row names.
	    fix.by <- function(by, df) {
	        if (is.null(by))
	            by <- numeric()
	        by <- as.vector(by)
	        nc <- ncol(df)
	        if (is.character(by)) {
	            poss <- c("row.names", names(df))
	            # With nomatch = 0L, charmatch() yields 0 both for "no match"
	            # and for "ambiguous match", so both are rejected here.
	            if (any(!charmatch(by, poss, 0L)))
	                stop("'by' must specify uniquely valid column(s)")
	            # "row.names" sits at position 1 of `poss`, hence the shift.
	            by <- match(by, poss) - 1L
	        }
	        else if (is.numeric(by)) {
	            # Fixed: the operator was stored as an escaped "\|\|".
	            if (any(by < 0L) || any(by > nc))
	                stop("'by' must match numbers of columns")
	        }
	        else if (is.logical(by)) {
	            if (length(by) != nc)
	                stop("'by' must match number of columns")
	            by <- seq_along(by)[by]
	        }
	        else stop("'by' must specify column(s) as numbers, names or logical")
	        if (any(is.na(by)))
	            stop("'by' must specify valid column(s)")
	        unique(by)
	    }

	    nx <- nrow(x <- as.data.frame(x))
	    ny <- nrow(y <- as.data.frame(y))
	    by.x <- fix.by(by.x, x)
	    by.y <- fix.by(by.y, y)
	    if ((l.b <- length(by.x)) != length(by.y))
	        stop("'by.x' and 'by.y' specify different numbers of columns")

	    if (l.b == 0L) {
	        # No key columns: build the Cartesian product of the rows.
	        nm <- nm.x <- names(x)
	        nm.y <- names(y)
	        has.common.nms <- any(cnm <- nm.x %in% nm.y)
	        if (has.common.nms) {
	            names(x)[cnm] <- paste0(nm.x[cnm], suffixes[1L])
	            cnm <- nm.y %in% nm
	            names(y)[cnm] <- paste0(nm.y[cnm], suffixes[2L])
	        }
	        if (nx == 0L || ny == 0L) {
	            res <- cbind(x[FALSE, ], y[FALSE, ])
	        }
	        else {
	            ij <- expand.grid(seq_len(nx), seq_len(ny))
	            res <- cbind(x[ij[, 1L], , drop = FALSE],
	                y[ij[, 2L], , drop = FALSE])
	        }
	    }
	    else {
	        # A key of 0 means "row names": materialise them as a first column
	        # and shift every key position by one to account for it.
	        if (any(by.x == 0L)) {
	            x <- cbind(Row.names = I(row.names(x)), x)
	            by.x <- by.x + 1L
	        }
	        if (any(by.y == 0L)) {
	            y <- cbind(Row.names = I(row.names(y)), y)
	            by.y <- by.y + 1L
	        }
	        # Row names are intentionally NOT reset here:
	        # row.names(x) <- NULL
	        # row.names(y) <- NULL

	        # Reduce the key of every row to a single comparable value.
	        if (l.b == 1L) {
	            bx <- x[, by.x]
	            if (is.factor(bx))
	                bx <- as.character(bx)
	            by <- y[, by.y]
	            if (is.factor(by))
	                by <- as.character(by)
	        }
	        else {
	            # Several key columns: stack both key sets under common names
	            # and paste each row into one string, "\r" as separator.
	            bx <- x[, by.x, drop = FALSE]
	            by <- y[, by.y, drop = FALSE]
	            names(bx) <- names(by) <- paste0("V", seq_len(ncol(bx)))
	            bz <- do.call("paste", c(rbind(bx, by), sep = "\r"))
	            bx <- bz[seq_len(nx)]
	            by <- bz[nx + seq_len(ny)]
	        }

	        # Keys present on both sides, then the position of each row's key
	        # within that common set (0 = no partner).
	        comm <- match(bx, by, 0L)
	        bxy <- bx[comm > 0L]
	        xinds <- match(bx, bxy, 0L, incomparables)
	        yinds <- match(by, bxy, 0L, incomparables)
	        if (nx > 0L && ny > 0L) {
	            m <- .Internal(merge(xinds, yinds, all.x, all.y))
	        }
	        else {
	            m <- list(xi = integer(), yi = integer(), x.alone = seq_len(nx),
	                y.alone = seq_len(ny))
	        }

	        nm <- nm.x <- names(x)[-by.x]
	        nm.by <- names(x)[by.x]
	        nm.y <- names(y)[-by.y]
	        ncx <- ncol(x)
	        # From here on all.x / all.y mean "there really are unmatched
	        # rows"; nxx / nyy are only defined and used in that case.
	        if (all.x)
	            all.x <- (nxx <- length(m$x.alone)) > 0L
	        if (all.y)
	            all.y <- (nyy <- length(m$y.alone)) > 0L
	        lxy <- length(m$xi)

	        has.common.nms <- any(cnm <- nm.x %in% nm.y)
	        if (has.common.nms && nzchar(suffixes[1L]))
	            nm.x[cnm] <- paste0(nm.x[cnm], suffixes[1L])

	        # x part: matched rows, then unmatched x rows; key columns first.
	        x <- x[c(m$xi, if (all.x) m$x.alone), c(by.x, seq_len(ncx)[-by.x]),
	            drop = FALSE]
	        names(x) <- c(nm.by, nm.x)
	        if (all.y) {
	            # Unmatched y rows contribute their keys plus all-NA x columns
	            # (indexing rows with NA yields NA-filled rows).
	            ya <- y[m$y.alone, by.y, drop = FALSE]
	            names(ya) <- nm.by
	            ya <- cbind(ya, x[rep.int(NA_integer_, nyy), nm.x,
	                drop = FALSE])
	            x <- rbind(x, ya)
	        }

	        if (has.common.nms && nzchar(suffixes[2L])) {
	            cnm <- nm.y %in% nm
	            nm.y[cnm] <- paste0(nm.y[cnm], suffixes[2L])
	        }

	        # y part: matched rows, placeholder copies of row 1 for every
	        # unmatched x row (blanked just below), then unmatched y rows.
	        y <- y[c(m$yi, if (all.x) rep.int(1L, nxx), if (all.y) m$y.alone),
	            -by.y, drop = FALSE]
	        if (all.x) {
	            zap <- (lxy + 1L):(lxy + nxx)
	            for (i in seq_along(y)) {
	                # Fixed: test and blank column i, not always column 1.
	                # Matrix columns must be blanked by row.
	                if (is.matrix(y[[i]]))
	                    y[[i]][zap, ] <- NA
	                else is.na(y[[i]]) <- zap
	            }
	        }
	        if (has.common.nms)
	            names(y) <- nm.y

	        nm <- c(names(x), names(y))
	        if (any(d <- duplicated(nm))) {
	            if (sum(d) > 1L)
	                warning("column names ", paste(sQuote(nm[d]),
	                  collapse = ", "), " are duplicated in the result",
	                  domain = NA)
	            else warning("column name ", sQuote(nm[d]), " is duplicated in the result",
	                domain = NA)
	        }

	        res <- cbind(x, y)
	        if (sort)
	            res <- res[if (all.x || all.y)
	                do.call("order", x[, seq_len(l.b), drop = FALSE])
	            else sort.list(bx[m$xi]), , drop = FALSE]
	    }
	    # Intentionally left disabled so the row names survive:
	    # attr(res, "row.names") <- .set_row_names(nrow(res))
	    res
	}