arunsrinivasan/17844143.R Secret

## 17844143.R
require(qdapTools)
set.seed(1L)
x = data.frame(zip=sample(1e6), market=0L)
y = data.frame(market=sample(20, 2000, TRUE), zip=sample(1e6, 2000, FALSE))

#### this line takes a long time... Tyler any ideas why?
x$market = lookup(x$zip, y[, 2:1])
####

# I think the actual answer is:
system.time({
idx1 = match(x$zip, y$zip)
idx2 = which(!is.na(idx1))
x$market[idx2] <- y$market[idx1[idx2]]
})
# takes 0.085 seconds

## here's a data.table solution using joins - although this is not necessary here:
require(data.table)

set.seed(1L)
x = data.frame(zip=sample(1e6), market=0L)
y = data.frame(market=sample(20, 2000, TRUE), zip=sample(1e6, 2000, FALSE))

system.time({
setDT(x)
setDT(y)

setkey(x, zip)
setkey(y, zip)
x[y, market := i.market]
})
## takes 0.094 seconds

## here's using match and `:=` in data.table
require(data.table)

set.seed(1L)
x = data.frame(zip=sample(1e6), market=0L)
y = data.frame(market=sample(20, 2000, TRUE), zip=sample(1e6, 2000, FALSE))

system.time({
idx = match(x$zip, y$zip, nomatch=0L)
setDT(x)[idx != 0L, market := y$market[idx]]
})
## takes 0.58 seconds.
	require(qdapTools)
	set.seed(1L)
	x = data.frame(zip=sample(1e6), market=0L)
	y = data.frame(market=sample(20, 2000, TRUE), zip=sample(1e6, 2000, FALSE))

	#### this line takes a long time... Tyler any ideas why?
	x$market = lookup(x$zip, y[, 2:1])
	####

	# I think the actual answer is:
	system.time({
	idx1 = match(x$zip, y$zip)
	idx2 = which(!is.na(idx1))
	x$market[idx2] <- y$market[idx1[idx2]]
	})
	# takes 0.085 seconds

	## here's a data.table solution using joins - although this is not necessary here:
	require(data.table)

	set.seed(1L)
	x = data.frame(zip=sample(1e6), market=0L)
	y = data.frame(market=sample(20, 2000, TRUE), zip=sample(1e6, 2000, FALSE))

	system.time({
	setDT(x)
	setDT(y)

	setkey(x, zip)
	setkey(y, zip)
	x[y, market := i.market]
	})
	## takes 0.094 seconds

	## here's using match and `:=` in data.table
	require(data.table)

	set.seed(1L)
	x = data.frame(zip=sample(1e6), market=0L)
	y = data.frame(market=sample(20, 2000, TRUE), zip=sample(1e6, 2000, FALSE))

	system.time({
	idx = match(x$zip, y$zip, nomatch=0L)
	setDT(x)[idx != 0L, market := y$market[idx]]
	})
	## takes 0.58 seconds.