giupo/python_test.py

## python_test.py
#
# I'm running the same tests described here: http://wesmckinney.com/blog/?p=268
#

from pandas import *
from pandas.util.testing import rands

# Number of observations in the full series, and of labels in its index.
n = 100000
# Index of n labels, one rands(10) call per label
# (presumably a random 10-character string each; confirm against
# pandas.util.testing.rands).
indices = Index([rands(10) for _ in xrange(n)])

def sample(values, k):
    """Pick ``k`` entries of ``values`` at random, without replacement.

    An array of positions ``0 .. len(values) - 1`` is shuffled in place with
    the standard-library ``random`` module; the first ``k`` shuffled
    positions are then handed to ``values.take``, whose result is returned.
    """
    import random

    positions = np.arange(len(values))
    random.shuffle(positions)
    chosen = positions[:k]
    return values.take(chosen)

# Number of labels of `indices` that the second series is built on.
subsample_size = 90000

# x: one random value for every label in `indices`.
x = Series(np.random.randn(n), index=indices)
# y: random values on a shuffled subset of those labels.
y = Series(
    np.random.randn(subsample_size),
    index=sample(indices, subsample_size),
)

# Index-sorted copies, used for the `xs+ys` timing.
xs, ys = x.sort_index(), y.sort_index()
from pandas.util.testing import rands

# -----------

>>> timeit x+y
1 loops, best of 3: 349 ms per loop

>>> timeit xs+ys
1 loops, best of 3: 85.7 ms per loop

## test_R.R
library(zoo)

# 100000 index labels, each built by pasting together 10 letters drawn
# from `letters` with sample() (no replacement by default).
indices <- replicate(100000, paste(sample(letters, 10), collapse = ""))

# Full series, and a second series on a random 90000-label subset.
# The zoo() calls are kept inline so argument evaluation order is unchanged.
x <- zoo(rnorm(100000), indices)
y <- zoo(rnorm(90000), indices[sample(1:100000, 90000)])

# Elapsed seconds of 10 timed additions; gc() runs before each one.
timings <- numeric()
for (i in seq_len(10)) {
  gc()
  timings[i] <- system.time(x + y)[["elapsed"]]
}

> mean(timings)

[1] 3.2465
	#
	# I'm running the same tests described here: http://wesmckinney.com/blog/?p=268
	#

	from pandas import *
	from pandas.util.testing import rands

	# Number of observations in the full series, and of labels in its index.
	n = 100000
	# Index of n labels, one rands(10) call per label
	# (presumably a random 10-character string each; confirm against
	# pandas.util.testing.rands).
	indices = Index([rands(10) for _ in xrange(n)])

	def sample(values, k):
	    """Return ``k`` randomly chosen entries of ``values``, without repeats.

	    Positions ``0 .. len(values) - 1`` are shuffled in place with
	    ``random.shuffle`` and the first ``k`` of them are passed to
	    ``values.take``.

	    The body is indented under ``def`` again; in the flattened copy it sat
	    at the same level as the ``def`` line, which is not valid Python.
	    """
	    from random import shuffle
	    sampler = np.arange(len(values))
	    shuffle(sampler)
	    return values.take(sampler[:k])

	# Number of labels of `indices` that the second series is built on.
	subsample_size = 90000

	# x: one random value for every label in `indices`.
	x = Series(np.random.randn(n), index=indices)
	# y: random values on a shuffled subset of those labels.
	y = Series(
	    np.random.randn(subsample_size),
	    index=sample(indices, subsample_size),
	)

	# Index-sorted copies, used for the `xs+ys` timing.
	xs, ys = x.sort_index(), y.sort_index()
	from pandas.util.testing import rands

	# -----------

	>>> timeit x+y
	1 loops, best of 3: 349 ms per loop

	>>> timeit xs+ys
	1 loops, best of 3: 85.7 ms per loop
	library(zoo)

	# 100000 index labels, each built by pasting together 10 letters drawn
	# from `letters` with sample() (no replacement by default).
	indices <- replicate(100000, paste(sample(letters, 10), collapse = ""))

	# Full series, and a second series on a random 90000-label subset.
	# The zoo() calls are kept inline so argument evaluation order is unchanged.
	x <- zoo(rnorm(100000), indices)
	y <- zoo(rnorm(90000), indices[sample(1:100000, 90000)])

	# Elapsed seconds of 10 timed additions; gc() runs before each one.
	timings <- numeric()
	for (i in seq_len(10)) {
	  gc()
	  timings[i] <- system.time(x + y)[["elapsed"]]
	}

	> mean(timings)

	[1] 3.2465