Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
pandas failures, pandas HEAD (3e9e947b8), numpy branch refactor-updateifcopy2, pypy
grep -a '\(^E *[A-Z][a-zA-Z]*Error:\)\|\(^AssertionError$\)\|\(^E *MemoryError\)' ../test_pandas.txt |cut -c 5- |cut -f1,2 -d: | sed -e's/ */ /' | sort |uniq -c |sort -n -r
20 TypeError: sys.getsizeof() is not implemented on PyPy.
8 SystemError: An exception was set, but function returned a value
6 AssertionError: Attributes are different
4 SystemError: Bad internal call!
4 AttributeError: 'str' object has no attribute '__sizeof__'
4 AssertionError: numpy array are different
3 PicklingError: Can't pickle <type 'pandas._libs.sparse.BlockIndex'>
2 SettingWithCopyError:
2 OperationalError: database table is locked
2 AssertionError: Series.index are different
1 ValueError: ('year must be in 1..9999', 0)
1 TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('<U10') dtype('<U10') dtype('<U10')
1 TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('S6') dtype('S6') dtype('S6')
1 TypeError: expected a readable buffer object
1 PicklingError: Can't pickle <type 'pandas._libs.sparse.IntIndex'>
1 AttributeError: 'unicode' object has no attribute '__sizeof__'
1 AttributeError: 'int' object has no attribute '__sizeof__'
1 AttributeError: 'bool' object has no attribute '__sizeof__'
1 AssertionError: Wrong exception
1 AssertionError: TypeError not raised.
1 AssertionError: ""quoting" must be an integer" does not match "expected integer, got str object"
1 AssertionError: "pickle protocol 4 asked for; the highest available protocol is 2" does not match "pickle protocol must be <= 2"
1 AssertionError: "pickle protocol 3 asked for; the highest available protocol is 2" does not match "pickle protocol must be <= 2"
1 AssertionError: "can't multiply sequence by non-int" does not match "unsupported operand type(s) for *
1 AssertionError: assert False
1 AssertionError: assert '2' not in Int64Index([0, 1, 2], dtype='int64')
============================= test session starts ==============================
platform linux2 -- Python 2.7.13[pypy-5.9.0-alpha], pytest-3.2.0, py-1.4.34, pluggy-0.4.0 -- /home/matti/pypy_stuff/pypy-test/bin/pypy
cachedir: .cache
rootdir: /home/matti/pypy_stuff/pandas, inifile: setup.cfg
plugins: xdist-1.18.2
gw0 I / gw1 I / gw2 I / gw3 I
[gw0] linux2 Python 2.7.13 cwd: /home/matti/pypy_stuff/pandas
[gw1] linux2 Python 2.7.13 cwd: /home/matti/pypy_stuff/pandas
[gw2] linux2 Python 2.7.13 cwd: /home/matti/pypy_stuff/pandas
[gw3] linux2 Python 2.7.13 cwd: /home/matti/pypy_stuff/pandas
[gw0] Python 2.7.13 (dcea72970920+, Aug 09 2017, 14:24:29) -- [PyPy 5.9.0-alpha0 with GCC 5.4.0 20160609]
[gw1] Python 2.7.13 (dcea72970920+, Aug 09 2017, 14:24:29) -- [PyPy 5.9.0-alpha0 with GCC 5.4.0 20160609]
[gw2] Python 2.7.13 (dcea72970920+, Aug 09 2017, 14:24:29) -- [PyPy 5.9.0-alpha0 with GCC 5.4.0 20160609]
[gw3] Python 2.7.13 (dcea72970920+, Aug 09 2017, 14:24:29) -- [PyPy 5.9.0-alpha0 with GCC 5.4.0 20160609]
gw0 [11637] / gw1 [11637] / gw2 [11637] / gw3 [11637]
scheduling tests via LoadScheduling
==================================== ERRORS ====================================
______________ ERROR at teardown of TestSQLiteFallback.test_dtype ______________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.test_sql.TestSQLiteFallback object at 0x000000000d238ad8>
method = <bound method TestSQLiteFallback.test_dtype of <pandas.tests.io.test_sql.TestSQLiteFallback object at 0x000000000d238ad8>>
def teardown_method(self, method):
for tbl in self._get_all_tables():
> self.drop_table(tbl)
pandas/tests/io/test_sql.py:183:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/io/test_sql.py:212: in drop_table
sql._get_valid_sqlite_name(table_name))
../pypy/lib_pypy/_sqlite3.py:400: in execute
return cur.execute(*args)
../pypy/lib_pypy/_sqlite3.py:728: in wrapper
return func(self, *args, **kwargs)
../pypy/lib_pypy/_sqlite3.py:895: in execute
return self.__execute(False, sql, [params])
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <_sqlite3.Cursor object at 0x000000000d2ff600>, multiple = False
sql = 'DROP TABLE IF EXISTS "iris"', many_params = [[]]
def __execute(self, multiple, sql, many_params):
self.__locked = True
self._reset = False
try:
del self.__next_row
except AttributeError:
pass
try:
if not isinstance(sql, basestring):
raise ValueError("operation parameter must be str or unicode")
try:
del self.__description
except AttributeError:
pass
self.__rowcount = -1
self.__statement = self.__connection._statement_cache.get(sql)
if self.__connection._isolation_level is not None:
if self.__statement._type in (
_STMT_TYPE_UPDATE,
_STMT_TYPE_DELETE,
_STMT_TYPE_INSERT,
_STMT_TYPE_REPLACE
):
if not self.__connection._in_transaction:
self.__connection._begin()
elif self.__statement._type == _STMT_TYPE_OTHER:
if self.__connection._in_transaction:
self.__connection.commit()
elif self.__statement._type == _STMT_TYPE_SELECT:
if multiple:
raise ProgrammingError("You cannot execute SELECT "
"statements in executemany().")
for params in many_params:
self.__statement._set_params(params)
# Actually execute the SQL statement
ret = _lib.sqlite3_step(self.__statement._statement)
# PyPy: if we get SQLITE_LOCKED, it's probably because
# one of the cursors created previously is still alive
# and not reset and the operation we're trying to do
# makes Sqlite unhappy about that. In that case, we
# automatically reset all old cursors and try again.
if ret == _lib.SQLITE_LOCKED:
self.__connection._reset_already_committed_statements()
ret = _lib.sqlite3_step(self.__statement._statement)
if ret == _lib.SQLITE_ROW:
if multiple:
raise ProgrammingError("executemany() can only execute DML statements.")
self.__build_row_cast_map()
self.__next_row = self.__fetch_one_row()
elif ret == _lib.SQLITE_DONE:
if not multiple:
self.__statement._reset()
else:
self.__statement._reset()
> raise self.__connection._get_exception(ret)
E OperationalError: database table is locked
../pypy/lib_pypy/_sqlite3.py:868: OperationalError
___________ ERROR at teardown of TestSQLiteFallback.test_notna_dtype ___________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.test_sql.TestSQLiteFallback object at 0x000000000d238c98>
method = <bound method TestSQLiteFallback.test_notna_dtype of <pandas.tests.io.test_sql.TestSQLiteFallback object at 0x000000000d238c98>>
def teardown_method(self, method):
for tbl in self._get_all_tables():
> self.drop_table(tbl)
pandas/tests/io/test_sql.py:183:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/io/test_sql.py:212: in drop_table
sql._get_valid_sqlite_name(table_name))
../pypy/lib_pypy/_sqlite3.py:400: in execute
return cur.execute(*args)
../pypy/lib_pypy/_sqlite3.py:728: in wrapper
return func(self, *args, **kwargs)
../pypy/lib_pypy/_sqlite3.py:895: in execute
return self.__execute(False, sql, [params])
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <_sqlite3.Cursor object at 0x000000000d2e9478>, multiple = False
sql = 'DROP TABLE IF EXISTS "iris"', many_params = [[]]
def __execute(self, multiple, sql, many_params):
self.__locked = True
self._reset = False
try:
del self.__next_row
except AttributeError:
pass
try:
if not isinstance(sql, basestring):
raise ValueError("operation parameter must be str or unicode")
try:
del self.__description
except AttributeError:
pass
self.__rowcount = -1
self.__statement = self.__connection._statement_cache.get(sql)
if self.__connection._isolation_level is not None:
if self.__statement._type in (
_STMT_TYPE_UPDATE,
_STMT_TYPE_DELETE,
_STMT_TYPE_INSERT,
_STMT_TYPE_REPLACE
):
if not self.__connection._in_transaction:
self.__connection._begin()
elif self.__statement._type == _STMT_TYPE_OTHER:
if self.__connection._in_transaction:
self.__connection.commit()
elif self.__statement._type == _STMT_TYPE_SELECT:
if multiple:
raise ProgrammingError("You cannot execute SELECT "
"statements in executemany().")
for params in many_params:
self.__statement._set_params(params)
# Actually execute the SQL statement
ret = _lib.sqlite3_step(self.__statement._statement)
# PyPy: if we get SQLITE_LOCKED, it's probably because
# one of the cursors created previously is still alive
# and not reset and the operation we're trying to do
# makes Sqlite unhappy about that. In that case, we
# automatically reset all old cursors and try again.
if ret == _lib.SQLITE_LOCKED:
self.__connection._reset_already_committed_statements()
ret = _lib.sqlite3_step(self.__statement._statement)
if ret == _lib.SQLITE_ROW:
if multiple:
raise ProgrammingError("executemany() can only execute DML statements.")
self.__build_row_cast_map()
self.__next_row = self.__fetch_one_row()
elif ret == _lib.SQLITE_DONE:
if not multiple:
self.__statement._reset()
else:
self.__statement._reset()
> raise self.__connection._get_exception(ret)
E OperationalError: database table is locked
../pypy/lib_pypy/_sqlite3.py:868: OperationalError
=================================== FAILURES ===================================
___________________ TestDuplicated.test_duplicated_with_nas ____________________
[gw0] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.test_algos.TestDuplicated object at 0x000000000c6671d8>
def test_duplicated_with_nas(self):
keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)
result = algos.duplicated(keys)
expected = np.array([False, False, False, True, False, True])
> tm.assert_numpy_array_equal(result, expected)
pandas/tests/test_algos.py:730:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1169: in assert_numpy_array_equal
_raise(left, right, err_msg)
pandas/util/testing.py:1163: in _raise
raise_assert_detail(obj, msg, left, right)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
obj = 'numpy array', message = 'numpy array values are different (16.66667 %)'
left = '[False, False, False, True, False, False]'
right = '[False, False, False, True, False, True]', diff = None
def raise_assert_detail(obj, message, left, right, diff=None):
if isinstance(left, np.ndarray):
left = pprint_thing(left)
if isinstance(right, np.ndarray):
right = pprint_thing(right)
msg = """{0} are different
{1}
[left]: {2}
[right]: {3}""".format(obj, message, left, right)
if diff is not None:
msg = msg + "\n[diff]: {diff}".format(diff=diff)
> raise AssertionError(msg)
E AssertionError: numpy array are different
E
E numpy array values are different (16.66667 %)
E [left]: [False, False, False, True, False, False]
E [right]: [False, False, False, True, False, True]
pandas/util/testing.py:1102: AssertionError
_____________________ TestPandasDelegate.test_memory_usage _____________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.test_base.TestPandasDelegate object at 0x0000000002d83280>
def test_memory_usage(self):
# Delegate does not implement memory_usage.
# Check that we fall back to in-built `__sizeof__`
# GH 12924
delegate = self.Delegate(self.Delegator())
> sys.getsizeof(delegate)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/tests/test_base.py:152: TypeError
________________________ TestIndexOps.test_memory_usage ________________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.test_base.TestIndexOps object at 0x0000000006dcd9b8>
def test_memory_usage(self):
for o in self.objs:
res = o.memory_usage()
> res_deep = o.memory_usage(deep=True)
pandas/tests/test_base.py:947:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:576: in memory_usage
result = super(Index, self).memory_usage(deep=deep)
pandas/core/base.py:1075: in memory_usage
v += lib.memory_usage_of_objects(self.values)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> s += arr[i].__sizeof__()
E AttributeError: 'bool' object has no attribute '__sizeof__'
pandas/_libs/lib.pyx:128: AttributeError
______________________ TestCategorical.test_memory_usage _______________________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.test_categorical.TestCategorical object at 0x000000000ea8f050>
def test_memory_usage(self):
cat = pd.Categorical([1, 2, 3])
# .categories is an index, so we include the hashtable
assert 0 < cat.nbytes <= cat.memory_usage()
assert 0 < cat.nbytes <= cat.memory_usage(deep=True)
cat = pd.Categorical(['foo', 'foo', 'bar'])
> assert cat.memory_usage(deep=True) > cat.nbytes
pandas/tests/test_categorical.py:1449:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/categorical.py:1134: in memory_usage
return self._codes.nbytes + self._categories.memory_usage(deep=deep)
pandas/core/indexes/base.py:576: in memory_usage
result = super(Index, self).memory_usage(deep=deep)
pandas/core/base.py:1075: in memory_usage
v += lib.memory_usage_of_objects(self.values)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> s += arr[i].__sizeof__()
E AttributeError: 'str' object has no attribute '__sizeof__'
pandas/_libs/lib.pyx:128: AttributeError
_______________________ TestCategoricalAsBlock.test_info _______________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.test_categorical.TestCategoricalAsBlock object at 0x000000000477c678>
def test_info(self):
# make sure it works
n = 2500
df = DataFrame({'int64': np.random.randint(100, size=n)})
df['category'] = Series(np.array(list('abcdefghij')).take(
np.random.randint(0, 10, size=n))).astype('category')
df.isna()
buf = compat.StringIO()
> df.info(buf=buf)
pandas/tests/test_categorical.py:2785:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/frame.py:1835: in info
mem_usage = self.memory_usage(index=True, deep=deep).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/range.py:224: in memory_usage
return self.nbytes
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
_____________________ TestMultiLevel.test_xs_level_series ______________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.test_multilevel.TestMultiLevel object at 0x000000000b9e9398>
def test_xs_level_series(self):
s = self.frame['A']
result = s[:, 'two']
expected = self.frame.xs('two', level=1)['A']
> tm.assert_series_equal(result, expected)
pandas/tests/test_multilevel.py:565:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1239: in assert_series_equal
obj='{0}.index'.format(obj))
pandas/util/testing.py:899: in assert_index_equal
_check_types(left, right, obj=obj)
pandas/util/testing.py:879: in _check_types
assert_class_equal(left, right, exact=exact, obj=obj)
pandas/util/testing.py:976: in assert_class_equal
repr_class(right))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
obj = 'Series.index', message = 'Series.index classes are not equivalent'
left = MultiIndex(levels=[[u'foo', u'bar', u'baz', u'qux'], [u'one', u'two', u'three'... labels=[[0, 1, 2, 3], [1, 1, 1, 1]],
names=[u'first', u'second'])
right = Index([u'foo', u'bar', u'baz', u'qux'], dtype='object', name=u'first')
diff = None
def raise_assert_detail(obj, message, left, right, diff=None):
if isinstance(left, np.ndarray):
left = pprint_thing(left)
if isinstance(right, np.ndarray):
right = pprint_thing(right)
msg = """{0} are different
{1}
[left]: {2}
[right]: {3}""".format(obj, message, left, right)
if diff is not None:
msg = msg + "\n[diff]: {diff}".format(diff=diff)
> raise AssertionError(msg)
E AssertionError: Series.index are different
E
E Series.index classes are not equivalent
E [left]: MultiIndex(levels=[[u'foo', u'bar', u'baz', u'qux'], [u'one', u'two', u'three']],
E labels=[[0, 1, 2, 3], [1, 1, 1, 1]],
E names=[u'first', u'second'])
E [right]: Index([u'foo', u'bar', u'baz', u'qux'], dtype='object', name=u'first')
pandas/util/testing.py:1102: AssertionError
______________________ TestMultiIndex.test_get_loc_level _______________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_multi.TestMultiIndex object at 0x0000000008a3b018>
def test_get_loc_level(self):
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
loc, new_index = index.get_loc_level((0, 1))
expected = slice(1, 2)
exp_index = index[expected].droplevel(0).droplevel(0)
assert loc == expected
assert new_index.equals(exp_index)
loc, new_index = index.get_loc_level((0, 1, 0))
expected = 1
assert loc == expected
assert new_index is None
pytest.raises(KeyError, index.get_loc_level, (2, 2))
index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array(
[0, 0, 0, 0]), np.array([0, 1, 2, 3])])
result, new_index = index.get_loc_level((2000, slice(None, None)))
expected = slice(None, None)
assert result == expected
> assert new_index.equals(index.droplevel(0))
E AssertionError: assert False
E + where False = <bound method MultiIndex.equals of MultiIndex(levels=[[2000], [0, 1, 2, 3]],\n labels=[[0, 0, 0, 0], [0, 1, 2, 3]])>(Int64Index([0, 1, 2, 3], dtype='int64'))
E + where <bound method MultiIndex.equals of MultiIndex(levels=[[2000], [0, 1, 2, 3]],\n labels=[[0, 0, 0, 0], [0, 1, 2, 3]])> = MultiIndex(levels=[[2000], [0, 1, 2, 3]],\n labels=[[0, 0, 0, 0], [0, 1, 2, 3]]).equals
E + and Int64Index([0, 1, 2, 3], dtype='int64') = <bound method MultiIndex.droplevel of MultiIndex(levels=[[2000], [0, 1, 2, 3]],\n labels=[[0, 0, 0, 0], [0, 1, 2, 3]])>(0)
E + where <bound method MultiIndex.droplevel of MultiIndex(levels=[[2000], [0, 1, 2, 3]],\n labels=[[0, 0, 0, 0], [0, 1, 2, 3]])> = MultiIndex(levels=[[2000], [0, 1, 2, 3]],\n labels=[[0, 0, 0, 0], [0, 1, 2, 3]]).droplevel
pandas/tests/indexes/test_multi.py:1173: AssertionError
_____________________ TestGroupBy.test_groupby_return_type _____________________
[gw0] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.groupby.test_groupby.TestGroupBy object at 0x000000000812db78>
def test_groupby_return_type(self):
# GH2893, return a reduced type
df1 = DataFrame(
[{"val1": 1, "val2": 20},
{"val1": 1, "val2": 19},
{"val1": 2, "val2": 27},
{"val1": 2, "val2": 12}
])
def func(dataf):
return dataf["val2"] - dataf["val2"].mean()
result = df1.groupby("val1", squeeze=True).apply(func)
assert isinstance(result, Series)
df2 = DataFrame(
[{"val1": 1, "val2": 20},
{"val1": 1, "val2": 19},
{"val1": 1, "val2": 27},
{"val1": 1, "val2": 12}
])
def func(dataf):
return dataf["val2"] - dataf["val2"].mean()
result = df2.groupby("val1", squeeze=True).apply(func)
assert isinstance(result, Series)
# GH3596, return a consistent type (regression in 0.11 from 0.10.1)
df = DataFrame([[1, 1], [1, 1]], columns=['X', 'Y'])
result = df.groupby('X', squeeze=False).count()
assert isinstance(result, DataFrame)
# GH5592
# inconsistent return type
df = DataFrame(dict(A=['Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb',
'Pony', 'Pony'], B=Series(
np.arange(7), dtype='int64'), C=date_range(
'20130101', periods=7)))
def f(grp):
return grp.iloc[0]
expected = df.groupby('A').first()[['B']]
result = df.groupby('A').apply(f)[['B']]
assert_frame_equal(result, expected)
def f(grp):
if grp.name == 'Tiger':
return None
return grp.iloc[0]
result = df.groupby('A').apply(f)[['B']]
e = expected.copy()
e.loc['Tiger'] = np.nan
assert_frame_equal(result, e)
def f(grp):
if grp.name == 'Pony':
return None
return grp.iloc[0]
result = df.groupby('A').apply(f)[['B']]
e = expected.copy()
e.loc['Pony'] = np.nan
assert_frame_equal(result, e)
# 5592 revisited, with datetimes
def f(grp):
if grp.name == 'Pony':
return None
return grp.iloc[0]
result = df.groupby('A').apply(f)[['C']]
e = df.groupby('A').first()[['C']]
> e.loc['Pony'] = pd.NaT
pandas/tests/groupby/test_groupby.py:584:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexing.py:179: in __setitem__
self._setitem_with_indexer(indexer, value)
pandas/core/indexing.py:614: in _setitem_with_indexer
self.obj._check_is_chained_assignment_possible()
pandas/core/generic.py:1944: in _check_is_chained_assignment_possible
self._check_setitem_copy(stacklevel=4, t='referant')
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = C
A
Lamb 2013-01-04
Pony 2013-01-06
Tiger 2013-01-01
stacklevel = 4
t = '\nA value is trying to be set on a copy of a slice from a DataFrame\n\nSee the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy'
force = False
def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
"""
Parameters
----------
stacklevel : integer, default 4
the level to show of the stack when the error is output
t : string, the type of setting error
force : boolean, default False
if True, then force showing an error
validate if we are doing a setitem on a chained copy.
If you call this function, be sure to set the stacklevel such that the
user will see the error *at the level of setting*
It is technically possible to figure out that we are setting on
a copy even WITH a multi-dtyped pandas object. In other words, some
blocks may be views while other are not. Currently _is_view will ALWAYS
return False for multi-blocks to avoid having to handle this case.
df = DataFrame(np.arange(0,9), columns=['count'])
df['group'] = 'b'
# This technically need not raise SettingWithCopy if both are view
# (which is not # generally guaranteed but is usually True. However,
# this is in general not a good practice and we recommend using .loc.
df.iloc[0:5]['group'] = 'a'
"""
if force or self.is_copy:
value = config.get_option('mode.chained_assignment')
if value is None:
return
# see if the copy is not actually referred to; if so, then dissolve
# the copy weakref
try:
gc.collect(2)
if not gc.get_referents(self.is_copy()):
self.is_copy = None
return
except:
pass
# we might be a false positive
try:
if self.is_copy().shape == self.shape:
self.is_copy = None
return
except:
pass
# a custom message
if isinstance(self.is_copy, string_types):
t = self.is_copy
elif t == 'referant':
t = ("\n"
"A value is trying to be set on a copy of a slice from a "
"DataFrame\n\n"
"See the caveats in the documentation: "
"http://pandas.pydata.org/pandas-docs/stable/"
"indexing.html#indexing-view-versus-copy"
)
else:
t = ("\n"
"A value is trying to be set on a copy of a slice from a "
"DataFrame.\n"
"Try using .loc[row_indexer,col_indexer] = value "
"instead\n\nSee the caveats in the documentation: "
"http://pandas.pydata.org/pandas-docs/stable/"
"indexing.html#indexing-view-versus-copy"
)
if value == 'raise':
> raise SettingWithCopyError(t)
E SettingWithCopyError:
E A value is trying to be set on a copy of a slice from a DataFrame
E
E See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
pandas/core/generic.py:2026: SettingWithCopyError
__________________________ TestIndex.test_duplicates ___________________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_base.TestIndex object at 0x0000000004649440>
def test_duplicates(self):
for ind in self.indices.values():
if not len(ind):
continue
if isinstance(ind, MultiIndex):
continue
> idx = self._holder([ind[0]] * 5)
pandas/tests/indexes/common.py:319:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:352: in __new__
return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
pandas/core/indexes/base.py:287: in __new__
subarr, copy, name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
cls = <class 'pandas.core.indexes.base.Index'>
data = array([9223372036854775808, 9223372036854775808, 9223372036854775808,
9223372036854775808, 9223372036854775808], dtype=object)
copy = False, name = None
@classmethod
def _try_convert_to_int_index(cls, data, copy, name):
"""
Attempt to convert an array of data into an integer index.
Parameters
----------
data : The data to convert.
copy : Whether to copy the data or not.
name : The name of the index returned.
Returns
-------
int_index : data converted to either an Int64Index or a
UInt64Index
Raises
------
ValueError if the conversion was not successful.
"""
from .numeric import Int64Index, UInt64Index
try:
> res = data.astype('i8', copy=False)
E SystemError: An exception was set, but function returned a value
pandas/core/indexes/base.py:672: SystemError
_________________________ TestIndex.test_memory_usage __________________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_base.TestIndex object at 0x0000000009f2c8e0>
def test_memory_usage(self):
for name, index in compat.iteritems(self.indices):
result = index.memory_usage()
if len(index):
index.get_loc(index[0])
result2 = index.memory_usage()
> result3 = index.memory_usage(deep=True)
pandas/tests/indexes/common.py:402:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:576: in memory_usage
result = super(Index, self).memory_usage(deep=deep)
pandas/core/base.py:1075: in memory_usage
v += lib.memory_usage_of_objects(self.values)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> s += arr[i].__sizeof__()
E AttributeError: 'unicode' object has no attribute '__sizeof__'
pandas/_libs/lib.pyx:128: AttributeError
_______________________ TestIndex.test_intersection_base _______________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_base.TestIndex object at 0x00000000086b62f8>
def test_intersection_base(self):
for name, idx in compat.iteritems(self.indices):
first = idx[:5]
second = idx[:3]
intersect = first.intersection(second)
if isinstance(idx, CategoricalIndex):
pass
else:
assert tm.equalContents(intersect, second)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with tm.assert_raises_regex(ValueError, msg):
result = first.intersection(case)
elif isinstance(idx, CategoricalIndex):
pass
else:
> result = first.intersection(case)
pandas/tests/indexes/common.py:576:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:2256: in intersection
other = _ensure_index(other)
pandas/core/indexes/base.py:4039: in _ensure_index
return Index(index_like)
pandas/core/indexes/base.py:287: in __new__
subarr, copy, name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
cls = <class 'pandas.core.indexes.base.Index'>
data = array([9223372036854775808, 9223372036854775809, 9223372036854775810], dtype=object)
copy = False, name = None
@classmethod
def _try_convert_to_int_index(cls, data, copy, name):
"""
Attempt to convert an array of data into an integer index.
Parameters
----------
data : The data to convert.
copy : Whether to copy the data or not.
name : The name of the index returned.
Returns
-------
int_index : data converted to either an Int64Index or a
UInt64Index
Raises
------
ValueError if the conversion was not successful.
"""
from .numeric import Int64Index, UInt64Index
try:
> res = data.astype('i8', copy=False)
E SystemError: An exception was set, but function returned a value
pandas/core/indexes/base.py:672: SystemError
__________________________ TestIndex.test_union_base ___________________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_base.TestIndex object at 0x000000000bc55088>
def test_union_base(self):
for name, idx in compat.iteritems(self.indices):
first = idx[3:]
second = idx[:5]
everything = idx
union = first.union(second)
assert tm.equalContents(union, everything)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with tm.assert_raises_regex(ValueError, msg):
result = first.union(case)
elif isinstance(idx, CategoricalIndex):
pass
else:
> result = first.union(case)
pandas/tests/indexes/common.py:603:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:2170: in union
other = _ensure_index(other)
pandas/core/indexes/base.py:4039: in _ensure_index
return Index(index_like)
pandas/core/indexes/base.py:287: in __new__
subarr, copy, name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
cls = <class 'pandas.core.indexes.base.Index'>
data = array([9223372036854775808, 9223372036854775809, 9223372036854775810,
9223372036854775811, 9223372036854775812], dtype=object)
copy = False, name = None
@classmethod
def _try_convert_to_int_index(cls, data, copy, name):
"""
Attempt to convert an array of data into an integer index.
Parameters
----------
data : The data to convert.
copy : Whether to copy the data or not.
name : The name of the index returned.
Returns
-------
int_index : data converted to either an Int64Index or a
UInt64Index
Raises
------
ValueError if the conversion was not successful.
"""
from .numeric import Int64Index, UInt64Index
try:
> res = data.astype('i8', copy=False)
E SystemError: An exception was set, but function returned a value
pandas/core/indexes/base.py:672: SystemError
________________________ TestIndex.test_difference_base ________________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_base.TestIndex object at 0x000000000b4e35c8>
def test_difference_base(self):
for name, idx in compat.iteritems(self.indices):
first = idx[2:]
second = idx[:4]
answer = idx[4:]
result = first.difference(second)
if isinstance(idx, CategoricalIndex):
pass
else:
assert tm.equalContents(result, answer)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with tm.assert_raises_regex(ValueError, msg):
result = first.difference(case)
elif isinstance(idx, CategoricalIndex):
pass
elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
assert result.__class__ == answer.__class__
tm.assert_numpy_array_equal(result.asi8, answer.asi8)
else:
> result = first.difference(case)
pandas/tests/indexes/common.py:637:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:2317: in difference
other, result_name = self._convert_can_do_setop(other)
pandas/core/indexes/base.py:1050: in _convert_can_do_setop
other = Index(other, name=self.name)
pandas/core/indexes/base.py:352: in __new__
return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
pandas/core/indexes/base.py:287: in __new__
subarr, copy, name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
cls = <class 'pandas.core.indexes.base.Index'>
data = array([9223372036854775808, 9223372036854775809, 9223372036854775810,
9223372036854775811], dtype=object)
copy = False, name = None
@classmethod
def _try_convert_to_int_index(cls, data, copy, name):
"""
Attempt to convert an array of data into an integer index.
Parameters
----------
data : The data to convert.
copy : Whether to copy the data or not.
name : The name of the index returned.
Returns
-------
int_index : data converted to either an Int64Index or a
UInt64Index
Raises
------
ValueError if the conversion was not successful.
"""
from .numeric import Int64Index, UInt64Index
try:
> res = data.astype('i8', copy=False)
E SystemError: An exception was set, but function returned a value
pandas/core/indexes/base.py:672: SystemError
_________________________ TestMultiIndex.test_isin_nan _________________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_multi.TestMultiIndex object at 0x0000000007ee0640>
def test_isin_nan(self):
idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]),
> np.array([False, False]))
pandas/tests/indexes/test_multi.py:2580:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1169: in assert_numpy_array_equal
_raise(left, right, err_msg)
pandas/util/testing.py:1163: in _raise
raise_assert_detail(obj, msg, left, right)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
obj = 'numpy array', message = 'numpy array values are different (50.0 %)'
left = '[False, True]', right = '[False, False]', diff = None
def raise_assert_detail(obj, message, left, right, diff=None):
if isinstance(left, np.ndarray):
left = pprint_thing(left)
if isinstance(right, np.ndarray):
right = pprint_thing(right)
msg = """{0} are different
{1}
[left]: {2}
[right]: {3}""".format(obj, message, left, right)
if diff is not None:
msg = msg + "\n[diff]: {diff}".format(diff=diff)
> raise AssertionError(msg)
E AssertionError: numpy array are different
E
E numpy array values are different (50.0 %)
E [left]: [False, True]
E [right]: [False, False]
pandas/util/testing.py:1102: AssertionError
____________________ TestPeriodIndex.test_as_frame_columns _____________________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.frame.test_period.TestPeriodIndex object at 0x000000000b12b8d8>
def test_as_frame_columns(self):
rng = period_range('1/1/2000', periods=5)
df = DataFrame(randn(10, 5), columns=rng)
ts = df[rng[0]]
tm.assert_series_equal(ts, df.iloc[:, 0])
# GH # 1211
> repr(df)
pandas/tests/frame/test_period.py:28:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/base.py:72: in __repr__
return str(self)
pandas/core/base.py:52: in __str__
return self.__bytes__()
pandas/core/base.py:64: in __bytes__
return self.__unicode__().encode(encoding, 'replace')
pandas/core/frame.py:626: in __unicode__
line_width=width, show_dimensions=show_dimensions)
pandas/core/frame.py:1648: in to_string
formatter.to_string()
pandas/io/formats/format.py:588: in to_string
strcols = self._to_str_columns()
pandas/io/formats/format.py:516: in _to_str_columns
str_columns = self._get_formatted_column_labels(frame)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pandas.io.formats.format.DataFrameFormatter object at 0x000000000b100a68>
frame = <[TypeError("ufunc 'add' did not contain a loop with signature matching types dtype('<U10') dtype('<U10') dtype('<U10')") raised in repr()] SafeRepr object at 0xdf4db60>
def _get_formatted_column_labels(self, frame):
from pandas.core.index import _sparsify
columns = frame.columns
if isinstance(columns, MultiIndex):
fmt_columns = columns.format(sparsify=False, adjoin=False)
fmt_columns = lzip(*fmt_columns)
dtypes = self.frame.dtypes._values
# if we have a Float level, they don't use leading space at all
restrict_formatting = any([l.is_floating for l in columns.levels])
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
def space_format(x, y):
if (y not in self.formatters and
need_leadsp[x] and not restrict_formatting):
return ' ' + y
return y
str_columns = list(zip(*[[space_format(x, y) for y in x]
for x in fmt_columns]))
if self.sparsify:
str_columns = _sparsify(str_columns)
str_columns = [list(x) for x in zip(*str_columns)]
else:
fmt_columns = columns.format()
dtypes = self.frame.dtypes
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
str_columns = [[' ' + x if not self._get_formatter(i) and
need_leadsp[x] else x]
for i, (col, x) in enumerate(zip(columns,
> fmt_columns))]
E TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('<U10') dtype('<U10') dtype('<U10')
pandas/io/formats/format.py:767: TypeError
___________________________ TestIndex.test_isin_nan ____________________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_base.TestIndex object at 0x0000000006917328>
def test_isin_nan(self):
tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([np.nan]),
> np.array([False, True]))
pandas/tests/indexes/test_base.py:1365:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1169: in assert_numpy_array_equal
_raise(left, right, err_msg)
pandas/util/testing.py:1163: in _raise
raise_assert_detail(obj, msg, left, right)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
obj = 'numpy array', message = 'numpy array values are different (50.0 %)'
left = '[False, False]', right = '[False, True]', diff = None
def raise_assert_detail(obj, message, left, right, diff=None):
if isinstance(left, np.ndarray):
left = pprint_thing(left)
if isinstance(right, np.ndarray):
right = pprint_thing(right)
msg = """{0} are different
{1}
[left]: {2}
[right]: {3}""".format(obj, message, left, right)
if diff is not None:
msg = msg + "\n[diff]: {diff}".format(diff=diff)
> raise AssertionError(msg)
E AssertionError: numpy array are different
E
E numpy array values are different (50.0 %)
E [left]: [False, False]
E [right]: [False, True]
pandas/util/testing.py:1102: AssertionError
_____________________ TestMixedIntIndex.test_memory_usage ______________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_base.TestMixedIntIndex object at 0x0000000005b18598>
def test_memory_usage(self):
for name, index in compat.iteritems(self.indices):
result = index.memory_usage()
if len(index):
index.get_loc(index[0])
result2 = index.memory_usage()
> result3 = index.memory_usage(deep=True)
pandas/tests/indexes/common.py:402:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:576: in memory_usage
result = super(Index, self).memory_usage(deep=deep)
pandas/core/base.py:1075: in memory_usage
v += lib.memory_usage_of_objects(self.values)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> s += arr[i].__sizeof__()
E AttributeError: 'int' object has no attribute '__sizeof__'
pandas/_libs/lib.pyx:128: AttributeError
____________________ TestUInt64Index.test_intersection_base ____________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_numeric.TestUInt64Index object at 0x000000000805ca68>
def test_intersection_base(self):
for name, idx in compat.iteritems(self.indices):
first = idx[:5]
second = idx[:3]
intersect = first.intersection(second)
if isinstance(idx, CategoricalIndex):
pass
else:
assert tm.equalContents(intersect, second)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with tm.assert_raises_regex(ValueError, msg):
result = first.intersection(case)
elif isinstance(idx, CategoricalIndex):
pass
else:
> result = first.intersection(case)
pandas/tests/indexes/common.py:576:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:2256: in intersection
other = _ensure_index(other)
pandas/core/indexes/base.py:4039: in _ensure_index
return Index(index_like)
pandas/core/indexes/base.py:287: in __new__
subarr, copy, name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
cls = <class 'pandas.core.indexes.base.Index'>
data = array([9223372036854775808, 9223372036854775818, 9223372036854775823], dtype=object)
copy = False, name = None
@classmethod
def _try_convert_to_int_index(cls, data, copy, name):
"""
Attempt to convert an array of data into an integer index.
Parameters
----------
data : The data to convert.
copy : Whether to copy the data or not.
name : The name of the index returned.
Returns
-------
int_index : data converted to either an Int64Index or a
UInt64Index
Raises
------
ValueError if the conversion was not successful.
"""
from .numeric import Int64Index, UInt64Index
try:
> res = data.astype('i8', copy=False)
E SystemError: An exception was set, but function returned a value
pandas/core/indexes/base.py:672: SystemError
_______________________ TestUInt64Index.test_union_base ________________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_numeric.TestUInt64Index object at 0x000000000805c410>
def test_union_base(self):
for name, idx in compat.iteritems(self.indices):
first = idx[3:]
second = idx[:5]
everything = idx
union = first.union(second)
assert tm.equalContents(union, everything)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with tm.assert_raises_regex(ValueError, msg):
result = first.union(case)
elif isinstance(idx, CategoricalIndex):
pass
else:
> result = first.union(case)
pandas/tests/indexes/common.py:603:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:2170: in union
other = _ensure_index(other)
pandas/core/indexes/base.py:4039: in _ensure_index
return Index(index_like)
pandas/core/indexes/base.py:287: in __new__
subarr, copy, name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
cls = <class 'pandas.core.indexes.base.Index'>
data = array([9223372036854775808, 9223372036854775818, 9223372036854775823,
9223372036854775828, 9223372036854775833], dtype=object)
copy = False, name = None
@classmethod
def _try_convert_to_int_index(cls, data, copy, name):
"""
Attempt to convert an array of data into an integer index.
Parameters
----------
data : The data to convert.
copy : Whether to copy the data or not.
name : The name of the index returned.
Returns
-------
int_index : data converted to either an Int64Index or a
UInt64Index
Raises
------
ValueError if the conversion was not successful.
"""
from .numeric import Int64Index, UInt64Index
try:
> res = data.astype('i8', copy=False)
E SystemError: An exception was set, but function returned a value
pandas/core/indexes/base.py:672: SystemError
_____________________ TestUInt64Index.test_difference_base _____________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_numeric.TestUInt64Index object at 0x000000000803b8a0>
def test_difference_base(self):
for name, idx in compat.iteritems(self.indices):
first = idx[2:]
second = idx[:4]
answer = idx[4:]
result = first.difference(second)
if isinstance(idx, CategoricalIndex):
pass
else:
assert tm.equalContents(result, answer)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with tm.assert_raises_regex(ValueError, msg):
result = first.difference(case)
elif isinstance(idx, CategoricalIndex):
pass
elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
assert result.__class__ == answer.__class__
tm.assert_numpy_array_equal(result.asi8, answer.asi8)
else:
> result = first.difference(case)
pandas/tests/indexes/common.py:637:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:2317: in difference
other, result_name = self._convert_can_do_setop(other)
pandas/core/indexes/base.py:1050: in _convert_can_do_setop
other = Index(other, name=self.name)
pandas/core/indexes/base.py:352: in __new__
return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
pandas/core/indexes/base.py:287: in __new__
subarr, copy, name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
cls = <class 'pandas.core.indexes.base.Index'>
data = array([9223372036854775808, 9223372036854775818, 9223372036854775823,
9223372036854775828], dtype=object)
copy = False, name = None
@classmethod
def _try_convert_to_int_index(cls, data, copy, name):
"""
Attempt to convert an array of data into an integer index.
Parameters
----------
data : The data to convert.
copy : Whether to copy the data or not.
name : The name of the index returned.
Returns
-------
int_index : data converted to either an Int64Index or a
UInt64Index
Raises
------
ValueError if the conversion was not successful.
"""
from .numeric import Int64Index, UInt64Index
try:
> res = data.astype('i8', copy=False)
E SystemError: An exception was set, but function returned a value
pandas/core/indexes/base.py:672: SystemError
__________________ TestUInt64Index.test_symmetric_difference ___________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_numeric.TestUInt64Index object at 0x000000000803ad08>
def test_symmetric_difference(self):
for name, idx in compat.iteritems(self.indices):
first = idx[1:]
second = idx[:-1]
if isinstance(idx, CategoricalIndex):
pass
else:
answer = idx[[0, -1]]
result = first.symmetric_difference(second)
assert tm.equalContents(result, answer)
# GH 10149
cases = [klass(second.values)
for klass in [np.array, Series, list]]
for case in cases:
if isinstance(idx, PeriodIndex):
msg = "can only call with other PeriodIndex-ed objects"
with tm.assert_raises_regex(ValueError, msg):
result = first.symmetric_difference(case)
elif isinstance(idx, CategoricalIndex):
pass
else:
> result = first.symmetric_difference(case)
pandas/tests/indexes/common.py:667:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:2368: in symmetric_difference
other, result_name_update = self._convert_can_do_setop(other)
pandas/core/indexes/base.py:1050: in _convert_can_do_setop
other = Index(other, name=self.name)
pandas/core/indexes/base.py:352: in __new__
return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
pandas/core/indexes/base.py:287: in __new__
subarr, copy, name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
cls = <class 'pandas.core.indexes.base.Index'>
data = array([9223372036854775808, 9223372036854775818, 9223372036854775823,
9223372036854775828], dtype=object)
copy = False, name = None
@classmethod
def _try_convert_to_int_index(cls, data, copy, name):
"""
Attempt to convert an array of data into an integer index.
Parameters
----------
data : The data to convert.
copy : Whether to copy the data or not.
name : The name of the index returned.
Returns
-------
int_index : data converted to either an Int64Index or a
UInt64Index
Raises
------
ValueError if the conversion was not successful.
"""
from .numeric import Int64Index, UInt64Index
try:
> res = data.astype('i8', copy=False)
E SystemError: An exception was set, but function returned a value
pandas/core/indexes/base.py:672: SystemError
___________________ TestDataFrameReprInfoEtc.test_info_wide ____________________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.frame.test_repr_info.TestDataFrameReprInfoEtc object at 0x000000000a8f2f38>
def test_info_wide(self):
from pandas import set_option, reset_option
io = StringIO()
df = DataFrame(np.random.randn(5, 101))
> df.info(buf=io)
pandas/tests/frame/test_repr_info.py:210:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/frame.py:1835: in info
mem_usage = self.memory_usage(index=True, deep=deep).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/range.py:224: in memory_usage
return self.nbytes
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
_____________ TestDataFrameReprInfoEtc.test_info_duplicate_columns _____________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.frame.test_repr_info.TestDataFrameReprInfoEtc object at 0x000000000a8f3050>
def test_info_duplicate_columns(self):
io = StringIO()
# it works!
frame = DataFrame(np.random.randn(1500, 4),
columns=['a', 'a', 'b', 'b'])
> frame.info(buf=io)
pandas/tests/frame/test_repr_info.py:230:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/frame.py:1835: in info
mem_usage = self.memory_usage(index=True, deep=deep).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/range.py:224: in memory_usage
return self.nbytes
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
____________________ TestCategoricalIndex.test_memory_usage ____________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_category.TestCategoricalIndex object at 0x0000000002ff26e8>
def test_memory_usage(self):
for name, index in compat.iteritems(self.indices):
result = index.memory_usage()
if len(index):
index.get_loc(index[0])
result2 = index.memory_usage()
> result3 = index.memory_usage(deep=True)
pandas/tests/indexes/common.py:402:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/base.py:576: in memory_usage
result = super(Index, self).memory_usage(deep=deep)
pandas/core/base.py:1071: in memory_usage
return self.values.memory_usage(deep=deep)
pandas/core/categorical.py:1134: in memory_usage
return self._codes.nbytes + self._categories.memory_usage(deep=deep)
pandas/core/indexes/base.py:576: in memory_usage
result = super(Index, self).memory_usage(deep=deep)
pandas/core/base.py:1075: in memory_usage
v += lib.memory_usage_of_objects(self.values)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> s += arr[i].__sizeof__()
E AttributeError: 'str' object has no attribute '__sizeof__'
pandas/_libs/lib.pyx:128: AttributeError
__ TestDataFrameReprInfoEtc.test_info_duplicate_columns_shows_correct_dtypes ___
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.frame.test_repr_info.TestDataFrameReprInfoEtc object at 0x0000000009ed9830>
def test_info_duplicate_columns_shows_correct_dtypes(self):
# GH11761
io = StringIO()
frame = DataFrame([[1, 2.0]],
columns=['a', 'a'])
> frame.info(buf=io)
pandas/tests/frame/test_repr_info.py:238:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/frame.py:1835: in info
mem_usage = self.memory_usage(index=True, deep=deep).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/range.py:224: in memory_usage
return self.nbytes
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
____________ TestDataFrameReprInfoEtc.test_info_shows_column_dtypes ____________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.frame.test_repr_info.TestDataFrameReprInfoEtc object at 0x0000000009ed8170>
def test_info_shows_column_dtypes(self):
dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]',
'complex128', 'object', 'bool']
data = {}
n = 10
for i, dtype in enumerate(dtypes):
data[i] = np.random.randint(2, size=n).astype(dtype)
df = DataFrame(data)
buf = StringIO()
> df.info(buf=buf)
pandas/tests/frame/test_repr_info.py:253:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/frame.py:1835: in info
mem_usage = self.memory_usage(index=True, deep=deep).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/range.py:224: in memory_usage
return self.nbytes
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
________________ TestRangeIndex.test_ndarray_compat_properties _________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_range.TestRangeIndex object at 0x00000000088063a0>
def test_ndarray_compat_properties(self):
idx = self.create_index()
assert idx.T.equals(idx)
assert idx.transpose().equals(idx)
values = idx.values
for prop in self._compat_props:
assert getattr(idx, prop) == getattr(values, prop)
# test for validity
> idx.nbytes
pandas/tests/indexes/common.py:159:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
_________________ TestDataFrameReprInfoEtc.test_info_max_cols __________________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.frame.test_repr_info.TestDataFrameReprInfoEtc object at 0x0000000009ebd980>
def test_info_max_cols(self):
df = DataFrame(np.random.randn(10, 5))
for len_, verbose in [(5, None), (5, False), (10, True)]:
# For verbose always ^ setting ^ summarize ^ full output
with option_context('max_info_columns', 4):
buf = StringIO()
> df.info(buf=buf, verbose=verbose)
pandas/tests/frame/test_repr_info.py:265:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/frame.py:1835: in info
mem_usage = self.memory_usage(index=True, deep=deep).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/range.py:224: in memory_usage
return self.nbytes
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
_______________________ TestRangeIndex.test_memory_usage _______________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_range.TestRangeIndex object at 0x000000000710f168>
def test_memory_usage(self):
for name, index in compat.iteritems(self.indices):
> result = index.memory_usage()
pandas/tests/indexes/common.py:398:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/range.py:224: in memory_usage
return self.nbytes
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
_______________ TestDataFrameReprInfoEtc.test_info_memory_usage ________________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.frame.test_repr_info.TestDataFrameReprInfoEtc object at 0x000000000a11ef70>
def test_info_memory_usage(self):
# Ensure memory usage is displayed, when asserted, on the last line
dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]',
'complex128', 'object', 'bool']
data = {}
n = 10
for i, dtype in enumerate(dtypes):
data[i] = np.random.randint(2, size=n).astype(dtype)
df = DataFrame(data)
buf = StringIO()
# display memory usage case
> df.info(buf=buf, memory_usage=True)
pandas/tests/frame/test_repr_info.py:305:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/frame.py:1835: in info
mem_usage = self.memory_usage(index=True, deep=deep).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/range.py:224: in memory_usage
return self.nbytes
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
__________ TestDataFrameReprInfoEtc.test_info_memory_usage_qualified ___________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.frame.test_repr_info.TestDataFrameReprInfoEtc object at 0x0000000008e10560>
def test_info_memory_usage_qualified(self):
buf = StringIO()
df = DataFrame(1, columns=list('ab'),
index=[1, 2, 3])
df.info(buf=buf)
assert '+' not in buf.getvalue()
buf = StringIO()
df = DataFrame(1, columns=list('ab'),
index=list('ABC'))
df.info(buf=buf)
assert '+' in buf.getvalue()
buf = StringIO()
df = DataFrame(1, columns=list('ab'),
index=pd.MultiIndex.from_product(
[range(3), range(3)]))
> df.info(buf=buf)
pandas/tests/frame/test_repr_info.py:403:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/frame.py:1835: in info
mem_usage = self.memory_usage(index=True, deep=deep).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/multi.py:451: in memory_usage
return self._nbytes(deep)
pandas/core/indexes/multi.py:470: in _nbytes
names_nbytes = sum((getsizeof(i) for i in self.names))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.0 = <sequenceiterator object at 0x000000000adce668>
> names_nbytes = sum((getsizeof(i) for i in self.names))
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/core/indexes/multi.py:470: TypeError
________________________ TestCategoricalIndex.test_isin ________________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_category.TestCategoricalIndex object at 0x0000000005830b80>
def test_isin(self):
ci = CategoricalIndex(
list('aabca') + [np.nan], categories=['c', 'a', 'b'])
tm.assert_numpy_array_equal(
ci.isin(['c']),
np.array([False, False, False, True, False, False]))
tm.assert_numpy_array_equal(
ci.isin(['c', 'a', 'b']), np.array([True] * 5 + [False]))
tm.assert_numpy_array_equal(
ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6))
# mismatched categorical -> coerced to ndarray so doesn't matter
tm.assert_numpy_array_equal(
ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] *
6))
tm.assert_numpy_array_equal(
ci.isin(ci.set_categories(list('defghi'))),
> np.array([False] * 5 + [True]))
pandas/tests/indexes/test_category.py:572:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1169: in assert_numpy_array_equal
_raise(left, right, err_msg)
pandas/util/testing.py:1163: in _raise
raise_assert_detail(obj, msg, left, right)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
obj = 'numpy array', message = 'numpy array values are different (16.66667 %)'
left = '[False, False, False, False, False, False]'
right = '[False, False, False, False, False, True]', diff = None
def raise_assert_detail(obj, message, left, right, diff=None):
if isinstance(left, np.ndarray):
left = pprint_thing(left)
if isinstance(right, np.ndarray):
right = pprint_thing(right)
msg = """{0} are different
{1}
[left]: {2}
[right]: {3}""".format(obj, message, left, right)
if diff is not None:
msg = msg + "\n[diff]: {diff}".format(diff=diff)
> raise AssertionError(msg)
E AssertionError: numpy array are different
E
E numpy array values are different (16.66667 %)
E [left]: [False, False, False, False, False, False]
E [right]: [False, False, False, False, False, True]
pandas/util/testing.py:1102: AssertionError
______ TestDataFrameReprInfoEtc.test_info_memory_usage_bug_on_multiindex _______
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.frame.test_repr_info.TestDataFrameReprInfoEtc object at 0x0000000008e11a28>
def test_info_memory_usage_bug_on_multiindex(self):
# GH 14308
# memory usage introspection should not materialize .values
from string import ascii_uppercase as uppercase
def memory_usage(f):
return f.memory_usage(deep=True).sum()
N = 100
M = len(uppercase)
index = pd.MultiIndex.from_product([list(uppercase),
pd.date_range('20160101',
periods=N)],
names=['id', 'date'])
df = DataFrame({'value': np.random.randn(N * M)}, index=index)
unstacked = df.unstack('id')
assert df.values.nbytes == unstacked.values.nbytes
> assert memory_usage(df) > memory_usage(unstacked)
pandas/tests/frame/test_repr_info.py:432:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/frame/test_repr_info.py:420: in memory_usage
return f.memory_usage(deep=True).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/multi.py:451: in memory_usage
return self._nbytes(deep)
pandas/core/indexes/multi.py:468: in _nbytes
level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels))
pandas/core/indexes/multi.py:468: in <genexpr>
level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels))
pandas/core/indexes/base.py:576: in memory_usage
result = super(Index, self).memory_usage(deep=deep)
pandas/core/base.py:1075: in memory_usage
v += lib.memory_usage_of_objects(self.values)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> s += arr[i].__sizeof__()
E AttributeError: 'str' object has no attribute '__sizeof__'
pandas/_libs/lib.pyx:128: AttributeError
______________________ TestDataFrameReprInfoEtc.test_info ______________________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
args = (<pandas.tests.frame.test_repr_info.TestDataFrameReprInfoEtc object at 0x0000000005fbf590>,)
kwargs = {}
@wraps(f)
def wrapper(*args, **kwargs):
try:
sys.stdout = StringIO()
> f(*args, **kwargs)
pandas/util/testing.py:695:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/frame/test_repr_info.py:203: in test_info
frame.info()
pandas/core/frame.py:1835: in info
mem_usage = self.memory_usage(index=True, deep=deep).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/range.py:224: in memory_usage
return self.nbytes
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
__________________________ TestRangeIndex.test_nbytes __________________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_range.TestRangeIndex object at 0x0000000007778790>
def test_nbytes(self):
# memory savings vs int index
i = RangeIndex(0, 1000)
> assert i.nbytes < i.astype(int).nbytes / 10
pandas/tests/indexes/test_range.py:701:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
________________ TestMultiIndex.test_ndarray_compat_properties _________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_multi.TestMultiIndex object at 0x00000000030ae330>
def test_ndarray_compat_properties(self):
idx = self.create_index()
assert idx.T.equals(idx)
assert idx.transpose().equals(idx)
values = idx.values
for prop in self._compat_props:
assert getattr(idx, prop) == getattr(values, prop)
# test for validity
> idx.nbytes
pandas/tests/indexes/common.py:159:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
_______________________ TestMultiIndex.test_memory_usage _______________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.test_multi.TestMultiIndex object at 0x0000000004411a60>
def test_memory_usage(self):
for name, index in compat.iteritems(self.indices):
> result = index.memory_usage()
pandas/tests/indexes/common.py:398:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/indexes/multi.py:451: in memory_usage
return self._nbytes(deep)
pandas/core/indexes/multi.py:470: in _nbytes
names_nbytes = sum((getsizeof(i) for i in self.names))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.0 = <sequenceiterator object at 0x0000000009702188>
> names_nbytes = sum((getsizeof(i) for i in self.names))
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/core/indexes/multi.py:470: TypeError
____________ TestTimedeltaIndex.test_does_not_convert_mixed_integer ____________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexes.timedeltas.test_timedelta.TestTimedeltaIndex object at 0x0000000006ac3280>
def test_does_not_convert_mixed_integer(self):
df = tm.makeCustomDataframe(10, 10,
data_gen_f=lambda *args, **kwargs: randn(),
r_idx_type='i', c_idx_type='td')
> str(df)
pandas/tests/indexes/timedeltas/test_timedelta.py:305:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/base.py:52: in __str__
return self.__bytes__()
pandas/core/base.py:64: in __bytes__
return self.__unicode__().encode(encoding, 'replace')
pandas/core/frame.py:626: in __unicode__
line_width=width, show_dimensions=show_dimensions)
pandas/core/frame.py:1648: in to_string
formatter.to_string()
pandas/io/formats/format.py:588: in to_string
strcols = self._to_str_columns()
pandas/io/formats/format.py:516: in _to_str_columns
str_columns = self._get_formatted_column_labels(frame)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pandas.io.formats.format.DataFrameFormatter object at 0x0000000006b2c170>
frame = <[TypeError("ufunc 'add' did not contain a loop with signature matching types dtype('S6') dtype('S6') dtype('S6')") raised in repr()] SafeRepr object at 0xea6e5a0>
def _get_formatted_column_labels(self, frame):
from pandas.core.index import _sparsify
columns = frame.columns
if isinstance(columns, MultiIndex):
fmt_columns = columns.format(sparsify=False, adjoin=False)
fmt_columns = lzip(*fmt_columns)
dtypes = self.frame.dtypes._values
# if we have a Float level, they don't use leading space at all
restrict_formatting = any([l.is_floating for l in columns.levels])
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
def space_format(x, y):
if (y not in self.formatters and
need_leadsp[x] and not restrict_formatting):
return ' ' + y
return y
str_columns = list(zip(*[[space_format(x, y) for y in x]
for x in fmt_columns]))
if self.sparsify:
str_columns = _sparsify(str_columns)
str_columns = [list(x) for x in zip(*str_columns)]
else:
fmt_columns = columns.format()
dtypes = self.frame.dtypes
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
str_columns = [[' ' + x if not self._get_formatter(i) and
need_leadsp[x] else x]
for i, (col, x) in enumerate(zip(columns,
> fmt_columns))]
E TypeError: ufunc 'add' did not contain a loop with signature matching types dtype('S6') dtype('S6') dtype('S6')
pandas/io/formats/format.py:767: TypeError
___________________________ test_round_trip_current ____________________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
current_pickle_data = {'cat': {'int16': [0, 1, 2, 3, 4, ..., 995, 996, 997, 998, 999]
Length: 1000
Categories (1000, int64): [0, 1, 2, 3, .....x'], [u'one', u'two']],
...2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
names=[u'first', u'second'])}, ...}
def test_round_trip_current(current_pickle_data):
try:
import cPickle as c_pickle
def c_pickler(obj, path):
with open(path, 'wb') as fh:
c_pickle.dump(obj, fh, protocol=-1)
def c_unpickler(path):
with open(path, 'rb') as fh:
fh.seek(0)
return c_pickle.load(fh)
except:
c_pickler = None
c_unpickler = None
import pickle as python_pickle
def python_pickler(obj, path):
with open(path, 'wb') as fh:
python_pickle.dump(obj, fh, protocol=-1)
def python_unpickler(path):
with open(path, 'rb') as fh:
fh.seek(0)
return python_pickle.load(fh)
data = current_pickle_data
for typ, dv in data.items():
for dt, expected in dv.items():
for writer in [pd.to_pickle, c_pickler, python_pickler]:
if writer is None:
continue
with tm.ensure_clean() as path:
# test writing with each pickler
> writer(expected, path)
pandas/tests/io/test_pickle.py:257:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/io/pickle.py:45: in to_pickle
pkl.dump(obj, f, protocol=protocol)
../pypy/lib_pypy/cPickle.py:119: in dump
Pickler(file, protocol).dump(obj)
../pypy/lib-python/2.7/pickle.py:224: in dump
self.save(obj)
../pypy/lib-python/2.7/pickle.py:331: in save
self.save_reduce(obj=obj, *rv)
../pypy/lib-python/2.7/pickle.py:427: in save_reduce
save(state)
../pypy/lib-python/2.7/pickle.py:286: in save
f(self, obj) # Call unbound method with explicit self
../pypy/lib-python/2.7/pickle.py:661: in save_dict
self._batch_setitems(obj.iteritems())
../pypy/lib-python/2.7/pickle.py:693: in _batch_setitems
save(v)
../pypy/lib-python/2.7/pickle.py:331: in save
self.save_reduce(obj=obj, *rv)
../pypy/lib-python/2.7/pickle.py:427: in save_reduce
save(state)
../pypy/lib-python/2.7/pickle.py:286: in save
f(self, obj) # Call unbound method with explicit self
../pypy/lib-python/2.7/pickle.py:570: in save_tuple
save(element)
../pypy/lib-python/2.7/pickle.py:286: in save
f(self, obj) # Call unbound method with explicit self
../pypy/lib-python/2.7/pickle.py:608: in save_list
self._batch_appends(iter(obj))
../pypy/lib-python/2.7/pickle.py:644: in _batch_appends
save(tmp[0])
../pypy/lib-python/2.7/pickle.py:331: in save
self.save_reduce(obj=obj, *rv)
../pypy/lib-python/2.7/pickle.py:427: in save_reduce
save(state)
../pypy/lib-python/2.7/pickle.py:286: in save
f(self, obj) # Call unbound method with explicit self
../pypy/lib-python/2.7/pickle.py:556: in save_tuple
save(element)
../pypy/lib-python/2.7/pickle.py:286: in save
f(self, obj) # Call unbound method with explicit self
../pypy/lib-python/2.7/pickle.py:556: in save_tuple
save(element)
../pypy/lib-python/2.7/pickle.py:331: in save
self.save_reduce(obj=obj, *rv)
../pypy/lib-python/2.7/pickle.py:402: in save_reduce
save(func)
../pypy/lib-python/2.7/pickle.py:286: in save
f(self, obj) # Call unbound method with explicit self
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <cPickle.Pickler object at 0x0000000006619b08>
obj = <type 'pandas._libs.sparse.BlockIndex'>, name = '_libs.sparse.BlockIndex'
pack = <built-in function pack>
def save_global(self, obj, name=None, pack=struct.pack):
write = self.write
memo = self.memo
if name is None:
name = obj.__name__
module = getattr(obj, "__module__", None)
if module is None:
module = whichmodule(obj, name)
try:
__import__(module)
mod = sys.modules[module]
klass = getattr(mod, name)
except (ImportError, KeyError, AttributeError):
raise PicklingError(
"Can't pickle %r: it's not found as %s.%s" %
> (obj, module, name))
E PicklingError: Can't pickle <type 'pandas._libs.sparse.BlockIndex'>: it's not found as pandas._libs.sparse.BlockIndex
../pypy/lib-python/2.7/pickle.py:800: PicklingError
_________________ TestChaining.test_detect_chained_assignment __________________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexing.test_chaining_and_caching.TestChaining object at 0x0000000006aa9d70>
def test_detect_chained_assignment(self):
pd.set_option('chained_assignment', 'raise')
# work with the chain
expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB'))
df = DataFrame(np.arange(4).reshape(2, 2),
columns=list('AB'), dtype='int64')
assert df.is_copy is None
df['A'][0] = -5
df['A'][1] = -6
tm.assert_frame_equal(df, expected)
# test with the chaining
df = DataFrame({'A': Series(range(2), dtype='int64'),
'B': np.array(np.arange(2, 4), dtype=np.float64)})
assert df.is_copy is None
with pytest.raises(com.SettingWithCopyError):
df['A'][0] = -5
with pytest.raises(com.SettingWithCopyError):
df['A'][1] = np.nan
assert df['A'].is_copy is None
# Using a copy (the chain), fails
df = DataFrame({'A': Series(range(2), dtype='int64'),
'B': np.array(np.arange(2, 4), dtype=np.float64)})
with pytest.raises(com.SettingWithCopyError):
df.loc[0]['A'] = -5
# Doc example
df = DataFrame({'a': ['one', 'one', 'two', 'three',
'two', 'one', 'six'],
'c': Series(range(7), dtype='int64')})
assert df.is_copy is None
with pytest.raises(com.SettingWithCopyError):
indexer = df.a.str.startswith('o')
df[indexer]['c'] = 42
expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]})
df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]})
with pytest.raises(com.SettingWithCopyError):
df['A'][0] = 111
with pytest.raises(com.SettingWithCopyError):
df.loc[0]['A'] = 111
df.loc[0, 'A'] = 111
tm.assert_frame_equal(df, expected)
# gh-5475: Make sure that is_copy is picked up reconstruction
df = DataFrame({"A": [1, 2]})
assert df.is_copy is None
with tm.ensure_clean('__tmp__pickle') as path:
df.to_pickle(path)
df2 = pd.read_pickle(path)
df2["B"] = df2["A"]
df2["B"] = df2["A"]
# gh-5597: a spurious raise as we are setting the entire column here
from string import ascii_letters as letters
def random_text(nobs=100):
df = []
for i in range(nobs):
idx = np.random.randint(len(letters), size=2)
idx.sort()
df.append([letters[idx[0]:idx[1]]])
return DataFrame(df, columns=['letters'])
df = random_text(100000)
# Always a copy
x = df.iloc[[0, 1, 2]]
assert x.is_copy is not None
x = df.iloc[[0, 1, 2, 4]]
assert x.is_copy is not None
# Explicitly copy
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer].copy()
assert df.is_copy is None
df['letters'] = df['letters'].apply(str.lower)
# Implicitly take
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer]
assert df.is_copy is not None
df['letters'] = df['letters'].apply(str.lower)
# Implicitly take 2
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df = df.loc[indexer]
assert df.is_copy is not None
df.loc[:, 'letters'] = df['letters'].apply(str.lower)
# Should be ok even though it's a copy!
assert df.is_copy is None
df['letters'] = df['letters'].apply(str.lower)
assert df.is_copy is None
df = random_text(100000)
indexer = df.letters.apply(lambda x: len(x) > 10)
df.loc[indexer, 'letters'] = (
df.loc[indexer, 'letters'].apply(str.lower))
# an identical take, so no copy
df = DataFrame({'a': [1]}).dropna()
assert df.is_copy is None
df['a'] += 1
# Inplace ops, originally from:
# http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
a = [12, 23]
b = [123, None]
c = [1234, 2345]
d = [12345, 23456]
tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'),
('ears', 'right')]
events = {('eyes', 'left'): a,
('eyes', 'right'): b,
('ears', 'left'): c,
('ears', 'right'): d}
multiind = MultiIndex.from_tuples(tuples, names=['part', 'side'])
zed = DataFrame(events, index=['a', 'b'], columns=multiind)
with pytest.raises(com.SettingWithCopyError):
zed['eyes']['right'].fillna(value=555, inplace=True)
df = DataFrame(np.random.randn(10, 4))
s = df.iloc[:, 0].sort_values()
tm.assert_series_equal(s, df.iloc[:, 0].sort_values())
tm.assert_series_equal(s, df[0].sort_values())
# see gh-6025: false positives
df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]})
str(df)
df['column1'] = df['column1'] + 'b'
str(df)
df = df[df['column2'] != 8]
str(df)
> df['column1'] = df['column1'] + 'c'
pandas/tests/indexing/test_chaining_and_caching.py:292:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/frame.py:2455: in __setitem__
self._set_item(key, value)
pandas/core/frame.py:2528: in _set_item
self._check_setitem_copy()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = column1 column2
0 abc 4
2 abc 9, stacklevel = 4
t = '\nA value is trying to be set on a copy of a slice from a DataFrame.\nTry using .loc[row_indexer,col_indexer] = value... the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy'
force = False
def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
"""
Parameters
----------
stacklevel : integer, default 4
the level to show of the stack when the error is output
t : string, the type of setting error
force : boolean, default False
if True, then force showing an error
validate if we are doing a settitem on a chained copy.
If you call this function, be sure to set the stacklevel such that the
user will see the error *at the level of setting*
It is technically possible to figure out that we are setting on
a copy even WITH a multi-dtyped pandas object. In other words, some
blocks may be views while other are not. Currently _is_view will ALWAYS
return False for multi-blocks to avoid having to handle this case.
df = DataFrame(np.arange(0,9), columns=['count'])
df['group'] = 'b'
# This technically need not raise SettingWithCopy if both are view
# (which is not # generally guaranteed but is usually True. However,
# this is in general not a good practice and we recommend using .loc.
df.iloc[0:5]['group'] = 'a'
"""
if force or self.is_copy:
value = config.get_option('mode.chained_assignment')
if value is None:
return
# see if the copy is not actually refererd; if so, then disolve
# the copy weakref
try:
gc.collect(2)
if not gc.get_referents(self.is_copy()):
self.is_copy = None
return
except:
pass
# we might be a false positive
try:
if self.is_copy().shape == self.shape:
self.is_copy = None
return
except:
pass
# a custom message
if isinstance(self.is_copy, string_types):
t = self.is_copy
elif t == 'referant':
t = ("\n"
"A value is trying to be set on a copy of a slice from a "
"DataFrame\n\n"
"See the caveats in the documentation: "
"http://pandas.pydata.org/pandas-docs/stable/"
"indexing.html#indexing-view-versus-copy"
)
else:
t = ("\n"
"A value is trying to be set on a copy of a slice from a "
"DataFrame.\n"
"Try using .loc[row_indexer,col_indexer] = value "
"instead\n\nSee the caveats in the documentation: "
"http://pandas.pydata.org/pandas-docs/stable/"
"indexing.html#indexing-view-versus-copy"
)
if value == 'raise':
> raise SettingWithCopyError(t)
E SettingWithCopyError:
E A value is trying to be set on a copy of a slice from a DataFrame.
E Try using .loc[row_indexer,col_indexer] = value instead
E
E See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
pandas/core/generic.py:2026: SettingWithCopyError
___________________ TestCompression.test_read_explicit[zip] ____________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.test_pickle.TestCompression object at 0x000000000958fda8>
compression = 'zip', get_random_path = '__FJp5sso12b__.pickle'
@pytest.mark.parametrize('compression', [None, 'gzip', 'bz2', 'xz', "zip"])
def test_read_explicit(self, compression, get_random_path):
# issue 11666
if compression == 'xz':
tm._skip_if_no_lzma()
base = get_random_path
path1 = base + ".raw"
path2 = base + ".compressed"
with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
df = tm.makeDataFrame()
# write to uncompressed file
df.to_pickle(p1, compression=None)
# compress
self.compress_file(p1, p2, compression=compression)
# read compressed file
> df2 = pd.read_pickle(p2, compression=compression)
pandas/tests/io/test_pickle.py:473:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/io/pickle.py:110: in read_pickle
return try_read(path)
pandas/io/pickle.py:108: in try_read
lambda f: pc.load(f, encoding=encoding, compat=True))
pandas/io/pickle.py:82: in read_wrapper
is_text=False)
pandas/io/common.py:361: in _get_handle
zip_file = zipfile.ZipFile(path_or_buf)
../pypy/lib-python/2.7/zipfile.py:774: in __init__
self._RealGetContents()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <zipfile.ZipFile object at 0x00000000095ad4e8>
def _RealGetContents(self):
"""Read in the table of contents for the ZIP file."""
fp = self.fp
try:
endrec = _EndRecData(fp)
except IOError:
raise BadZipfile("File is not a zip file")
if not endrec:
> raise BadZipfile, "File is not a zip file"
E BadZipfile: File is not a zip file
../pypy/lib-python/2.7/zipfile.py:817: BadZipfile
____________________ TestCompression.test_read_infer[.zip] _____________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.test_pickle.TestCompression object at 0x00000000069954b0>
ext = '.zip', get_random_path = '__FJp5sso12b__.pickle'
@pytest.mark.parametrize('ext', ['', '.gz', '.bz2', '.xz', '.zip',
'.no_compress'])
def test_read_infer(self, ext, get_random_path):
if ext == '.xz':
tm._skip_if_no_lzma()
base = get_random_path
path1 = base + ".raw"
path2 = base + ext
compression = None
for c in self._compression_to_extension:
if self._compression_to_extension[c] == ext:
compression = c
break
with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2:
df = tm.makeDataFrame()
# write to uncompressed file
df.to_pickle(p1, compression=None)
# compress
self.compress_file(p1, p2, compression=compression)
# read compressed file by inferred compression method
> df2 = pd.read_pickle(p2)
pandas/tests/io/test_pickle.py:502:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/io/pickle.py:110: in read_pickle
return try_read(path)
pandas/io/pickle.py:108: in try_read
lambda f: pc.load(f, encoding=encoding, compat=True))
pandas/io/pickle.py:82: in read_wrapper
is_text=False)
pandas/io/common.py:361: in _get_handle
zip_file = zipfile.ZipFile(path_or_buf)
../pypy/lib-python/2.7/zipfile.py:774: in __init__
self._RealGetContents()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <zipfile.ZipFile object at 0x00000000073ac288>
def _RealGetContents(self):
"""Read in the table of contents for the ZIP file."""
fp = self.fp
try:
endrec = _EndRecData(fp)
except IOError:
raise BadZipfile("File is not a zip file")
if not endrec:
> raise BadZipfile, "File is not a zip file"
E BadZipfile: File is not a zip file
../pypy/lib-python/2.7/zipfile.py:817: BadZipfile
____________________ TestProtocol.test_read_bad_versions[3] ____________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.test_pickle.TestProtocol object at 0x0000000006994aa0>
protocol = 3, get_random_path = '__FJp5sso12b__.pickle'
@pytest.mark.parametrize('protocol', [3, 4])
@pytest.mark.skipif(sys.version_info[:2] >= (3, 4),
reason="Testing invalid parameters for "
"Python 2.x and 3.y (y < 4).")
def test_read_bad_versions(self, protocol, get_random_path):
# For Python 2.x (respectively 3.y with y < 4), [expected]
# HIGHEST_PROTOCOL should be 2 (respectively 3). Hence, the protocol
# parameter should not exceed 2 (respectively 3).
if sys.version_info[:2] < (3, 0):
expect_hp = 2
else:
expect_hp = 3
with tm.assert_raises_regex(ValueError,
"pickle protocol %d asked for; the highest"
" available protocol is %d" % (protocol,
expect_hp)):
with tm.ensure_clean(get_random_path) as path:
df = tm.makeDataFrame()
> df.to_pickle(path, protocol=protocol)
pandas/tests/io/test_pickle.py:539:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:2494: in __exit__
return self.exception_matches(exc_type, exc_value, trace_back)
pandas/util/testing.py:2529: in exception_matches
raise_with_traceback(e, trace_back)
pandas/tests/io/test_pickle.py:539: in test_read_bad_versions
df.to_pickle(path, protocol=protocol)
pandas/core/generic.py:1518: in to_pickle
protocol=protocol)
pandas/io/pickle.py:45: in to_pickle
pkl.dump(obj, f, protocol=protocol)
../pypy/lib_pypy/cPickle.py:119: in dump
Pickler(file, protocol).dump(obj)
../pypy/lib_pypy/cPickle.py:108: in __init__
PythonPickler.__init__(self, *args, **kw)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <cPickle.Pickler object at 0x00000000073ac330>
file = <closed file u'/tmp/tmpC0rtbE__FJp5sso12b__.pickle', mode 'wb' at 0x000000000509b5a0>
protocol = 3
def __init__(self, file, protocol=None):
"""This takes a file-like object for writing a pickle data stream.
The optional protocol argument tells the pickler to use the
given protocol; supported protocols are 0, 1, 2. The default
protocol is 0, to be backwards compatible. (Protocol 0 is the
only protocol that can be written to a file opened in text
mode and read back successfully. When using a protocol higher
than 0, make sure the file is opened in binary mode, both when
pickling and unpickling.)
Protocol 1 is more efficient than protocol 0; protocol 2 is
more efficient than protocol 1.
Specifying a negative protocol version selects the highest
protocol version supported. The higher the protocol used, the
more recent the version of Python needed to read the pickle
produced.
The file parameter must have a write() method that accepts a single
string argument. It can thus be an open file object, a StringIO
object, or any other custom object that meets this interface.
"""
if protocol is None:
protocol = 0
if protocol < 0:
protocol = HIGHEST_PROTOCOL
elif not 0 <= protocol <= HIGHEST_PROTOCOL:
> raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
E AssertionError: "pickle protocol 3 asked for; the highest available protocol is 2" does not match "pickle protocol must be <= 2"
../pypy/lib-python/2.7/pickle.py:202: AssertionError
____________________ TestProtocol.test_read_bad_versions[4] ____________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.test_pickle.TestProtocol object at 0x0000000006995558>
protocol = 4, get_random_path = '__FJp5sso12b__.pickle'
@pytest.mark.parametrize('protocol', [3, 4])
@pytest.mark.skipif(sys.version_info[:2] >= (3, 4),
reason="Testing invalid parameters for "
"Python 2.x and 3.y (y < 4).")
def test_read_bad_versions(self, protocol, get_random_path):
# For Python 2.x (respectively 3.y with y < 4), [expected]
# HIGHEST_PROTOCOL should be 2 (respectively 3). Hence, the protocol
# parameter should not exceed 2 (respectively 3).
if sys.version_info[:2] < (3, 0):
expect_hp = 2
else:
expect_hp = 3
with tm.assert_raises_regex(ValueError,
"pickle protocol %d asked for; the highest"
" available protocol is %d" % (protocol,
expect_hp)):
with tm.ensure_clean(get_random_path) as path:
df = tm.makeDataFrame()
> df.to_pickle(path, protocol=protocol)
pandas/tests/io/test_pickle.py:539:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:2494: in __exit__
return self.exception_matches(exc_type, exc_value, trace_back)
pandas/util/testing.py:2529: in exception_matches
raise_with_traceback(e, trace_back)
pandas/tests/io/test_pickle.py:539: in test_read_bad_versions
df.to_pickle(path, protocol=protocol)
pandas/core/generic.py:1518: in to_pickle
protocol=protocol)
pandas/io/pickle.py:45: in to_pickle
pkl.dump(obj, f, protocol=protocol)
../pypy/lib_pypy/cPickle.py:119: in dump
Pickler(file, protocol).dump(obj)
../pypy/lib_pypy/cPickle.py:108: in __init__
PythonPickler.__init__(self, *args, **kw)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <cPickle.Pickler object at 0x00000000073ac0c8>
file = <closed file u'/tmp/tmpADIQvU__FJp5sso12b__.pickle', mode 'wb' at 0x000000000509b520>
protocol = 4
def __init__(self, file, protocol=None):
"""This takes a file-like object for writing a pickle data stream.
The optional protocol argument tells the pickler to use the
given protocol; supported protocols are 0, 1, 2. The default
protocol is 0, to be backwards compatible. (Protocol 0 is the
only protocol that can be written to a file opened in text
mode and read back successfully. When using a protocol higher
than 0, make sure the file is opened in binary mode, both when
pickling and unpickling.)
Protocol 1 is more efficient than protocol 0; protocol 2 is
more efficient than protocol 1.
Specifying a negative protocol version selects the highest
protocol version supported. The higher the protocol used, the
more recent the version of Python needed to read the pickle
produced.
The file parameter must have a write() method that accepts a single
string argument. It can thus be an open file object, a StringIO
object, or any other custom object that meets this interface.
"""
if protocol is None:
protocol = 0
if protocol < 0:
protocol = HIGHEST_PROTOCOL
elif not 0 <= protocol <= HIGHEST_PROTOCOL:
> raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
E AssertionError: "pickle protocol 4 asked for; the highest available protocol is 2" does not match "pickle protocol must be <= 2"
../pypy/lib-python/2.7/pickle.py:202: AssertionError
___________________________ TestStata.test_read_dta2 ___________________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.test_stata.TestStata object at 0x000000000d2a86b0>
def test_read_dta2(self):
if LooseVersion(sys.version) < '2.7':
pytest.skip('datetime interp under 2.6 is faulty')
expected = DataFrame.from_records(
[
(
datetime(2006, 11, 19, 23, 13, 20),
1479596223000,
datetime(2010, 1, 20),
datetime(2010, 1, 8),
datetime(2010, 1, 1),
datetime(1974, 7, 1),
datetime(2010, 1, 1),
datetime(2010, 1, 1)
),
(
datetime(1959, 12, 31, 20, 3, 20),
-1479590,
datetime(1953, 10, 2),
datetime(1948, 6, 10),
datetime(1955, 1, 1),
datetime(1955, 7, 1),
datetime(1955, 1, 1),
datetime(2, 1, 1)
),
(
pd.NaT,
pd.NaT,
pd.NaT,
pd.NaT,
pd.NaT,
pd.NaT,
pd.NaT,
pd.NaT,
)
],
columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
'monthly_date', 'quarterly_date', 'half_yearly_date',
'yearly_date']
)
expected['yearly_date'] = expected['yearly_date'].astype('O')
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
parsed_114 = self.read_dta(self.dta2_114)
parsed_115 = self.read_dta(self.dta2_115)
parsed_117 = self.read_dta(self.dta2_117)
# 113 is buggy due to limits of date format support in Stata
# parsed_113 = self.read_dta(self.dta2_113)
# Remove resource warnings
w = [x for x in w if x.category is UserWarning]
# should get warning for each call to read_dta
assert len(w) == 3
# buggy test because of the NaT comparison on certain platforms
# Format 113 test fails since it does not support tc and tC formats
# tm.assert_frame_equal(parsed_113, expected)
tm.assert_frame_equal(parsed_114, expected,
> check_datetimelike_compat=True)
pandas/tests/io/test_stata.py:190:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1397: in assert_frame_equal
obj='DataFrame.iloc[:, {0}]'.format(i))
pandas/util/testing.py:1264: in assert_series_equal
assert_numpy_array_equal(left.get_values(), right.get_values(),
../pypy/lib-python/2.7/bdb.py:49: in trace_dispatch
return self.dispatch_line(frame)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pdb.Pdb instance at 0x00000000093a60a0>
frame = <frame object at 0x0000000007a54a70>
def dispatch_line(self, frame):
if self.stop_here(frame) or self.break_here(frame):
self.user_line(frame)
> if self.quitting: raise BdbQuit
E BdbQuit
../pypy/lib-python/2.7/bdb.py:68: BdbQuit
_________________ TestFancy.test_index_not_contains[index0-2] __________________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexing.test_indexing.TestFancy object at 0x0000000004c265d0>
index = Int64Index([0, 1, 2], dtype='int64'), val = '2'
@pytest.mark.parametrize("index,val", [
(pd.Index([0, 1, 2]), '2'),
(pd.Index([0, 1, '2']), 2),
(pd.Index([0, 1, 2, np.inf]), 4),
(pd.Index([0, 1, 2, np.nan]), 4),
(pd.Index([0, 1, 2, np.inf]), np.nan),
(pd.Index([0, 1, 2, np.nan]), np.inf),
# Checking if np.inf in Int64Index should not cause an OverflowError
# Related to GH 16957
(pd.Int64Index([0, 1, 2]), np.inf),
(pd.Int64Index([0, 1, 2]), np.nan),
(pd.UInt64Index([0, 1, 2]), np.inf),
(pd.UInt64Index([0, 1, 2]), np.nan),
])
def test_index_not_contains(self, index, val):
> assert val not in index
E AssertionError: assert '2' not in Int64Index([0, 1, 2], dtype='int64')
pandas/tests/indexing/test_indexing.py:605: AssertionError
______________________ TestFancy.test_index_type_coercion ______________________
[gw2] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.indexing.test_indexing.TestFancy object at 0x0000000004c0d718>
def test_index_type_coercion(self):
with catch_warnings(record=True):
# GH 11836
# if we have an index type and set it with something that looks
# to numpy like the same, but is actually, not
# (e.g. setting with a float or string '0')
# then we need to coerce to object
# integer indexes
for s in [Series(range(5)),
Series(range(5), index=range(1, 6))]:
assert s.index.is_integer()
for indexer in [lambda x: x.ix,
lambda x: x.loc,
lambda x: x]:
s2 = s.copy()
indexer(s2)[0.1] = 0
assert s2.index.is_floating()
assert indexer(s2)[0.1] == 0
s2 = s.copy()
indexer(s2)[0.0] = 0
exp = s.index
if 0 not in s:
exp = Index(s.index.tolist() + [0])
tm.assert_index_equal(s2.index, exp)
s2 = s.copy()
indexer(s2)['0'] = 0
> assert s2.index.is_object()
E assert False
E + where False = <bound method RangeIndex.is_object of RangeIndex(start=0, stop=5, step=1)>()
E + where <bound method RangeIndex.is_object of RangeIndex(start=0, stop=5, step=1)> = RangeIndex(start=0, stop=5, step=1).is_object
E + where RangeIndex(start=0, stop=5, step=1) = 0 0\n1 1\n2 2\n3 3\n4 4\ndtype: int64.index
pandas/tests/indexing/test_indexing.py:640: AssertionError
___________________________ TestStata.test_big_dates ___________________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.test_stata.TestStata object at 0x00000000070691d8>
def test_big_dates(self):
yr = [1960, 2000, 9999, 100, 2262, 1677]
mo = [1, 1, 12, 1, 4, 9]
dd = [1, 1, 31, 1, 22, 23]
hr = [0, 0, 23, 0, 0, 0]
mm = [0, 0, 59, 0, 0, 0]
ss = [0, 0, 59, 0, 0, 0]
expected = []
for i in range(len(yr)):
row = []
for j in range(7):
if j == 0:
row.append(
datetime(yr[i], mo[i], dd[i], hr[i], mm[i], ss[i]))
elif j == 6:
row.append(datetime(yr[i], 1, 1))
else:
row.append(datetime(yr[i], mo[i], dd[i]))
expected.append(row)
expected.append([NaT] * 7)
columns = ['date_tc', 'date_td', 'date_tw', 'date_tm', 'date_tq',
'date_th', 'date_ty']
# Fixes for weekly, quarterly,half,year
expected[2][2] = datetime(9999, 12, 24)
expected[2][3] = datetime(9999, 12, 1)
expected[2][4] = datetime(9999, 10, 1)
expected[2][5] = datetime(9999, 7, 1)
expected[4][2] = datetime(2262, 4, 16)
expected[4][3] = expected[4][4] = datetime(2262, 4, 1)
expected[4][5] = expected[4][6] = datetime(2262, 1, 1)
expected[5][2] = expected[5][3] = expected[
5][4] = datetime(1677, 10, 1)
expected[5][5] = expected[5][6] = datetime(1678, 1, 1)
expected = DataFrame(expected, columns=columns, dtype=np.object)
parsed_115 = read_stata(self.dta18_115)
parsed_117 = read_stata(self.dta18_117)
tm.assert_frame_equal(expected, parsed_115,
> check_datetimelike_compat=True)
pandas/tests/io/test_stata.py:771:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1397: in assert_frame_equal
obj='DataFrame.iloc[:, {0}]'.format(i))
pandas/util/testing.py:1264: in assert_series_equal
assert_numpy_array_equal(left.get_values(), right.get_values(),
../pypy/lib-python/2.7/bdb.py:49: in trace_dispatch
return self.dispatch_line(frame)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pdb.Pdb instance at 0x00000000089c2460>
frame = <frame object at 0x0000000009677620>
def dispatch_line(self, frame):
if self.stop_here(frame) or self.break_here(frame):
self.user_line(frame)
> if self.quitting: raise BdbQuit
E BdbQuit
../pypy/lib-python/2.7/bdb.py:68: BdbQuit
_______________ TestPandasContainer.test_frame_from_json_to_json _______________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.json.test_pandas.TestPandasContainer object at 0x0000000008526bb8>
def test_frame_from_json_to_json(self):
def _check_orient(df, orient, dtype=None, numpy=False,
convert_axes=True, check_dtype=True, raise_ok=None,
sort=None, check_index_type=True,
check_column_type=True, check_numpy_dtype=False):
if sort is not None:
df = df.sort_values(sort)
else:
df = df.sort_index()
# if we are not unique, then check that we are raising ValueError
# for the appropriate orients
if not df.index.is_unique and orient in ['index', 'columns']:
pytest.raises(
ValueError, lambda: df.to_json(orient=orient))
return
if (not df.columns.is_unique and
orient in ['index', 'columns', 'records']):
pytest.raises(
ValueError, lambda: df.to_json(orient=orient))
return
dfjson = df.to_json(orient=orient)
try:
unser = read_json(dfjson, orient=orient, dtype=dtype,
numpy=numpy, convert_axes=convert_axes)
except Exception as detail:
if raise_ok is not None:
if isinstance(detail, raise_ok):
return
raise
if sort is not None and sort in unser.columns:
unser = unser.sort_values(sort)
else:
unser = unser.sort_index()
if dtype is False:
check_dtype = False
if not convert_axes and df.index.dtype.type == np.datetime64:
unser.index = DatetimeIndex(
unser.index.values.astype('i8') * 1e6)
if orient == "records":
# index is not captured in this orientation
tm.assert_almost_equal(df.values, unser.values,
check_dtype=check_numpy_dtype)
tm.assert_index_equal(df.columns, unser.columns,
exact=check_column_type)
elif orient == "values":
# index and cols are not captured in this orientation
if numpy is True and df.shape == (0, 0):
assert unser.shape[0] == 0
else:
tm.assert_almost_equal(df.values, unser.values,
check_dtype=check_numpy_dtype)
elif orient == "split":
# index and col labels might not be strings
unser.index = [str(i) for i in unser.index]
unser.columns = [str(i) for i in unser.columns]
if sort is None:
unser = unser.sort_index()
tm.assert_almost_equal(df.values, unser.values,
check_dtype=check_numpy_dtype)
else:
if convert_axes:
tm.assert_frame_equal(df, unser, check_dtype=check_dtype,
check_index_type=check_index_type,
check_column_type=check_column_type)
else:
tm.assert_frame_equal(df, unser, check_less_precise=False,
check_dtype=check_dtype)
def _check_all_orients(df, dtype=None, convert_axes=True,
raise_ok=None, sort=None, check_index_type=True,
check_column_type=True):
# numpy=False
if convert_axes:
_check_orient(df, "columns", dtype=dtype, sort=sort,
check_index_type=False, check_column_type=False)
_check_orient(df, "records", dtype=dtype, sort=sort,
check_index_type=False, check_column_type=False)
_check_orient(df, "split", dtype=dtype, sort=sort,
check_index_type=False, check_column_type=False)
_check_orient(df, "index", dtype=dtype, sort=sort,
check_index_type=False, check_column_type=False)
_check_orient(df, "values", dtype=dtype, sort=sort,
check_index_type=False, check_column_type=False)
_check_orient(df, "columns", dtype=dtype,
convert_axes=False, sort=sort)
_check_orient(df, "records", dtype=dtype,
convert_axes=False, sort=sort)
_check_orient(df, "split", dtype=dtype,
convert_axes=False, sort=sort)
_check_orient(df, "index", dtype=dtype,
convert_axes=False, sort=sort)
_check_orient(df, "values", dtype=dtype,
convert_axes=False, sort=sort)
# numpy=True and raise_ok might be not None, so ignore the error
if convert_axes:
_check_orient(df, "columns", dtype=dtype, numpy=True,
raise_ok=raise_ok, sort=sort,
check_index_type=False, check_column_type=False)
_check_orient(df, "records", dtype=dtype, numpy=True,
raise_ok=raise_ok, sort=sort,
check_index_type=False, check_column_type=False)
_check_orient(df, "split", dtype=dtype, numpy=True,
raise_ok=raise_ok, sort=sort,
check_index_type=False, check_column_type=False)
_check_orient(df, "index", dtype=dtype, numpy=True,
raise_ok=raise_ok, sort=sort,
check_index_type=False, check_column_type=False)
_check_orient(df, "values", dtype=dtype, numpy=True,
raise_ok=raise_ok, sort=sort,
check_index_type=False, check_column_type=False)
_check_orient(df, "columns", dtype=dtype, numpy=True,
convert_axes=False, raise_ok=raise_ok, sort=sort)
_check_orient(df, "records", dtype=dtype, numpy=True,
convert_axes=False, raise_ok=raise_ok, sort=sort)
_check_orient(df, "split", dtype=dtype, numpy=True,
convert_axes=False, raise_ok=raise_ok, sort=sort)
_check_orient(df, "index", dtype=dtype, numpy=True,
convert_axes=False, raise_ok=raise_ok, sort=sort)
_check_orient(df, "values", dtype=dtype, numpy=True,
convert_axes=False, raise_ok=raise_ok, sort=sort)
# basic
> _check_all_orients(self.frame)
pandas/tests/io/json/test_pandas.py:274:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/io/json/test_pandas.py:248: in _check_all_orients
check_index_type=False, check_column_type=False)
pandas/tests/io/json/test_pandas.py:167: in _check_orient
numpy=numpy, convert_axes=convert_axes)
pandas/io/json/json.py:356: in read_json
date_unit).parse()
pandas/io/json/json.py:421: in parse
self._parse_numpy()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pandas.io.json.json.FrameParser object at 0x000000000b8ef6e0>
def _parse_numpy(self):
json = self.json
orient = self.orient
if orient == "columns":
args = loads(json, dtype=None, numpy=True, labelled=True,
> precise_float=self.precise_float)
E SystemError: Bad internal call!
pandas/io/json/json.py:616: SystemError
________________________ TestStata.test_read_chunks_117 ________________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.test_stata.TestStata object at 0x0000000007001280>
def test_read_chunks_117(self):
files_117 = [self.dta1_117, self.dta2_117, self.dta3_117,
self.dta4_117, self.dta14_117, self.dta15_117,
self.dta16_117, self.dta17_117, self.dta18_117,
self.dta19_117, self.dta20_117]
for fname in files_117:
for chunksize in 1, 2:
for convert_categoricals in False, True:
for convert_dates in False, True:
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
parsed = read_stata(
fname,
convert_categoricals=convert_categoricals,
convert_dates=convert_dates)
itr = read_stata(
fname, iterator=True,
convert_categoricals=convert_categoricals,
convert_dates=convert_dates)
pos = 0
for j in range(5):
with warnings.catch_warnings(record=True) as w: # noqa
warnings.simplefilter("always")
try:
chunk = itr.read(chunksize)
except StopIteration:
break
from_frame = parsed.iloc[pos:pos + chunksize, :]
tm.assert_frame_equal(
from_frame, chunk, check_dtype=False,
check_datetimelike_compat=True,
> check_categorical=False)
pandas/tests/io/test_stata.py:1028:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1397: in assert_frame_equal
obj='DataFrame.iloc[:, {0}]'.format(i))
pandas/util/testing.py:1264: in assert_series_equal
assert_numpy_array_equal(left.get_values(), right.get_values(),
../pypy/lib-python/2.7/bdb.py:49: in trace_dispatch
return self.dispatch_line(frame)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pdb.Pdb instance at 0x000000000a70ce20>
frame = <frame object at 0x000000000a57f468>
def dispatch_line(self, frame):
if self.stop_here(frame) or self.break_here(frame):
self.user_line(frame)
> if self.quitting: raise BdbQuit
E BdbQuit
../pypy/lib-python/2.7/bdb.py:68: BdbQuit
________________________ TestStata.test_read_chunks_115 ________________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.test_stata.TestStata object at 0x0000000007001600>
def test_read_chunks_115(self):
files_115 = [self.dta2_115, self.dta3_115, self.dta4_115,
self.dta14_115, self.dta15_115, self.dta16_115,
self.dta17_115, self.dta18_115, self.dta19_115,
self.dta20_115]
for fname in files_115:
for chunksize in 1, 2:
for convert_categoricals in False, True:
for convert_dates in False, True:
# Read the whole file
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
parsed = read_stata(
fname,
convert_categoricals=convert_categoricals,
convert_dates=convert_dates)
# Compare to what we get when reading by chunk
itr = read_stata(
fname, iterator=True,
convert_dates=convert_dates,
convert_categoricals=convert_categoricals)
pos = 0
for j in range(5):
with warnings.catch_warnings(record=True) as w: # noqa
warnings.simplefilter("always")
try:
chunk = itr.read(chunksize)
except StopIteration:
break
from_frame = parsed.iloc[pos:pos + chunksize, :]
tm.assert_frame_equal(
from_frame, chunk, check_dtype=False,
check_datetimelike_compat=True,
> check_categorical=False)
pandas/tests/io/test_stata.py:1095:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/util/testing.py:1397: in assert_frame_equal
obj='DataFrame.iloc[:, {0}]'.format(i))
pandas/util/testing.py:1264: in assert_series_equal
assert_numpy_array_equal(left.get_values(), right.get_values(),
../pypy/lib-python/2.7/bdb.py:49: in trace_dispatch
return self.dispatch_line(frame)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pdb.Pdb instance at 0x000000000a70a560>
frame = <frame object at 0x000000000a57e650>
def dispatch_line(self, frame):
if self.stop_here(frame) or self.break_here(frame):
self.user_line(frame)
> if self.quitting: raise BdbQuit
E BdbQuit
../pypy/lib-python/2.7/bdb.py:68: BdbQuit
____________________ TestPandasContainer.test_misc_example _____________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.json.test_pandas.TestPandasContainer object at 0x0000000006e7ead8>
def test_misc_example(self):
# parsing unordered input fails
> result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True)
pandas/tests/io/json/test_pandas.py:777:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/io/json/json.py:356: in read_json
date_unit).parse()
pandas/io/json/json.py:421: in parse
self._parse_numpy()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pandas.io.json.json.FrameParser object at 0x00000000035938a0>
def _parse_numpy(self):
json = self.json
orient = self.orient
if orient == "columns":
args = loads(json, dtype=None, numpy=True, labelled=True,
> precise_float=self.precise_float)
E SystemError: Bad internal call!
pandas/io/json/json.py:616: SystemError
____________ TestPandasContainer.test_data_frame_size_after_to_json ____________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.json.test_pandas.TestPandasContainer object at 0x0000000006ecac60>
def test_data_frame_size_after_to_json(self):
# GH15344
df = DataFrame({'a': [str(1)]})
> size_before = df.memory_usage(index=True, deep=True).sum()
pandas/tests/io/json/test_pandas.py:1082:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/core/frame.py:1869: in memory_usage
for col, c in self.iteritems()], index=self.columns)
pandas/core/series.py:2558: in memory_usage
v = super(Series, self).memory_usage(deep=deep)
pandas/core/base.py:1075: in memory_usage
v += lib.memory_usage_of_objects(self.values)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> s += arr[i].__sizeof__()
E AttributeError: 'str' object has no attribute '__sizeof__'
pandas/_libs/lib.pyx:128: AttributeError
________________ TestDataFrameFormatting.test_show_null_counts _________________
[gw1] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.formats.test_format.TestDataFrameFormatting object at 0x000000000a714c60>
def test_show_null_counts(self):
df = DataFrame(1, columns=range(10), index=range(10))
df.iloc[1, 1] = np.nan
def check(null_counts, result):
buf = StringIO()
df.info(buf=buf, null_counts=null_counts)
assert ('non-null' in buf.getvalue()) is result
with option_context('display.max_info_rows', 20,
'display.max_info_columns', 20):
> check(None, True)
pandas/tests/io/formats/test_format.py:157:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pandas/tests/io/formats/test_format.py:152: in check
df.info(buf=buf, null_counts=null_counts)
pandas/core/frame.py:1835: in info
mem_usage = self.memory_usage(index=True, deep=deep).sum()
pandas/core/frame.py:1871: in memory_usage
result = Series(self.index.memory_usage(deep=deep),
pandas/core/indexes/range.py:224: in memory_usage
return self.nbytes
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> val = self.func(obj)
E TypeError: sys.getsizeof() is not implemented on PyPy.
E
E First note that the CPython documentation says that this function may
E raise a TypeError, so if you are seeing it, it means that the program
E you are using is not correctly handling this case.
E
E On PyPy, though, it always raises TypeError. Before looking for
E alternatives, please take a moment to read the following explanation as
E to why it is the case. What you are looking for may not be possible.
E
E A memory profiler using this function is most likely to give results
E inconsistent with reality on PyPy. It would be possible to have
E sys.getsizeof() return a number (with enough work), but that may or
E may not represent how much memory the object uses. It doesn't even
E make really sense to ask how much *one* object uses, in isolation
E with the rest of the system. For example, instances have maps,
E which are often shared across many instances; in this case the maps
E would probably be ignored by an implementation of sys.getsizeof(),
E but their overhead is important in some cases if they are many
E instances with unique maps. Conversely, equal strings may share
E their internal string data even if they are different objects---or
E empty containers may share parts of their internals as long as they
E are empty. Even stranger, some lists create objects as you read
E them; if you try to estimate the size in memory of range(10**6) as
E the sum of all items' size, that operation will by itself create one
E million integer objects that never existed in the first place.
pandas/_libs/src/properties.pyx:34: TypeError
___________________ TestNumpyJSONTests.test_ArrayNumpyExcept ___________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.json.test_ujson.TestNumpyJSONTests object at 0x00000000057cb6a8>
def test_ArrayNumpyExcept(self):
input = ujson.dumps([42, {}, 'a'])
try:
ujson.decode(input, numpy=True)
assert False, "Expected exception!"
except(TypeError):
pass
except:
assert False, "Wrong exception"
input = ujson.dumps(['a', 'b', [], 'c'])
try:
ujson.decode(input, numpy=True)
assert False, "Expected exception!"
except(ValueError):
pass
except:
assert False, "Wrong exception"
input = ujson.dumps([['a'], 42])
try:
ujson.decode(input, numpy=True)
assert False, "Expected exception!"
except(ValueError):
pass
except:
assert False, "Wrong exception"
input = ujson.dumps([42, ['a'], 42])
try:
ujson.decode(input, numpy=True)
assert False, "Expected exception!"
except(ValueError):
pass
except:
assert False, "Wrong exception"
input = ujson.dumps([{}, []])
try:
ujson.decode(input, numpy=True)
assert False, "Expected exception!"
except(ValueError):
pass
except:
assert False, "Wrong exception"
input = ujson.dumps([42, None])
try:
ujson.decode(input, numpy=True)
assert False, "Expected exception!"
except(TypeError):
pass
except:
assert False, "Wrong exception"
input = ujson.dumps([{'a': 'b'}])
try:
ujson.decode(input, numpy=True, labelled=True)
assert False, "Expected exception!"
except(ValueError):
pass
except:
assert False, "Wrong exception"
input = ujson.dumps({'a': {'b': {'c': 42}}})
try:
ujson.decode(input, numpy=True, labelled=True)
assert False, "Expected exception!"
except(ValueError):
pass
except:
assert False, "Wrong exception"
input = ujson.dumps([{'a': 42, 'b': 23}, {'c': 17}])
try:
ujson.decode(input, numpy=True, labelled=True)
assert False, "Expected exception!"
except(ValueError):
pass
except:
> assert False, "Wrong exception"
E AssertionError: Wrong exception
E assert False
pandas/tests/io/json/test_ujson.py:1187: AssertionError
__________________ TestNumpyJSONTests.test_ArrayNumpyLabelled __________________
[gw3] linux2 -- Python 2.7.13 /home/matti/pypy_stuff/pypy-test/bin/pypy
self = <pandas.tests.io.json.test_ujson.TestNumpyJSONTests object at 0x00000000057cbd00>
def test_ArrayNumpyLabelled(self):
input = {'a': []}