Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save cchwala/7189654275c642f2f257dd1f1523e31d to your computer and use it in GitHub Desktop.
Save cchwala/7189654275c642f2f257dd1f1523e31d to your computer and use it in GitHub Desktop.
Show where pandas `to_timedelta()` overflows without raising an error
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In some very particular cases, `pandas.to_timedelta()` (pandas 0.20.1, see the version output below) silently overflows instead of raising an `OverflowError`. This is demonstrated below."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pandas version: 0.20.1\n",
" numpy version: 1.12.1\n"
]
}
],
"source": [
"# Record the library versions this demonstration was run with.\n",
"for label, module in (('pandas version: %s', pd), (' numpy version: %s', np)):\n",
"    print(label % module.__version__)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Function to create floats separated by the smallest representable increment"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def float_array_with_smallest_increments(initial_float, N_points_in_one_direction):\n",
"    \"\"\"Return neighbouring representable floats centred on `initial_float`.\n",
"\n",
"    Walks `N_points_in_one_direction` steps upward and the same number of\n",
"    steps downward from `initial_float` with `np.nextafter`, one representable\n",
"    float per step, and returns all values as a single ascending array of\n",
"    length `2 * N_points_in_one_direction + 1`.\n",
"\n",
"    The walk is aimed at +/-inf instead of the notebook-level `int_max` /\n",
"    `int_min`, so the function is self-contained and also steps in the right\n",
"    direction for start values outside the int64 range.\n",
"    \"\"\"\n",
"    floats_upward = [initial_float]\n",
"    floats_downward = [initial_float]\n",
"    for _ in range(N_points_in_one_direction):\n",
"        floats_upward.append(np.nextafter(floats_upward[-1], np.inf))\n",
"        floats_downward.append(np.nextafter(floats_downward[-1], -np.inf))\n",
"    # Reverse the downward walk and drop the duplicated start value.\n",
"    return np.array(floats_downward[::-1] + floats_upward[1:])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Smallest and largest values of numpy's int64 type.\n",
"int64_info = np.iinfo(np.int64)\n",
"int_min, int_max = int64_info.min, int64_info.max"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test overflow of `to_timedelta()` using seconds "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9223372036.85476684570312500000\n",
"9223372036.85476875305175781250\n",
"9223372036.85477066040039062500\n",
"9223372036.85477256774902343750\n",
"9223372036.85477447509765625000\n",
"9223372036.85477638244628906250\n",
"9223372036.85477828979492187500\n",
"9223372036.85478019714355468750\n",
"9223372036.85478210449218750000\n",
"9223372036.85478401184082031250\n",
"9223372036.85478591918945312500\n"
]
}
],
"source": [
"# Centre the test values on int_max / 1e9 (the int64 maximum scaled from\n",
"# nanoseconds to seconds) and take 5 neighbouring floats on each side.\n",
"seconds_as_floats = float_array_with_smallest_increments(int_max/1e9, 5)\n",
"\n",
"for value in seconds_as_floats:\n",
"    print('%.20f' % value)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimedeltaIndex([ '106751 days 23:47:16.854767',\n",
" '106751 days 23:47:16.854769',\n",
" '106751 days 23:47:16.854771',\n",
" '106751 days 23:47:16.854773',\n",
" '106751 days 23:47:16.854774',\n",
" '-106752 days +00:12:43.145224',\n",
" '-106752 days +00:12:43.145226',\n",
" '-106752 days +00:12:43.145228',\n",
" '-106752 days +00:12:43.145230',\n",
" '-106752 days +00:12:43.145232',\n",
" '-106752 days +00:12:43.145234'],\n",
" dtype='timedelta64[ns]', freq=None)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.to_timedelta(seconds_as_floats, unit='s')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**It overflows without raising: the six largest inputs silently wrap around to negative timedeltas!**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test overflow of `to_timedelta()` using microseconds "
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9223372036854766.00000000000000000000\n",
"9223372036854768.00000000000000000000\n",
"9223372036854770.00000000000000000000\n",
"9223372036854772.00000000000000000000\n",
"9223372036854774.00000000000000000000\n",
"9223372036854776.00000000000000000000\n",
"9223372036854778.00000000000000000000\n",
"9223372036854780.00000000000000000000\n",
"9223372036854782.00000000000000000000\n",
"9223372036854784.00000000000000000000\n",
"9223372036854786.00000000000000000000\n"
]
}
],
"source": [
"# Centre the test values on int_max / 1e3 (the int64 maximum scaled from\n",
"# nanoseconds to microseconds) and take 5 neighbouring floats on each side.\n",
"microseconds_as_floats = float_array_with_smallest_increments(int_max/1e3, 5)\n",
"\n",
"for value in microseconds_as_floats:\n",
"    print('%.20f' % value)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"ename": "OverflowError",
"evalue": "Python int too large to convert to C long",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-190654f2ef57>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_timedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmicroseconds_as_floats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'us'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36mto_timedelta\u001b[0;34m(arg, unit, box, errors)\u001b[0m\n\u001b[1;32m 80\u001b[0m errors=errors, name=arg.name)\n\u001b[1;32m 81\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mis_list_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ndim'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 82\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_convert_listlike\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbox\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbox\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 83\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ndim'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m raise TypeError('arg must be a string, timedelta, list, tuple, '\n",
"\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36m_convert_listlike\u001b[0;34m(arg, unit, box, errors, name)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 163\u001b[0m value = tslib.array_to_timedelta64(_ensure_object(arg),\n\u001b[0;32m--> 164\u001b[0;31m unit=unit, errors=errors)\n\u001b[0m\u001b[1;32m 165\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'timedelta64[ns]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_timedelta64 (pandas/_libs/tslib.c:58701)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_timedelta64 (pandas/_libs/tslib.c:58408)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:61660)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.cast_from_unit (pandas/_libs/tslib.c:68471)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mOverflowError\u001b[0m: Python int too large to convert to C long"
]
}
],
"source": [
"pd.to_timedelta(microseconds_as_floats, unit='us')"
]
},
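{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Here it correctly raises an `OverflowError`.** The next two cells narrow this down: the first four values convert without error, while the single value at index 5 raises."
]
},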
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"TimedeltaIndex(['106751 days 23:47:16.854766', '106751 days 23:47:16.854768',\n",
" '106751 days 23:47:16.854770', '106751 days 23:47:16.854772'],\n",
" dtype='timedelta64[ns]', freq=None)"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.to_timedelta(microseconds_as_floats[0:4], unit='us')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"ename": "OverflowError",
"evalue": "Python int too large to convert to C long",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-9bedf031e2a6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_timedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmicroseconds_as_floats\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'us'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36mto_timedelta\u001b[0;34m(arg, unit, box, errors)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;31m# ...so it must be a scalar value. Return scalar.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m return _coerce_scalar_to_timedelta_type(arg, unit=unit,\n\u001b[0;32m---> 89\u001b[0;31m box=box, errors=errors)\n\u001b[0m\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36m_coerce_scalar_to_timedelta_type\u001b[0;34m(r, unit, box, errors)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 134\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtslib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_to_timedelta64\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 135\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'raise'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:62190)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:61660)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.cast_from_unit (pandas/_libs/tslib.c:68471)\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mOverflowError\u001b[0m: Python int too large to convert to C long"
]
}
],
"source": [
"pd.to_timedelta(microseconds_as_floats[5], unit='us')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment