Skip to content

Instantly share code, notes, and snippets.

@dalejung
Created August 13, 2012 20:57
Show Gist options
  • Save dalejung/3344040 to your computer and use it in GitHub Desktop.
Save dalejung/3344040 to your computer and use it in GitHub Desktop.
Test nbviewer #notebook-project #inactive
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "intraday binning error",
"notebook_path": "https://gist.github.com/3344040/Untitled0.ipynb"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"import pandas.util.testing as tm"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"start = datetime.datetime(1999, 3, 1, 5)\n",
"end = datetime.datetime(2012, 7, 31, 4)\n",
"bad_ind = pd.date_range(start, end, freq=\"30min\")\n",
"df = pd.DataFrame({'close':1}, index=bad_ind)\n",
"try:\n",
"\tdf.resample('AS', 'sum')\n",
"except ValueError as e:\n",
" print e"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Values falls after last bin\n"
]
}
],
"prompt_number": 74
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# These are the edges from come from _get_range_edges\n",
"edge_start = datetime.datetime(1998, 1, 1, 5)\n",
"edge_end = datetime.datetime(2013, 1, 1, 4)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"bad_ind = pd.DatetimeIndex(start=edge_start, end=edge_end, freq=\"AS\")\n",
"bad_ind\n",
"assert bad_ind[-1] < df.index[-1]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"good_ind = pd.DatetimeIndex(start=edge_start, end=datetime.datetime(2013, 1, 1, 5, 5), freq=\"AS\")\n",
"good_ind\n",
"assert not good_ind[-1] < df.index[-1]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Any value where the time() is less than the starting time will fail\n",
"from pandas.tseries.resample import _get_range_edges\n",
"bad_ends = pd.DatetimeIndex(start=start, freq=\"30min\", periods=10000)\n",
"bad_ends = bad_ends[bad_ends.hour < start.hour]\n",
"test = pd.DataFrame({'passed':np.nan, 'msg':''}, index=bad_ends)\n",
"test['bin_start'] = None\n",
"test['bin_end'] = None\n",
"offset = pd.offsets.YearBegin(months=1)\n",
"\n",
"for end in bad_ends:\n",
" df = pd.DataFrame({'close':1}, index=pd.DatetimeIndex(start=start, end=end, freq=\"30min\"))\n",
" try:\n",
" df.resample('AS', 'sum')\n",
" except ValueError as e:\n",
" test.passed.ix[end] = -1\n",
" assert str(e) == 'Values falls after last bin'\n",
" test.msg.ix[end] = str(e)\n",
" b_start, b_end = _get_range_edges(df.index, offset=offset, closed=\"right\")\n",
" test.bin_start.ix[end] = b_start\n",
" test.bin_end.ix[end] = b_end\n",
" else:\n",
" assert False, \"None should pass\"\n",
" test.passed.ix[end] = 1"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 152
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"test.head().stack()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 153,
"text": [
"1999-03-02 00:00:00 msg Values falls after last bin\n",
" passed -1\n",
" bin_start 1999-01-01 05:00:00\n",
" bin_end 2000-01-01 00:00:00\n",
"1999-03-02 00:30:00 msg Values falls after last bin\n",
" passed -1\n",
" bin_start 1999-01-01 05:00:00\n",
" bin_end 2000-01-01 00:30:00\n",
"1999-03-02 01:00:00 msg Values falls after last bin\n",
" passed -1\n",
" bin_start 1999-01-01 05:00:00\n",
" bin_end 2000-01-01 01:00:00\n",
"1999-03-02 01:30:00 msg Values falls after last bin\n",
" passed -1\n",
" bin_start 1999-01-01 05:00:00\n",
" bin_end 2000-01-01 01:30:00\n",
"1999-03-02 02:00:00 msg Values falls after last bin\n",
" passed -1\n",
" bin_start 1999-01-01 05:00:00\n",
" bin_end 2000-01-01 02:00:00"
]
}
],
"prompt_number": 153
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Monthly \n",
"========"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"start = datetime.datetime(1999, 3, 1, 5)\n",
"end = datetime.datetime(2012, 7, 31, 4)\n",
"bad_ind = pd.date_range(start, end, freq=\"30min\")\n",
"df = pd.DataFrame({'close':1}, index=bad_ind)\n",
"try:\n",
"\tdf.resample('MS', 'sum')\n",
"except ValueError as e:\n",
" print e"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Values falls after last bin\n"
]
}
],
"prompt_number": 155
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment