Created
August 13, 2012 20:57
-
-
Save dalejung/3344040 to your computer and use it in GitHub Desktop.
Test nbviewer #notebook-project #inactive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "intraday binning error", | |
"notebook_path": "https://gist.github.com/3344040/Untitled0.ipynb" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas as pd\n", | |
"import pandas.util.testing as tm" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"start = datetime.datetime(1999, 3, 1, 5)\n", | |
"end = datetime.datetime(2012, 7, 31, 4)\n", | |
"bad_ind = pd.date_range(start, end, freq=\"30min\")\n", | |
"df = pd.DataFrame({'close':1}, index=bad_ind)\n", | |
"try:\n", | |
"\tdf.resample('AS', 'sum')\n", | |
"except ValueError as e:\n", | |
" print e" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Values falls after last bin\n" | |
] | |
} | |
], | |
"prompt_number": 74 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# These are the edges that come from _get_range_edges\n", | |
"edge_start = datetime.datetime(1998, 1, 1, 5)\n", | |
"edge_end = datetime.datetime(2013, 1, 1, 4)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"bad_ind = pd.DatetimeIndex(start=edge_start, end=edge_end, freq=\"AS\")\n", | |
"bad_ind\n", | |
"assert bad_ind[-1] < df.index[-1]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"good_ind = pd.DatetimeIndex(start=edge_start, end=datetime.datetime(2013, 1, 1, 5, 5), freq=\"AS\")\n", | |
"good_ind\n", | |
"assert not good_ind[-1] < df.index[-1]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 18 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Any end timestamp whose time of day is earlier than the start's time of day will fail\n", | |
"from pandas.tseries.resample import _get_range_edges\n", | |
"bad_ends = pd.DatetimeIndex(start=start, freq=\"30min\", periods=10000)\n", | |
"bad_ends = bad_ends[bad_ends.hour < start.hour]\n", | |
"test = pd.DataFrame({'passed':np.nan, 'msg':''}, index=bad_ends)\n", | |
"test['bin_start'] = None\n", | |
"test['bin_end'] = None\n", | |
"offset = pd.offsets.YearBegin(months=1)\n", | |
"\n", | |
"for end in bad_ends:\n", | |
" df = pd.DataFrame({'close':1}, index=pd.DatetimeIndex(start=start, end=end, freq=\"30min\"))\n", | |
" try:\n", | |
" df.resample('AS', 'sum')\n", | |
" except ValueError as e:\n", | |
" test.passed.ix[end] = -1\n", | |
" assert str(e) == 'Values falls after last bin'\n", | |
" test.msg.ix[end] = str(e)\n", | |
" b_start, b_end = _get_range_edges(df.index, offset=offset, closed=\"right\")\n", | |
" test.bin_start.ix[end] = b_start\n", | |
" test.bin_end.ix[end] = b_end\n", | |
" else:\n", | |
" assert False, \"None should pass\"\n", | |
" test.passed.ix[end] = 1" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 152 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"test.head().stack()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 153, | |
"text": [ | |
"1999-03-02 00:00:00 msg Values falls after last bin\n", | |
" passed -1\n", | |
" bin_start 1999-01-01 05:00:00\n", | |
" bin_end 2000-01-01 00:00:00\n", | |
"1999-03-02 00:30:00 msg Values falls after last bin\n", | |
" passed -1\n", | |
" bin_start 1999-01-01 05:00:00\n", | |
" bin_end 2000-01-01 00:30:00\n", | |
"1999-03-02 01:00:00 msg Values falls after last bin\n", | |
" passed -1\n", | |
" bin_start 1999-01-01 05:00:00\n", | |
" bin_end 2000-01-01 01:00:00\n", | |
"1999-03-02 01:30:00 msg Values falls after last bin\n", | |
" passed -1\n", | |
" bin_start 1999-01-01 05:00:00\n", | |
" bin_end 2000-01-01 01:30:00\n", | |
"1999-03-02 02:00:00 msg Values falls after last bin\n", | |
" passed -1\n", | |
" bin_start 1999-01-01 05:00:00\n", | |
" bin_end 2000-01-01 02:00:00" | |
] | |
} | |
], | |
"prompt_number": 153 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Monthly \n", | |
"========" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"start = datetime.datetime(1999, 3, 1, 5)\n", | |
"end = datetime.datetime(2012, 7, 31, 4)\n", | |
"bad_ind = pd.date_range(start, end, freq=\"30min\")\n", | |
"df = pd.DataFrame({'close':1}, index=bad_ind)\n", | |
"try:\n", | |
"\tdf.resample('MS', 'sum')\n", | |
"except ValueError as e:\n", | |
" print e" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Values falls after last bin\n" | |
] | |
} | |
], | |
"prompt_number": 155 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment