mcleary/assert_frames_equal.ipynb

## assert_frames_equal.ipynb
{
 "metadata": {
  "name": "",
  "signature": "sha256:9c26f46f87352d6bedc804325404deca8e9cf8b7e2e0c151b7a2635f27e6d447"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "import numpy.testing as npt\n",
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def assert_frames_equal(actual, expected, use_close=False):\n",
      "    \"\"\"\n",
      "    Compare DataFrame items by index and column and\n",
      "    raise AssertionError if any item is not equal.\n",
      "\n",
      "    Ordering is unimportant, items are compared only by label.\n",
      "    NaN and infinite values are supported.\n",
      "    \n",
      "    Parameters\n",
      "    ----------\n",
      "    actual : pandas.DataFrame\n",
      "    expected : pandas.DataFrame\n",
      "    use_close : bool, optional\n",
      "        If True, use numpy.testing.assert_allclose instead of\n",
      "        numpy.testing.assert_equal.\n",
      "\n",
      "    \"\"\"\n",
      "    if use_close:\n",
      "        comp = npt.assert_allclose\n",
      "    else:\n",
      "        comp = npt.assert_equal\n",
      "\n",
      "    assert (isinstance(actual, pd.DataFrame) and\n",
      "            isinstance(expected, pd.DataFrame)), \\\n",
      "        'Inputs must both be pandas DataFrames.'\n",
      "\n",
      "    for i, exp_row in expected.iterrows():\n",
      "        assert i in actual.index, 'Expected row {!r} not found.'.format(i)\n",
      "\n",
      "        act_row = actual.loc[i]\n",
      "\n",
      "        for j, exp_item in exp_row.iteritems():\n",
      "            assert j in act_row.index, \\\n",
      "                'Expected column {!r} not found.'.format(j)\n",
      "\n",
      "            act_item = act_row[j]\n",
      "\n",
      "            try:\n",
      "                comp(act_item, exp_item)\n",
      "            except AssertionError as e:\n",
      "                raise AssertionError(\n",
      "                    e.message + '\\n\\nColumn: {!r}\\nRow: {!r}'.format(j, i))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 53
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "expected = pd.DataFrame({'a': [1, np.nan, 3],\n",
      "                         'b': [np.nan, 5, 6]},\n",
      "                        index=['x', 'y', 'z'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 54
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "actual = pd.DataFrame([[4, 1],\n",
      "                       [6, 3],\n",
      "                       [5, np.nan]],\n",
      "                      index=['x', 'z', 'y'],\n",
      "                      columns=['b', 'a'])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 55
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "assert_frames_equal(actual, actual)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 56
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "assert_frames_equal(actual, expected)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "ename": "AssertionError",
       "evalue": "\nItems are not equal:\n ACTUAL: 4.0\n DESIRED: nan\n\nColumn: 'b'\nRow: 'x'",
       "output_type": "pyerr",
       "traceback": [
        "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
        "\u001b[0;32m<ipython-input-57-2fa991ae8dd6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0massert_frames_equal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactual\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpected\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
        "\u001b[0;32m<ipython-input-53-fedbc359fc19>\u001b[0m in \u001b[0;36massert_frames_equal\u001b[0;34m(actual, expected)\u001b[0m\n\u001b[1;32m     24\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mAssertionError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     25\u001b[0m                 raise AssertionError(e.message + \n\u001b[0;32m---> 26\u001b[0;31m                                      '\\n\\nColumn: {!r}\\nRow: {!r}'.format(j, i))\n\u001b[0m",
        "\u001b[0;31mAssertionError\u001b[0m: \nItems are not equal:\n ACTUAL: 4.0\n DESIRED: nan\n\nColumn: 'b'\nRow: 'x'"
       ]
      }
     ],
     "prompt_number": 57
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}
	{
	"metadata": {
	"name": "",
	"signature": "sha256:9c26f46f87352d6bedc804325404deca8e9cf8b7e2e0c151b7a2635f27e6d447"
	},
	"nbformat": 3,
	"nbformat_minor": 0,
	"worksheets": [
	{
	"cells": [
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"import numpy as np\n",
	"import numpy.testing as npt\n",
	"import pandas as pd"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 9
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"def assert_frames_equal(actual, expected, use_close=False):\n",
	" \"\"\"\n",
	" Compare DataFrame items by index and column and\n",
	" raise AssertionError if any item is not equal.\n",
	"\n",
	" Ordering is unimportant, items are compared only by label.\n",
	" NaN and infinite values are supported.\n",
	" \n",
	" Parameters\n",
	" ----------\n",
	" actual : pandas.DataFrame\n",
	" expected : pandas.DataFrame\n",
	" use_close : bool, optional\n",
	" If True, use numpy.testing.assert_allclose instead of\n",
	" numpy.testing.assert_equal.\n",
	"\n",
	" \"\"\"\n",
	" if use_close:\n",
	" comp = npt.assert_allclose\n",
	" else:\n",
	" comp = npt.assert_equal\n",
	"\n",
	" assert (isinstance(actual, pd.DataFrame) and\n",
	" isinstance(expected, pd.DataFrame)), \\\n",
	" 'Inputs must both be pandas DataFrames.'\n",
	"\n",
	" for i, exp_row in expected.iterrows():\n",
	" assert i in actual.index, 'Expected row {!r} not found.'.format(i)\n",
	"\n",
	" act_row = actual.loc[i]\n",
	"\n",
	" for j, exp_item in exp_row.iteritems():\n",
	" assert j in act_row.index, \\\n",
	" 'Expected column {!r} not found.'.format(j)\n",
	"\n",
	" act_item = act_row[j]\n",
	"\n",
	" try:\n",
	" comp(act_item, exp_item)\n",
	" except AssertionError as e:\n",
	" raise AssertionError(\n",
	" e.message + '\\n\\nColumn: {!r}\\nRow: {!r}'.format(j, i))"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 53
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"expected = pd.DataFrame({'a': [1, np.nan, 3],\n",
	" 'b': [np.nan, 5, 6]},\n",
	" index=['x', 'y', 'z'])"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 54
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"actual = pd.DataFrame([[4, 1],\n",
	" [6, 3],\n",
	" [5, np.nan]],\n",
	" index=['x', 'z', 'y'],\n",
	" columns=['b', 'a'])"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 55
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"assert_frames_equal(actual, actual)"
	],
	"language": "python",
	"metadata": {},
	"outputs": [],
	"prompt_number": 56
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [
	"assert_frames_equal(actual, expected)"
	],
	"language": "python",
	"metadata": {},
	"outputs": [
	{
	"ename": "AssertionError",
	"evalue": "\nItems are not equal:\n ACTUAL: 4.0\n DESIRED: nan\n\nColumn: 'b'\nRow: 'x'",
	"output_type": "pyerr",
	"traceback": [
	"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
	"\u001b[0;32m<ipython-input-57-2fa991ae8dd6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0massert_frames_equal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mactual\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpected\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
	"\u001b[0;32m<ipython-input-53-fedbc359fc19>\u001b[0m in \u001b[0;36massert_frames_equal\u001b[0;34m(actual, expected)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAssertionError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m raise AssertionError(e.message + \n\u001b[0;32m---> 26\u001b[0;31m '\\n\\nColumn: {!r}\\nRow: {!r}'.format(j, i))\n\u001b[0m",
	"\u001b[0;31mAssertionError\u001b[0m: \nItems are not equal:\n ACTUAL: 4.0\n DESIRED: nan\n\nColumn: 'b'\nRow: 'x'"
	]
	}
	],
	"prompt_number": 57
	},
	{
	"cell_type": "code",
	"collapsed": false,
	"input": [],
	"language": "python",
	"metadata": {},
	"outputs": []
	}
	],
	"metadata": {}
	}
	]
	}