Skip to content

Instantly share code, notes, and snippets.

@pybokeh
Created January 25, 2019 22:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pybokeh/434e290a1af3636a3f84097e3aa45a72 to your computer and use it in GitHub Desktop.
Save pybokeh/434e290a1af3636a3f84097e3aa45a72 to your computer and use it in GitHub Desktop.
pandas_memory_error
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "pd.show_versions()",
"execution_count": 11,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "\nINSTALLED VERSIONS\n------------------\ncommit: None\npython: 3.7.0.final.0\npython-bits: 32\nOS: Windows\nOS-release: 10\nmachine: AMD64\nprocessor: Intel64 Family 6 Model 78 Stepping 3, GenuineIntel\nbyteorder: little\nLC_ALL: None\nLANG: None\nLOCALE: None.None\n\npandas: 0.23.4\npytest: None\npip: 19.0\nsetuptools: 39.0.1\nCython: None\nnumpy: 1.16.0\nscipy: 1.2.0\npyarrow: None\nxarray: None\nIPython: 6.5.0\nsphinx: None\npatsy: 0.5.0\ndateutil: 2.7.3\npytz: 2018.5\nblosc: None\nbottleneck: None\ntables: None\nnumexpr: None\nfeather: None\nmatplotlib: 3.0.0\nopenpyxl: 2.5.7\nxlrd: 1.1.0\nxlwt: None\nxlsxwriter: None\nlxml: 4.2.5\nbs4: 4.6.3\nhtml5lib: 1.0.1\nsqlalchemy: 1.2.14\npymysql: None\npsycopg2: None\njinja2: 2.10\ns3fs: None\nfastparquet: None\npandas_gbq: None\npandas_datareader: None\n"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df1 = pd.read_csv(r'some_path\\All_Plants_Budgeted_CPU_By_Group_SubGroup.csv')",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df1.info()",
"execution_count": 13,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 80040 entries, 0 to 80039\nData columns (total 10 columns):\nGraphCatID 80040 non-null int64\nGraphCatDesc 80040 non-null object\nGRP_NM 80040 non-null object\nSUBGRP_NM 80040 non-null object\nBudgeted_CPU 80040 non-null float64\nOriginalReserve_CPU 80040 non-null float64\nModelYear 80040 non-null int64\nFactory 80040 non-null object\nModelName 80040 non-null object\nTotal_CPU_GC_Level 80040 non-null float64\ndtypes: float64(3), int64(2), object(5)\nmemory usage: 4.6+ MB\n"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df2 = pd.read_csv(r'some_path\\Adjusted_CPUs.csv')",
"execution_count": 8,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "df2.info()",
"execution_count": 14,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 80040 entries, 0 to 80039\nData columns (total 6 columns):\nGraphCatID 80040 non-null int64\nGraphCatDesc 80040 non-null object\nGRP_NM 80040 non-null object\nSUBGRP_NM 80040 non-null object\nBudgeted_CPU 80040 non-null float64\nOriginalReserve_CPU 80040 non-null float64\ndtypes: float64(2), int64(1), object(3)\nmemory usage: 2.7+ MB\n"
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "merged = pd.merge(df1, df2, how='left', left_on=['GraphCatID'], right_on=['GraphCatID'])",
"execution_count": 12,
"outputs": [
{
"ename": "MemoryError",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mMemoryError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-12-27e42d200787>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmerged\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'left'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mleft_on\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'GraphCatID'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mright_on\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'GraphCatID'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32md:\\python37-32\\envs\\notebook\\lib\\site-packages\\pandas\\core\\reshape\\merge.py\u001b[0m in \u001b[0;36mmerge\u001b[1;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindicator\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mindicator\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 61\u001b[0m validate=validate)\n\u001b[1;32m---> 62\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 63\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 64\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32md:\\python37-32\\envs\\notebook\\lib\\site-packages\\pandas\\core\\reshape\\merge.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 580\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mldata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlindexers\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mrdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrindexers\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 581\u001b[0m \u001b[0maxes\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mllabels\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrlabels\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mjoin_index\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 582\u001b[1;33m concat_axis=0, copy=self.copy)\n\u001b[0m\u001b[0;32m 583\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 584\u001b[0m \u001b[0mtyp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mleft\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32md:\\python37-32\\envs\\notebook\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mconcatenate_block_managers\u001b[1;34m(mgrs_indexers, axes, concat_axis, copy)\u001b[0m\n\u001b[0;32m 5419\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5420\u001b[0m b = make_block(\n\u001b[1;32m-> 5421\u001b[1;33m \u001b[0mconcatenate_join_units\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjoin_units\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconcat_axis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5422\u001b[0m placement=placement)\n\u001b[0;32m 5423\u001b[0m \u001b[0mblocks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32md:\\python37-32\\envs\\notebook\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mconcatenate_join_units\u001b[1;34m(join_units, concat_axis, copy)\u001b[0m\n\u001b[0;32m 5563\u001b[0m to_concat = [ju.get_reindexed_values(empty_dtype=empty_dtype,\n\u001b[0;32m 5564\u001b[0m upcasted_na=upcasted_na)\n\u001b[1;32m-> 5565\u001b[1;33m for ju in join_units]\n\u001b[0m\u001b[0;32m 5566\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5567\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mto_concat\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32md:\\python37-32\\envs\\notebook\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 5563\u001b[0m to_concat = [ju.get_reindexed_values(empty_dtype=empty_dtype,\n\u001b[0;32m 5564\u001b[0m upcasted_na=upcasted_na)\n\u001b[1;32m-> 5565\u001b[1;33m for ju in join_units]\n\u001b[0m\u001b[0;32m 5566\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5567\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mto_concat\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32md:\\python37-32\\envs\\notebook\\lib\\site-packages\\pandas\\core\\internals.py\u001b[0m in \u001b[0;36mget_reindexed_values\u001b[1;34m(self, empty_dtype, upcasted_na)\u001b[0m\n\u001b[0;32m 5873\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0max\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindexers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5874\u001b[0m values = algos.take_nd(values, indexer, axis=ax,\n\u001b[1;32m-> 5875\u001b[1;33m fill_value=fill_value)\n\u001b[0m\u001b[0;32m 5876\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5877\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mvalues\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32md:\\python37-32\\envs\\notebook\\lib\\site-packages\\pandas\\core\\algorithms.py\u001b[0m in \u001b[0;36mtake_nd\u001b[1;34m(arr, indexer, axis, out, fill_value, mask_info, allow_fill)\u001b[0m\n\u001b[0;32m 1653\u001b[0m \u001b[0mout\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mout_shape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'F'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1654\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1655\u001b[1;33m \u001b[0mout\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mout_shape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1656\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1657\u001b[0m func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis,\n",
"\u001b[1;31mMemoryError\u001b[0m: "
]
}
]
}
],
"metadata": {
"jupytext": {
"text_representation": {
"extension": ".md",
"format_name": "markdown",
"format_version": "1.0",
"jupytext_version": "0.8.6"
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"hide_input": false,
"language_info": {
"name": "python",
"version": "3.7.0",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"gist": {
"id": "",
"data": {
"description": "pandas_memory_error",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment