Skip to content

Instantly share code, notes, and snippets.

@pybokeh
Last active June 1, 2023 03:25
Show Gist options
  • Save pybokeh/59a764b38d7e70a40b4f3606b2236571 to your computer and use it in GitHub Desktop.
Save pybokeh/59a764b38d7e70a40b4f3606b2236571 to your computer and use it in GitHub Desktop.
When trying to print or view an ibis table expression created by the ibis DuckDB client's read_csv() method, it just hangs forever
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "90ac79de-14eb-4529-af92-6c173777ec05",
"metadata": {},
"outputs": [],
"source": [
"import ibis\n",
"import ibis.selectors as s\n",
"import pandas as pd\n",
"from ibis import _\n",
"ibis.options.interactive = True\n",
"\n",
"# create a DuckDB client\n",
"client = ibis.duckdb.connect()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "381eda91-ab17-4f8d-ab65-6c81d57e88d3",
"metadata": {},
"outputs": [],
"source": [
"veh_crash_stats = client.read_csv('CrashStatistics.csv')"
]
},
{
"cell_type": "markdown",
"id": "5c8e5d56-2c12-42e3-a1af-fdc67c36903a",
"metadata": {},
"source": [
"Evaluating the next cell (displaying the ibis table expression, which triggers execution because interactive mode is enabled) just hangs:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "15000496-29d0-41a8-bff0-ec77b39bf084",
"metadata": {},
"outputs": [],
"source": [
"veh_crash_stats"
]
},
{
"cell_type": "markdown",
"id": "f0383834-2b85-4dd4-8060-da5202d4622c",
"metadata": {},
"source": [
"#### But if I read the CSV using DuckDB's own `read_csv()` function, it fails immediately with an `InvalidInputException`"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "4fe10d95-1f26-4852-b358-409f4f0f8875",
"metadata": {},
"outputs": [],
"source": [
"import duckdb"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "7145b633-4e49-49c5-8e44-b40568b6315a",
"metadata": {},
"outputs": [
{
"ename": "InvalidInputException",
"evalue": "Invalid Input Error: Error in file \"CrashStatistics.csv\" on line 3219: quote should be followed by end of value, end of row or another quote. ( file=CrashStatistics.csv\n delimiter=',' (auto detected)\n quote='\"' (auto detected)\n escape='\"' (auto detected)\n header=1 (auto detected)\n sample_size=20480\n ignore_errors=0\n all_varchar=0). ",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mInvalidInputException\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\core\\formatters.py:708\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 701\u001b[0m stream \u001b[38;5;241m=\u001b[39m StringIO()\n\u001b[0;32m 702\u001b[0m printer \u001b[38;5;241m=\u001b[39m pretty\u001b[38;5;241m.\u001b[39mRepresentationPrinter(stream, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose,\n\u001b[0;32m 703\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_width, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnewline,\n\u001b[0;32m 704\u001b[0m max_seq_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_seq_length,\n\u001b[0;32m 705\u001b[0m singleton_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msingleton_printers,\n\u001b[0;32m 706\u001b[0m type_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtype_printers,\n\u001b[0;32m 707\u001b[0m deferred_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdeferred_printers)\n\u001b[1;32m--> 708\u001b[0m \u001b[43mprinter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpretty\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 709\u001b[0m printer\u001b[38;5;241m.\u001b[39mflush()\n\u001b[0;32m 710\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m stream\u001b[38;5;241m.\u001b[39mgetvalue()\n",
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\lib\\pretty.py:410\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 407\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m meth(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[0;32m 408\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mobject\u001b[39m \\\n\u001b[0;32m 409\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__repr__\u001b[39m\u001b[38;5;124m'\u001b[39m)):\n\u001b[1;32m--> 410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_repr_pprint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcycle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _default_pprint(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[0;32m 413\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n",
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\lib\\pretty.py:778\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[1;34m(obj, p, cycle)\u001b[0m\n\u001b[0;32m 776\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[0;32m 777\u001b[0m \u001b[38;5;66;03m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[1;32m--> 778\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mrepr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 779\u001b[0m lines \u001b[38;5;241m=\u001b[39m output\u001b[38;5;241m.\u001b[39msplitlines()\n\u001b[0;32m 780\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m p\u001b[38;5;241m.\u001b[39mgroup():\n",
"\u001b[1;31mInvalidInputException\u001b[0m: Invalid Input Error: Error in file \"CrashStatistics.csv\" on line 3219: quote should be followed by end of value, end of row or another quote. ( file=CrashStatistics.csv\n delimiter=',' (auto detected)\n quote='\"' (auto detected)\n escape='\"' (auto detected)\n header=1 (auto detected)\n sample_size=20480\n ignore_errors=0\n all_varchar=0). "
]
}
],
"source": [
"duckdb.read_csv('CrashStatistics.csv')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "bd374ef9-d20d-4eb0-8b95-ba786a54a233",
"metadata": {},
"outputs": [
{
"ename": "InvalidInputException",
"evalue": "Invalid Input Error: Error in file \"CrashStatistics.csv\" on line 3219: quote should be followed by end of value, end of row or another quote. ( file=CrashStatistics.csv\n delimiter=',' (auto detected)\n quote='\"' (auto detected)\n escape='\"' (auto detected)\n header=1 (auto detected)\n sample_size=20480\n ignore_errors=0\n all_varchar=0). ",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mInvalidInputException\u001b[0m Traceback (most recent call last)",
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\core\\formatters.py:708\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 701\u001b[0m stream \u001b[38;5;241m=\u001b[39m StringIO()\n\u001b[0;32m 702\u001b[0m printer \u001b[38;5;241m=\u001b[39m pretty\u001b[38;5;241m.\u001b[39mRepresentationPrinter(stream, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose,\n\u001b[0;32m 703\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_width, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnewline,\n\u001b[0;32m 704\u001b[0m max_seq_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_seq_length,\n\u001b[0;32m 705\u001b[0m singleton_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msingleton_printers,\n\u001b[0;32m 706\u001b[0m type_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtype_printers,\n\u001b[0;32m 707\u001b[0m deferred_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdeferred_printers)\n\u001b[1;32m--> 708\u001b[0m \u001b[43mprinter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpretty\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 709\u001b[0m printer\u001b[38;5;241m.\u001b[39mflush()\n\u001b[0;32m 710\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m stream\u001b[38;5;241m.\u001b[39mgetvalue()\n",
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\lib\\pretty.py:410\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 407\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m meth(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[0;32m 408\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mobject\u001b[39m \\\n\u001b[0;32m 409\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__repr__\u001b[39m\u001b[38;5;124m'\u001b[39m)):\n\u001b[1;32m--> 410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_repr_pprint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcycle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _default_pprint(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[0;32m 413\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n",
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\lib\\pretty.py:778\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[1;34m(obj, p, cycle)\u001b[0m\n\u001b[0;32m 776\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[0;32m 777\u001b[0m \u001b[38;5;66;03m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[1;32m--> 778\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mrepr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 779\u001b[0m lines \u001b[38;5;241m=\u001b[39m output\u001b[38;5;241m.\u001b[39msplitlines()\n\u001b[0;32m 780\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m p\u001b[38;5;241m.\u001b[39mgroup():\n",
"\u001b[1;31mInvalidInputException\u001b[0m: Invalid Input Error: Error in file \"CrashStatistics.csv\" on line 3219: quote should be followed by end of value, end of row or another quote. ( file=CrashStatistics.csv\n delimiter=',' (auto detected)\n quote='\"' (auto detected)\n escape='\"' (auto detected)\n header=1 (auto detected)\n sample_size=20480\n ignore_errors=0\n all_varchar=0). "
]
}
],
"source": [
"duckdb.read_csv('CrashStatistics.csv', escapechar=None, quotechar=None)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "605f7beb-8672-4498-8387-7dc492f27c42",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\tOH-2023-0074,20233004904,Property Damage Only,,Deerfield,False,True,False,False,False,False,False,\t01314,1,98,Clermont County,Township,Goshen (Township of),1/12/2023 6:35:00 PM,31010,39.181742,-84.179611,,,,Deerfield,Road,10.00,Feet,West,,,,5807,House Number,,False,,False,On Roadway,Not Collision Between Two Vehicles in Transport,Clear,Dark - Roadway Not Lighted,False,False,False,False,,,Unit 1 was traveling Southbound on Deerfield Rd. a Deer ran in front of Unit 1 at 5807 Deerfield Rd. Unit 1 struck the deer causing functionable damage to the vehicle.,Police,False,1/12/2023 6:44:00 PM,1/12/2023 6:46:00 PM,1/12/2023 6:56:00 PM,1/12/2023 7:04:00 PM,0,0,Smith; Tyler ,36,Collier; Cody ,82,False,,,Straight Level,Dry,Blacktop; Bituminous; Asphalt,0,0,18,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False\n"
]
}
],
"source": [
"!head -n 3218 CrashStatistics.csv | tail -n 1"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "1db70878-78c2-4624-b182-538e19fb71ec",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\t2302076,20233004905,Property Damage Only,,,False,True,False,True,False,False,False,\t05215,2,1,Medina County,Township,Brunswick Hills (Township of),1/13/2023 3:56:00 PM,9708,41.260720,-81.860690,,,,SUBSTATION RD,Road,15.00,Feet,South,,,,GRAFTON RD,Intersection,Road,True,4,False,On Roadway,Rear-end,Snow,Daylight,False,False,False,False,,,Unit 2 travelling south on Substation Rd. was stopped at the Stop sign at Grafton Rd. when she was struck from behind by Unit 1. Driver of unit 1; also travelling south on Substation; states she was unable to slow down in time and swerved to avoid Unit 2; but lost control of the vehicle as it slid into Unit 2.,Police,False,1/13/2023 3:56:23 PM,1/13/2023 3:58:44 PM,1/13/2023 3:58:48 PM,1/13/2023 4:29:37 PM,60,0,ADRIAN NEAGU,1325,PRZEMYSLAW PIEKUT,1309,False,,,Straight Level,Snow,Blacktop; Bituminous; Asphalt,0,0,91,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False\n"
]
}
],
"source": [
"!head -n 3219 CrashStatistics.csv | tail -n 1"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "f4b038da-3447-42bf-8fb9-d3b37732ff08",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\t230031775,20233004906,Property Damage Only,,\"POLICE VEHICLE\" 134,False,False,False,False,False,False,False,\tCOP00,2,1,Franklin County,City,Columbus,1/13/2023 5:37:00 PM,18000,39.915730,-82.965950,,,,Lockbourne ,Road,20.00,Feet,North,State Route,104,,,Intersection,,True,4,False,On Roadway,Sideswipe; same direction,Snow,Dark - Lighted Roadway,False,False,False,False,,,Unit 1 and Unit 2 were traveling South on Lockbourne Rd. The operator of Unit 1 stated that he was changing lanes and did not see Unit 2 until he struck Unit 1. The Operator of Unit 2 stated that he was traveling straight ahead when Unit 1 went to switch lanes and struck Unit 2. The operator of Unit 1 was issued a citation for 2131.08A1 changing lanes without safety.,Police,False,1/13/2023 5:37:00 PM,1/13/2023 5:37:00 PM,1/13/2023 5:37:00 PM,1/13/2023 6:15:00 PM,0,43,DALZELL; EVAN,2723,BECKER; BRIAN,5232,False,,,Straight Level,Wet,Blacktop; Bituminous; Asphalt,0,0,38,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False\n"
]
}
],
"source": [
"!head -n 3220 CrashStatistics.csv | tail -n 1"
]
},
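{
"cell_type": "markdown",
"id": "d9afcdc8-d014-4f26-8c71-abcdf10989ec",
"metadata": {},
"source": [
"#### Not seeing anything obviously bad with row 3219. However, the next row (3220, printed just above) contains `\"POLICE VEHICLE\" 134`: a closing quote followed by more text, which matches the error message, so the reported line number may be off by one (possibly because the header row is not counted)."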
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Py3.9 (sql_dev)",
"language": "python",
"name": "sql_dev"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment