Last active
October 21, 2018 15:36
-
-
Save OriHoch/5e5b608f31916f27beafb40376860761 to your computer and use it in GitHub Desktop.
dataflows bug - loading and checkpointing from package with non-standard date/time format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Save checkpoint from old datapackage with non-standard date/time format" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"saving checkpoint to: .checkpoints/knesset-data-committees-kns_committee\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h3>kns_committee</h3>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<table>\n", | |
"<thead>\n", | |
"<tr><th># </th><th style=\"text-align: right;\"> CommitteeID\n", | |
"(integer)</th><th>Name\n", | |
"(string) </th><th style=\"text-align: right;\"> CategoryID\n", | |
"(integer)</th><th>CategoryDesc\n", | |
"(string) </th><th style=\"text-align: right;\"> KnessetNum\n", | |
"(integer)</th><th style=\"text-align: right;\"> CommitteeTypeID\n", | |
"(integer)</th><th>CommitteeTypeDesc\n", | |
"(string) </th><th>Email\n", | |
"(string) </th><th>StartDate\n", | |
"(datetime) </th><th>FinishDate\n", | |
"(datetime) </th><th style=\"text-align: right;\"> AdditionalTypeID\n", | |
"(integer)</th><th>AdditionalTypeDesc\n", | |
"(string) </th><th style=\"text-align: right;\"> ParentCommitteeID\n", | |
"(integer)</th><th>CommitteeParentName\n", | |
"(string) </th><th>IsCurrent\n", | |
"(boolean) </th><th>LastUpdatedDate\n", | |
"(datetime) </th></tr>\n", | |
"</thead>\n", | |
"<tbody>\n", | |
"<tr><td>1 </td><td style=\"text-align: right;\"> 1</td><td>הכנסת </td><td style=\"text-align: right;\"> 1</td><td>ועדת הכנסת </td><td style=\"text-align: right;\">15</td><td style=\"text-align: right;\">70</td><td>ועדת הכנסת</td><td>vadatk@knesset.gov.il </td><td>1999-06-07 00:00:00</td><td> </td><td style=\"text-align: right;\">991</td><td>קבועה </td><td style=\"text-align: right;\"> </td><td> </td><td>True</td><td>2017-04-24 16:47:06</td></tr>\n", | |
"<tr><td>2 </td><td style=\"text-align: right;\"> 2</td><td>הכספים </td><td style=\"text-align: right;\"> 2</td><td>ועדת הכספים </td><td style=\"text-align: right;\">15</td><td style=\"text-align: right;\">71</td><td>ועדה ראשית</td><td> </td><td>1999-06-07 00:00:00</td><td> </td><td style=\"text-align: right;\">991</td><td>קבועה </td><td style=\"text-align: right;\"> </td><td> </td><td>True</td><td>2015-03-20 12:02:57</td></tr>\n", | |
"<tr><td>...</td><td style=\"text-align: right;\"> </td><td> </td><td style=\"text-align: right;\"> </td><td> </td><td style=\"text-align: right;\"> </td><td style=\"text-align: right;\"> </td><td> </td><td> </td><td> </td><td> </td><td style=\"text-align: right;\"> </td><td> </td><td style=\"text-align: right;\"> </td><td> </td><td> </td><td> </td></tr>\n", | |
"<tr><td>723</td><td style=\"text-align: right;\">2072</td><td>ועדת המשנה לבניית תוכנית אב לטיפול באוטיזם במגזר הערבי</td><td style=\"text-align: right;\">681</td><td>ועדת המשנה לבניית תכנית אב לטיפול באוטיזם במגזר הערבי</td><td style=\"text-align: right;\">20</td><td style=\"text-align: right;\">74</td><td>ועדת משנה </td><td>vrevacha@knesset.gov.il</td><td>2018-10-17 00:00:00</td><td> </td><td style=\"text-align: right;\">992</td><td>מיוחדת</td><td style=\"text-align: right;\">928</td><td>ועדת העבודה, הרווחה והבריאות</td><td>True</td><td>2018-10-17 15:11:32</td></tr>\n", | |
"</tbody>\n", | |
"</table>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"checkpoint saved: knesset-data-committees-kns_committee\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"{'count_of_rows': 723,\n", | |
" 'bytes': 177400,\n", | |
" 'hash': 'a2b5eea8cc3714ed988e3e0192cf5eae',\n", | |
" 'dataset_name': None}" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import shutil\n", | |
"\n", | |
"from dataflows import Flow, load, checkpoint, printer\n", | |
"\n", | |
"# Remove any previously saved checkpoint so this run writes a fresh one\n", | |
"# instead of reusing existing checkpoint data. shutil.rmtree with\n", | |
"# ignore_errors=True is the portable, pure-Python equivalent of `rm -rf`.\n", | |
"shutil.rmtree('.checkpoints/knesset-data-committees-kns_committee', ignore_errors=True)\n", | |
"\n", | |
"# Load the remote datapackage, save it as a checkpoint and print a preview table.\n", | |
"# process()[1] is the stats dict, shown as the cell result.\n", | |
"Flow(\n", | |
" load('https://production.oknesset.org/pipelines/data/committees/kns_committee/datapackage.json'),\n", | |
" checkpoint('knesset-data-committees-kns_committee'),\n", | |
" printer(tablefmt='html', num_rows=1)\n", | |
").process()[1]\n" | |
] | |
}, | |
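{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Load from the saved checkpoint\n", | |
"\n", | |
"Expected: the 723 rows saved above are read back from the checkpoint and the same stats are returned.\n", | |
"\n", | |
"Actual: casting `StartDate` and `LastUpdatedDate` fails, because the values read back are ISO 8601 strings (e.g. `1999-06-07T00:00:00Z`) while the fields still declare the format `%Y-%m-%d %H:%M:%S`; no rows are printed and the returned stats are empty (`{}`)." | |
] | |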
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"using checkpoint data from .checkpoints/knesset-data-committees-kns_committee\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<h3>kns_committee</h3>" | |
], | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"ERROR:root:Field \"StartDate\" can't cast value \"1999-06-07T00:00:00Z\" for type \"datetime\" with format \"%Y-%m-%d %H:%M:%S\"\n", | |
"ERROR:root:Field \"LastUpdatedDate\" can't cast value \"2017-04-24T16:47:06Z\" for type \"datetime\" with format \"%Y-%m-%d %H:%M:%S\"\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"{}" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Run a flow that starts from the checkpoint step only, so the rows come\n", | |
"# from the previously saved checkpoint data; show the stats as the cell result.\n", | |
"reload_flow = Flow(\n", | |
" checkpoint('knesset-data-committees-kns_committee'),\n", | |
" printer(tablefmt='html', num_rows=1),\n", | |
")\n", | |
"reload_result = reload_flow.process()\n", | |
"reload_result[1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Conda environment specification: channels, conda dependencies and pip dependencies. | |
# NOTE(review): presumably the environment used to run the notebook above - confirm. | |
# NOTE(review): indentation appears to have been stripped from this listing; the | |
# dataflows entry is presumably nested under `pip:` - confirm against the original file. | |
channels: | |
- defaults | |
dependencies: | |
- python>=3.7 | |
- leveldb>=1.20 | |
- pip: | |
# dataflows is requested with its "speedup" extra, at version 0.0.31 or newer. | |
- dataflows[speedup]>=0.0.31 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment