Created
June 10, 2021 00:10
-
-
Save rafaelpezzuto/3b2acf729103f1f96c4763b4cc2c95d2 to your computer and use it in GitHub Desktop.
check static files update date
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"import pandas as pd\n", | |
"import sickle\n", | |
"\n", | |
"from articlemeta.client import RestfulClient" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"URL_STATIC_PDF_FILES = \"http://%s/static_pdf_files.txt\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"am = RestfulClient()\n", | |
"active_collections = [c for c in am.collections() if c[\"is_active\"] == True and c[\"type\"] == \"journals\" and c[\"has_analytics\"] == True and c['acron'] != 'scl']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def get_last_modified(url):\n", | |
" response = requests.get(url)\n", | |
" return response.headers.get(\"Last-Modified\") " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data_static_pdf_files = [{\"acron\": c[\"acron\"], \"original_name\": c[\"original_name\"], \"last_modified\": get_last_modified(URL_STATIC_PDF_FILES % c[\"domain\"])} for c in active_collections]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(20, 3)" | |
] | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.DataFrame(data_static_pdf_files)\n", | |
"df.shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>acron</th>\n", | |
" <th>original_name</th>\n", | |
" <th>last_modified</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>arg</td>\n", | |
" <td>Argentina</td>\n", | |
" <td>Wed, 02 Jun 2021 03:15:20 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>chl</td>\n", | |
" <td>Chile</td>\n", | |
" <td>Wed, 02 Jun 2021 09:01:03 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>col</td>\n", | |
" <td>Colombia</td>\n", | |
" <td>Wed, 02 Jun 2021 06:00:03 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>cub</td>\n", | |
" <td>Cuba</td>\n", | |
" <td>Thu, 17 Mar 2016 03:00:01 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>esp</td>\n", | |
" <td>España</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>mex</td>\n", | |
" <td>Mexico</td>\n", | |
" <td>Wed, 02 Jun 2021 12:00:08 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>prt</td>\n", | |
" <td>Portugal</td>\n", | |
" <td>Sun, 30 May 2021 21:00:25 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>spa</td>\n", | |
" <td>Saúde Pública</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>sza</td>\n", | |
" <td>South Africa</td>\n", | |
" <td>Tue, 01 Jun 2021 23:00:04 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>ven</td>\n", | |
" <td>Venezuela</td>\n", | |
" <td>Tue, 11 Feb 2020 19:48:30 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>bol</td>\n", | |
" <td>Bolivia</td>\n", | |
" <td>Wed, 02 Jun 2021 05:00:02 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>cri</td>\n", | |
" <td>Costa Rica</td>\n", | |
" <td>Tue, 01 Jun 2021 07:00:01 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>per</td>\n", | |
" <td>Peru</td>\n", | |
" <td>Mon, 25 Feb 2019 21:49:52 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>pry</td>\n", | |
" <td>Paraguay</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>ury</td>\n", | |
" <td>Uruguay</td>\n", | |
" <td>Wed, 02 Jun 2021 06:00:02 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>wid</td>\n", | |
" <td>West Indies</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>psi</td>\n", | |
" <td>PEPSIC</td>\n", | |
" <td>Tue, 01 Jun 2021 21:00:03 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>rve</td>\n", | |
" <td>REVENF</td>\n", | |
" <td>Tue, 01 Jun 2021 03:00:40 GMT</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>ecu</td>\n", | |
" <td>Ecuador</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>rvt</td>\n", | |
" <td>RevTur</td>\n", | |
" <td>None</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" acron original_name last_modified\n", | |
"0 arg Argentina Wed, 02 Jun 2021 03:15:20 GMT\n", | |
"1 chl Chile Wed, 02 Jun 2021 09:01:03 GMT\n", | |
"2 col Colombia Wed, 02 Jun 2021 06:00:03 GMT\n", | |
"3 cub Cuba Thu, 17 Mar 2016 03:00:01 GMT\n", | |
"4 esp España None\n", | |
"5 mex Mexico Wed, 02 Jun 2021 12:00:08 GMT\n", | |
"6 prt Portugal Sun, 30 May 2021 21:00:25 GMT\n", | |
"7 spa Saúde Pública None\n", | |
"8 sza South Africa Tue, 01 Jun 2021 23:00:04 GMT\n", | |
"9 ven Venezuela Tue, 11 Feb 2020 19:48:30 GMT\n", | |
"10 bol Bolivia Wed, 02 Jun 2021 05:00:02 GMT\n", | |
"11 cri Costa Rica Tue, 01 Jun 2021 07:00:01 GMT\n", | |
"12 per Peru Mon, 25 Feb 2019 21:49:52 GMT\n", | |
"13 pry Paraguay None\n", | |
"14 ury Uruguay Wed, 02 Jun 2021 06:00:02 GMT\n", | |
"15 wid West Indies None\n", | |
"16 psi PEPSIC Tue, 01 Jun 2021 21:00:03 GMT\n", | |
"17 rve REVENF Tue, 01 Jun 2021 03:00:40 GMT\n", | |
"18 ecu Ecuador None\n", | |
"19 rvt RevTur None" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"URL_OAI_PMH = \"http://%s/oai/scielo-oai.php?verb=ListRecords&metadataPrefix=oai_dc\"" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"oai_sic = sickle.Sickle('http://www.scielo.org.ar/oai/scielo-oai.php')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 75, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from datetime import datetime, timedelta\n", | |
"\n", | |
"def get_last_date_record():\n", | |
" for i in [datetime.now() - timedelta(days=x) for x in range(60)]:\n", | |
" d = oai_sic.ListRecords(from_date=i)\n", | |
" if len(d) > 0:\n", | |
" return d, True" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 76, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "HTTPError", | |
"evalue": "500 Server Error: Internal Server Error for url: http://www.scielo.org.ar/oai/scielo-oai.php?from_date=2021-06-02+13%3A18%3A21.972170&verb=ListRecords", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mHTTPError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-76-25795c1f60c2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mget_last_date_record\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m<ipython-input-75-ba035fc0cc6b>\u001b[0m in \u001b[0;36mget_last_date_record\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_last_date_record\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mdatetime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mtimedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdays\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m60\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0md\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moai_sic\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mListRecords\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfrom_date\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0md\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/Working/scielo/check-collection-sync/.venv/lib/python3.6/site-packages/sickle/app.py\u001b[0m in \u001b[0;36mListRecords\u001b[0;34m(self, ignore_deleted, **kwargs)\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'verb'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'ListRecords'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;31m# noinspection PyCallingNonCallable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 150\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_deleted\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mignore_deleted\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 151\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 152\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mListIdentifiers\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_deleted\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/Working/scielo/check-collection-sync/.venv/lib/python3.6/site-packages/sickle/iterator.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, sickle, params, ignore_deleted)\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmapper\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclass_mapping\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'verb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0melement\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mVERBS_ELEMENTS\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'verb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 135\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mOAIItemIterator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msickle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_deleted\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 136\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_next_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/Working/scielo/check-collection-sync/.venv/lib/python3.6/site-packages/sickle/iterator.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, sickle, params, ignore_deleted)\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'verb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresumption_token\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 46\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_next_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 47\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__iter__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/Working/scielo/check-collection-sync/.venv/lib/python3.6/site-packages/sickle/iterator.py\u001b[0m in \u001b[0;36m_next_response\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_next_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 138\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mOAIItemIterator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_next_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 139\u001b[0m self._items = self.oai_response.xml.iterfind(\n\u001b[1;32m 140\u001b[0m './/' + self.sickle.oai_namespace + self.element)\n", | |
"\u001b[0;32m~/Working/scielo/check-collection-sync/.venv/lib/python3.6/site-packages/sickle/iterator.py\u001b[0m in \u001b[0;36m_next_response\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;34m'verb'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m }\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moai_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mharvest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m error = self.oai_response.xml.find(\n\u001b[1;32m 86\u001b[0m './/' + self.sickle.oai_namespace + 'error')\n", | |
"\u001b[0;32m~/Working/scielo/check-collection-sync/.venv/lib/python3.6/site-packages/sickle/app.py\u001b[0m in \u001b[0;36mharvest\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 128\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mretry_after\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0mhttp_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 130\u001b[0;31m \u001b[0mhttp_response\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_for_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 131\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0mhttp_response\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/Working/scielo/check-collection-sync/.venv/lib/python3.6/site-packages/requests/models.py\u001b[0m in \u001b[0;36mraise_for_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 941\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 942\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhttp_error_msg\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 943\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mHTTPError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhttp_error_msg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 944\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 945\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mHTTPError\u001b[0m: 500 Server Error: Internal Server Error for url: http://www.scielo.org.ar/oai/scielo-oai.php?from_date=2021-06-02+13%3A18%3A21.972170&verb=ListRecords" | |
] | |
} | |
], | |
"source": [ | |
"get_last_date_record()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 51, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "SyntaxError", | |
"evalue": "unexpected EOF while parsing (<ipython-input-51-593277157ac2>, line 1)", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;36m File \u001b[0;32m\"<ipython-input-51-593277157ac2>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m data_oai = [{\"acron\": c[\"acron\"], \"original_name\": c[\"original_name\"], \"last_record_date\": get_last_record_date(URL_OAI_PMH % c[\"domain\"], \"2021-05-30\")} for c in active_collections[:1\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m unexpected EOF while parsing\n" | |
] | |
} | |
], | |
"source": [ | |
"data_oai = [{\"acron\": c[\"acron\"], \"original_name\": c[\"original_name\"], \"last_record_date\": get_last_record_date(URL_OAI_PMH % c[\"domain\"], \"2021-05-30\")} for c in active_collections[:1" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": ".venv", | |
"language": "python", | |
"name": ".venv" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment