Created
June 10, 2021 00:05
-
-
Save rafaelpezzuto/6bb35664590d89f34049266ec9129ec1 to your computer and use it in GitHub Desktop.
check last records - SciELO OAI-PMH
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests.exceptions as r_exceptions | |
import urllib3.exceptions as u_exceptions | |
from articlemeta.client import RestfulClient | |
from datetime import datetime, timedelta | |
from sickle import Sickle | |
from sickle.oaiexceptions import NoRecordsMatch | |
# URL templates; the single %s placeholder receives a collection's domain.
# URL_STATIC_PDF_FILES is not referenced by the code visible in this file.
URL_STATIC_PDF_FILES = 'http://%s/static_pdf_files.txt'
URL_OAI_PMH = 'http://%s/oai/scielo-oai.php'

# Acronyms of the collections whose OAI-PMH endpoint main() inspects.
COLLECTIONS = ['col']

# Further collection acronyms; not referenced by the code visible in this file.
# NOTE(review): the intended meaning of this group is not evident here - confirm.
UCOLS = ['bol', 'cri', 'rve', 'psi', 'ury']
def _first_record_since(oai_client, acron, from_date):
    """Return the first record the endpoint lists from *from_date*, or None.

    Args:
        oai_client: Sickle client bound to the collection's OAI-PMH endpoint.
        acron: Collection acronym, used only to label diagnostic output.
        from_date: Lower bound of the harvest window, formatted as YYYY-MM-DD.

    Returns:
        The first record yielded by ``ListRecords``, or ``None`` when the
        endpoint reports no matching records or the request fails.
    """
    try:
        # 'from' is a Python keyword, so the arguments are passed as a dict.
        records = oai_client.ListRecords(
            **{'metadataPrefix': 'oai_dc', 'from': from_date}
        )
        for record in records:
            # Only the existence of a record matters; stop at the first one.
            return record
    except NoRecordsMatch:
        # Expected answer when nothing matches the requested window.
        return None
    except (
        u_exceptions.NewConnectionError,
        u_exceptions.MaxRetryError,
        r_exceptions.ConnectionError,
        u_exceptions.TimeoutError,
        r_exceptions.ReadTimeout,
        r_exceptions.HTTPError,
    ) as exc:
        # Best effort: a failed request must not abort the scan, but it is
        # reported so an unreachable endpoint is not mistaken for "no records".
        print(acron, 'request failed', from_date, repr(exc))
        return None
    return None


def main():
    """Report, per selected collection, the latest date that still has records.

    For every collection returned by the ArticleMeta client whose acronym is
    listed in ``COLLECTIONS``, the OAI-PMH endpoint is queried with a ``from``
    date that starts at today and moves back one day at a time (up to 1000
    days). The scan of a collection stops at the first date for which the
    endpoint returns at least one record; that record's identifier is printed.
    """
    am = RestfulClient()

    # Take "now" once so that every candidate date derives from the same instant.
    today = datetime.now()
    from_dates = [
        (today - timedelta(days=offset)).strftime('%Y-%m-%d')
        for offset in range(1000)
    ]

    for ac in am.collections():
        if ac['acron'] not in COLLECTIONS:
            continue

        oai_url = URL_OAI_PMH % ac['domain']
        print(ac['acron'], oai_url)
        # NOTE(review): verify=False disables TLS certificate verification;
        # the URL template uses plain http, so it only matters on redirects.
        oai_client = Sickle(oai_url, max_retries=1, verify=False)

        for from_date in from_dates:
            print(ac['acron'], 'getting records', from_date)
            record = _first_record_since(oai_client, ac['acron'], from_date)
            if record is not None:
                print(ac['acron'], 'getting records', from_date, record.header.identifier)
                # Most recent date with records found; skip the older dates.
                break
# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment