Skip to content

Instantly share code, notes, and snippets.

@gustavofonseca
Created November 30, 2015 13:17
Show Gist options
  • Save gustavofonseca/b16eb70d2b5402b80180 to your computer and use it in GitHub Desktop.
Save gustavofonseca/b16eb70d2b5402b80180 to your computer and use it in GitHub Desktop.
Exemplo de script para coletar XMLs do articlemeta
#!/usr/bin/env python3
"""Script para exemplificar o uso do articlemeta.scielo.org
para coletar XMLs dos artigos da rede SciELO. Novamente, isso é
apenas um EXEMPLO!
"""
import json
from urllib import request
# ISSNs of the journals to collect; this list is what the script passes to
# gera_pids() to enumerate article identifiers.
ISSNs = [
'0101-8175',
'0102-3306',
'1677-941X',
'1806-9657',
'1806-9088',
'1678-4685',
]
# URL template for listing a journal's article identifiers; the `{issn}`
# placeholder is filled in with str.format() by gera_pids().
URL_IDENTIFICADORES = 'http://articlemeta.scielo.org/api/v1/article/identifiers/?issn={issn}'
# URL template for fetching one article; the `{pid}` placeholder is filled in
# with str.format() by baixa_e_salva().
# NOTE(review): `format=xmlrsps` presumably selects the SciELO PS XML
# rendering of the article -- confirm against the articlemeta API docs.
URL_META_ARTIGO = 'http://articlemeta.scielo.org/api/v1/article/?code={pid}&format=xmlrsps'
def gera_pids(issns):
    """Yield the identifier (PID) of every article listed for each ISSN.

    For each ISSN one request is made to ``URL_IDENTIFICADORES``; the body
    is decoded as UTF-8 JSON and the ``code`` of every entry in its
    ``objects`` list is yielded. Work is lazy: a journal is only queried
    when the generator reaches it.

    Args:
        issns: Iterable of ISSN strings.

    Yields:
        The ``code`` value of each listed item, or ``None`` for an item
        that has no ``code`` key.

    Raises:
        urllib.error.URLError: If a request fails.
        ValueError: If a response body is not valid UTF-8 JSON.
    """
    # NOTE(review): only a single response per ISSN is read; confirm whether
    # the identifiers endpoint paginates its results.
    for issn in issns:
        url = URL_IDENTIFICADORES.format(issn=issn)
        # Use the response as a context manager instead of try/finally:
        # when urlopen() itself fails there is no response to close, and the
        # former `finally: resp.close()` raised an UnboundLocalError that
        # hid the real network error.
        with request.urlopen(url) as resp:
            resp_texto = resp.read().decode('utf-8')
        resp_py = json.loads(resp_texto)
        for item in resp_py.get('objects', []):
            yield item.get('code')
def baixa_e_salva(pid):
    """Download the XML of one article and save it as ``<pid>.xml``.

    The document is fetched from ``URL_META_ARTIGO`` and the raw response
    bytes are written, unmodified, to a file named after the PID in the
    current working directory. An existing file with that name is
    overwritten.

    Args:
        pid: Article identifier string, as yielded by ``gera_pids``.

    Raises:
        urllib.error.URLError: If the request fails; no file is created in
            that case.
        OSError: If the output file cannot be written.
    """
    url = URL_META_ARTIGO.format(pid=pid)
    # Use the response as a context manager instead of try/finally: when
    # urlopen() itself fails there is no response to close, and the former
    # `finally: resp.close()` raised an UnboundLocalError that hid the real
    # network error.
    with request.urlopen(url) as resp:
        xml_data = resp.read()
    # Binary mode: the payload is stored exactly as received, without any
    # decoding or re-encoding.
    with open(pid + '.xml', 'wb') as arquivo:
        arquivo.write(xml_data)
# Script entry: enumerate the PIDs of every configured journal (lazily, one
# journal at a time) and download each article's XML as soon as its PID is
# produced.
pids = gera_pids(ISSNs)

for pid in pids:
    baixa_e_salva(pid)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment