Skip to content

Instantly share code, notes, and snippets.

@jamilatta
Last active September 21, 2015 19:09
Show Gist options
  • Save jamilatta/8d2695878f6d2ce15f3f to your computer and use it in GitHub Desktop.
Save jamilatta/8d2695878f6d2ce15f3f to your computer and use it in GitHub Desktop.
Script to track the growth in the number of articles and issues from 2005 to 2016 (SciELO BR).
# coding: utf-8
import requests
from lxml import html
# Endpoint that lists the journal identifiers of the "scl" collection.
ISSN_URL = (
    "http://articlemeta.scielo.org/api/v1/journal/identifiers/"
    "?collection=scl"
)

# Query template for the statistics page. The literal tokens ISSN and YEAR
# are placeholders that get substituted with real values for each request.
STATBILIO_URL = (
    "http://statbiblio.scielo.org//stat_biblio/index.php"
    "?state=15&lang=en&country=scl"
    "&issn=ISSN&CITED[]=ISSN&YNG[]=YEAR"
)

# range() excludes its end value, so this covers 2005 through 2015.
YEARS = range(2005, 2016)
def main(output_path='cutus.out', timeout=60):
    """Collect yearly issue and article counts for every ISSN in the collection.

    Downloads the identifier list from ISSN_URL, then, for each year in YEARS
    and each ISSN, requests the STATBILIO_URL page and scrapes the issue and
    article counts from the returned HTML. One ``year;issn;issues;articles``
    line is written to *output_path* per successful lookup, and the same
    values are echoed to stdout as progress.

    Args:
        output_path: File that receives the ``;``-separated rows. It is
            truncated on every run.
        timeout: Seconds to wait for each HTTP response before giving up.

    Raises:
        requests.RequestException: If a request fails or times out, or if
            the ISSN list endpoint answers with an HTTP error status.
    """
    print("Get a list of ISSN")
    response = requests.get(ISSN_URL, timeout=timeout)
    # Fail early with a clear HTTP error instead of an obscure JSON/KeyError.
    response.raise_for_status()
    # NOTE(review): assumes each 'code' entry is a list of ISSN strings (it is
    # iterated below) -- confirm against the API response.
    issns = [journal['code'] for journal in response.json()['objects']]

    # The context manager closes the file even if a request or parse fails
    # part-way through the (long) crawl, so rows already written are kept.
    with open(output_path, 'w') as fp:
        for year in YEARS:
            for issn in issns:
                for anyissn in issn:
                    url = STATBILIO_URL.replace("ISSN", anyissn).replace("YEAR", str(year))
                    ret = requests.get(url, timeout=timeout)
                    tree = html.fromstring(ret.content)

                    # Pages carrying a "message" div are skipped (presumably
                    # the site's "no data" notice -- confirm).
                    if tree.xpath('//div[@class="message"]'):
                        continue

                    issue_cells = tree.xpath("//table//table//tr[4]/td[2]")
                    article_cells = tree.xpath("//table//table//tr[4]/td[3]")
                    if not issue_cells or not article_cells:
                        # Unexpected page layout: report it and move on rather
                        # than aborting the whole run with an IndexError.
                        print("Skipping %s %s: result table not found" % (year, anyissn))
                        continue

                    # Strip surrounding whitespace so stray newlines in the
                    # HTML cannot break the one-row-per-line output format.
                    n_issue = issue_cells[0].text_content().strip()
                    n_article = article_cells[0].text_content().strip()

                    fp.write(';'.join([str(year), anyissn, n_issue, n_article]) + '\n')
                    # Single-argument print behaves the same on Python 2 and 3.
                    print("%s %s %s %s" % (year, anyissn, n_issue, n_article))
# Run the collection only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment